summaryrefslogtreecommitdiff
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/acpi.c75
-rw-r--r--arch/arm64/kernel/cpufeature.c149
-rw-r--r--arch/arm64/kernel/cpuinfo.c1
-rw-r--r--arch/arm64/kernel/crash_core.c10
-rw-r--r--arch/arm64/kernel/entry.S96
-rw-r--r--arch/arm64/kernel/module-plts.c46
-rw-r--r--arch/arm64/kernel/perf_event.c89
-rw-r--r--arch/arm64/kernel/setup.c24
-rw-r--r--arch/arm64/kernel/stacktrace.c2
-rw-r--r--arch/arm64/kernel/traps.c2
-rw-r--r--arch/arm64/kernel/vdso.c136
-rw-r--r--arch/arm64/kernel/vdso/vdso.lds.S5
-rw-r--r--arch/arm64/kernel/vdso32/vdso.lds.S5
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S1
14 files changed, 500 insertions, 141 deletions
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index a7586a4db142..455966401102 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -261,6 +261,81 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
return __pgprot(PROT_DEVICE_nGnRnE);
}
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
+{
+ efi_memory_desc_t *md, *region = NULL;
+ pgprot_t prot;
+
+ if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP)))
+ return NULL;
+
+ for_each_efi_memory_desc(md) {
+ u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+
+ if (phys < md->phys_addr || phys >= end)
+ continue;
+
+ if (phys + size > end) {
+ pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n");
+ return NULL;
+ }
+ region = md;
+ break;
+ }
+
+ /*
+ * It is fine for AML to remap regions that are not represented in the
+ * EFI memory map at all, as it only describes normal memory, and MMIO
+ * regions that require a virtual mapping to make them accessible to
+ * the EFI runtime services.
+ */
+ prot = __pgprot(PROT_DEVICE_nGnRnE);
+ if (region) {
+ switch (region->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+ case EFI_PERSISTENT_MEMORY:
+ pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys);
+ return NULL;
+
+ case EFI_RUNTIME_SERVICES_CODE:
+ /*
+ * This would be unusual, but not problematic per se,
+ * as long as we take care not to create a writable
+ * mapping for executable code.
+ */
+ prot = PAGE_KERNEL_RO;
+ break;
+
+ case EFI_ACPI_RECLAIM_MEMORY:
+ /*
+ * ACPI reclaim memory is used to pass firmware tables
+ * and other data that is intended for consumption by
+ * the OS only, which may decide it wants to reclaim
+ * that memory and use it for something else. We never
+ * do that, but we usually add it to the linear map
+ * anyway, in which case we should use the existing
+ * mapping.
+ */
+ if (memblock_is_map_memory(phys))
+ return (void __iomem *)__phys_to_virt(phys);
+ /* fall through */
+
+ default:
+ if (region->attribute & EFI_MEMORY_WB)
+ prot = PAGE_KERNEL;
+ else if (region->attribute & EFI_MEMORY_WT)
+ prot = __pgprot(PROT_NORMAL_WT);
+ else if (region->attribute & EFI_MEMORY_WC)
+ prot = __pgprot(PROT_NORMAL_NC);
+ }
+ }
+ return __ioremap(phys, size, prot);
+}
+
/*
* Claim Synchronous External Aborts as a firmware first notification.
*
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9fae0efc80c1..a389b999482e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -119,25 +119,12 @@ static inline void finalize_system_capabilities(void)
static_branch_enable(&arm64_const_caps_ready);
}
-static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p)
+void dump_cpu_features(void)
{
/* file-wide pr_fmt adds "CPU features: " prefix */
pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps);
- return 0;
}
-static struct notifier_block cpu_hwcaps_notifier = {
- .notifier_call = dump_cpu_hwcaps
-};
-
-static int __init register_cpu_hwcaps_dumper(void)
-{
- atomic_notifier_chain_register(&panic_notifier_list,
- &cpu_hwcaps_notifier);
- return 0;
-}
-__initcall(register_cpu_hwcaps_dumper);
-
DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcap_keys);
@@ -269,6 +256,9 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0),
/*
* Page size not being supported at Stage-2 is not fatal. You
* just give up KVM if PAGE_SIZE isn't supported there. Go fix
@@ -312,6 +302,10 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_XNX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_SPECSEI_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
@@ -323,8 +317,15 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_BBM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_TTL_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IDS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_ST_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_NV_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CCIDX_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
@@ -345,7 +346,7 @@ static const struct arm64_ftr_bits ftr_ctr[] = {
* make use of *minLine.
* If we have differing I-cache policies, report it as the weakest - VIPT.
*/
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_L1IP_SHIFT, 2, ICACHE_POLICY_VIPT), /* L1Ip */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -356,19 +357,19 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
};
static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0xf), /* InnerShr */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), /* FCSE */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), /* TCM */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* ShareLvl */
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0xf), /* OuterShr */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* PMSA */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* VMSA */
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_INNERSHR_SHIFT, 4, 0xf),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_FCSE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_MMFR0_AUXREG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_TCM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_SHARELVL_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_OUTERSHR_SHIFT, 4, 0xf),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_PMSA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_VMSA_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 36, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_DOUBLELOCK_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
@@ -384,14 +385,14 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
};
static const struct arm64_ftr_bits ftr_mvfr2[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* FPMisc */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* SIMDMisc */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_FPMISC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_SIMDMISC_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_dczid[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_DZP_SHIFT, 1, 1),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_BS_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -423,7 +424,8 @@ static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_HPDS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CNP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_XNX_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* ac2 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_AC2_SHIFT, 4, 0),
+
/*
* SpecSEI = 1 indicates that the PE might generate an SError on an
* external abort on speculative read. It is safe to assume that an
@@ -465,10 +467,10 @@ static const struct arm64_ftr_bits ftr_id_isar6[] = {
static const struct arm64_ftr_bits ftr_id_pfr0[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_DIT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_CSV2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* State1 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* State0 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE0_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -492,13 +494,13 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = {
static const struct arm64_ftr_bits ftr_id_dfr0[] = {
/* [31:28] TraceFilt */
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPDBG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPSDBG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPDBG_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -697,11 +699,52 @@ static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
static void __init sort_ftr_regs(void)
{
- int i;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(arm64_ftr_regs); i++) {
+ const struct arm64_ftr_reg *ftr_reg = arm64_ftr_regs[i].reg;
+ const struct arm64_ftr_bits *ftr_bits = ftr_reg->ftr_bits;
+ unsigned int j = 0;
+
+ /*
+ * Features here must be sorted in descending order with respect
+ * to their shift values and should not overlap with each other.
+ */
+ for (; ftr_bits->width != 0; ftr_bits++, j++) {
+ unsigned int width = ftr_reg->ftr_bits[j].width;
+ unsigned int shift = ftr_reg->ftr_bits[j].shift;
+ unsigned int prev_shift;
+
+ WARN((shift + width) > 64,
+ "%s has invalid feature at shift %d\n",
+ ftr_reg->name, shift);
+
+ /*
+ * Skip the first feature. There is nothing to
+ * compare against for now.
+ */
+ if (j == 0)
+ continue;
+
+ prev_shift = ftr_reg->ftr_bits[j - 1].shift;
+ WARN((shift + width) > prev_shift,
+ "%s has feature overlap at shift %d\n",
+ ftr_reg->name, shift);
+ }
- /* Check that the array is sorted so that we can do the binary search */
- for (i = 1; i < ARRAY_SIZE(arm64_ftr_regs); i++)
+ /*
+ * Skip the first register. There is nothing to
+ * compare against for now.
+ */
+ if (i == 0)
+ continue;
+ /*
+ * Registers here must be sorted in ascending order with respect
+ * to sys_id for subsequent binary search in get_arm64_ftr_reg()
+ * to work correctly.
+ */
BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id);
+ }
}
/*
@@ -1884,6 +1927,26 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.cpu_enable = cpu_has_fwb,
},
+ {
+ .desc = "ARMv8.4 Translation Table Level",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_ARMv8_4_TTL,
+ .sys_reg = SYS_ID_AA64MMFR2_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64MMFR2_TTL_SHIFT,
+ .min_field_value = 1,
+ .matches = has_cpuid_feature,
+ },
+ {
+ .desc = "TLB range maintenance instructions",
+ .capability = ARM64_HAS_TLB_RANGE,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64ISAR0_EL1,
+ .field_pos = ID_AA64ISAR0_TLB_SHIFT,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = ID_AA64ISAR0_TLB_RANGE,
+ },
#ifdef CONFIG_ARM64_HW_AFDBM
{
/*
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 86637466daa8..393c6fb1f1cb 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -93,6 +93,7 @@ static const char *const hwcap_str[] = {
"dgh",
"rng",
"bti",
+ /* reserved for "mte" */
NULL
};
diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c
index 1f646b07e3e9..314391a156ee 100644
--- a/arch/arm64/kernel/crash_core.c
+++ b/arch/arm64/kernel/crash_core.c
@@ -7,6 +7,14 @@
#include <linux/crash_core.h>
#include <asm/cpufeature.h>
#include <asm/memory.h>
+#include <asm/pgtable-hwdef.h>
+
+static inline u64 get_tcr_el1_t1sz(void);
+
+static inline u64 get_tcr_el1_t1sz(void)
+{
+ return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET;
+}
void arch_crash_save_vmcoreinfo(void)
{
@@ -16,6 +24,8 @@ void arch_crash_save_vmcoreinfo(void)
kimage_voffset);
vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
PHYS_OFFSET);
+ vmcoreinfo_append_str("NUMBER(TCR_EL1_T1SZ)=0x%llx\n",
+ get_tcr_el1_t1sz());
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
vmcoreinfo_append_str("NUMBER(KERNELPACMASK)=0x%llx\n",
system_supports_address_auth() ?
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 35de8ba60e3d..2646178c8329 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -15,6 +15,7 @@
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/asm_pointer_auth.h>
+#include <asm/bug.h>
#include <asm/cpufeature.h>
#include <asm/errno.h>
#include <asm/esr.h>
@@ -226,28 +227,9 @@ alternative_else_nop_endif
add x29, sp, #S_STACKFRAME
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
- /*
- * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
- * EL0, there is no need to check the state of TTBR0_EL1 since
- * accesses are always enabled.
- * Note that the meaning of this bit differs from the ARMv8.1 PAN
- * feature as all TTBR0_EL1 accesses are disabled, not just those to
- * user mappings.
- */
-alternative_if ARM64_HAS_PAN
- b 1f // skip TTBR0 PAN
+alternative_if_not ARM64_HAS_PAN
+ bl __swpan_entry_el\el
alternative_else_nop_endif
-
- .if \el != 0
- mrs x21, ttbr0_el1
- tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
- orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
- b.eq 1f // TTBR0 access already disabled
- and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
- .endif
-
- __uaccess_ttbr0_disable x21
-1:
#endif
stp x22, x23, [sp, #S_PC]
@@ -301,34 +283,9 @@ alternative_else_nop_endif
.endif
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
- /*
- * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
- * PAN bit checking.
- */
-alternative_if ARM64_HAS_PAN
- b 2f // skip TTBR0 PAN
+alternative_if_not ARM64_HAS_PAN
+ bl __swpan_exit_el\el
alternative_else_nop_endif
-
- .if \el != 0
- tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
- .endif
-
- __uaccess_ttbr0_enable x0, x1
-
- .if \el == 0
- /*
- * Enable errata workarounds only if returning to user. The only
- * workaround currently required for TTBR0_EL1 changes are for the
- * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
- * corruption).
- */
- bl post_ttbr_update_workaround
- .endif
-1:
- .if \el != 0
- and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit
- .endif
-2:
#endif
.if \el == 0
@@ -401,6 +358,49 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
sb
.endm
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
+ * EL0, there is no need to check the state of TTBR0_EL1 since
+ * accesses are always enabled.
+ * Note that the meaning of this bit differs from the ARMv8.1 PAN
+ * feature as all TTBR0_EL1 accesses are disabled, not just those to
+ * user mappings.
+ */
+SYM_CODE_START_LOCAL(__swpan_entry_el1)
+ mrs x21, ttbr0_el1
+ tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
+ orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
+ b.eq 1f // TTBR0 access already disabled
+ and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
+SYM_INNER_LABEL(__swpan_entry_el0, SYM_L_LOCAL)
+ __uaccess_ttbr0_disable x21
+1: ret
+SYM_CODE_END(__swpan_entry_el1)
+
+ /*
+ * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
+ * PAN bit checking.
+ */
+SYM_CODE_START_LOCAL(__swpan_exit_el1)
+ tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
+ __uaccess_ttbr0_enable x0, x1
+1: and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit
+ ret
+SYM_CODE_END(__swpan_exit_el1)
+
+SYM_CODE_START_LOCAL(__swpan_exit_el0)
+ __uaccess_ttbr0_enable x0, x1
+ /*
+ * Enable errata workarounds only if returning to user. The only
+ * workaround currently required for TTBR0_EL1 changes are for the
+ * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
+ * corruption).
+ */
+ b post_ttbr_update_workaround
+SYM_CODE_END(__swpan_exit_el0)
+#endif
+
.macro irq_stack_entry
mov x19, sp // preserve the original sp
#ifdef CONFIG_SHADOW_CALL_STACK
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index 65b08a74aec6..0ce3a28e3347 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -253,6 +253,40 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
return ret;
}
+static bool branch_rela_needs_plt(Elf64_Sym *syms, Elf64_Rela *rela,
+ Elf64_Word dstidx)
+{
+
+ Elf64_Sym *s = syms + ELF64_R_SYM(rela->r_info);
+
+ if (s->st_shndx == dstidx)
+ return false;
+
+ return ELF64_R_TYPE(rela->r_info) == R_AARCH64_JUMP26 ||
+ ELF64_R_TYPE(rela->r_info) == R_AARCH64_CALL26;
+}
+
+/* Group branch PLT relas at the front end of the array. */
+static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela,
+ int numrels, Elf64_Word dstidx)
+{
+ int i = 0, j = numrels - 1;
+
+ if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+ return 0;
+
+ while (i < j) {
+ if (branch_rela_needs_plt(syms, &rela[i], dstidx))
+ i++;
+ else if (branch_rela_needs_plt(syms, &rela[j], dstidx))
+ swap(rela[i], rela[j]);
+ else
+ j--;
+ }
+
+ return i;
+}
+
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod)
{
@@ -290,7 +324,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
for (i = 0; i < ehdr->e_shnum; i++) {
Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
- int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+ int nents, numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
if (sechdrs[i].sh_type != SHT_RELA)
@@ -300,8 +334,14 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
if (!(dstsec->sh_flags & SHF_EXECINSTR))
continue;
- /* sort by type, symbol index and addend */
- sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+ /*
+ * sort branch relocations requiring a PLT by type, symbol index
+ * and addend
+ */
+ nents = partition_branch_plt_relas(syms, rels, numrels,
+ sechdrs[i].sh_info);
+ if (nents)
+ sort(rels, nents, sizeof(Elf64_Rela), cmp_rela, NULL);
if (!str_has_prefix(secstrings + dstsec->sh_name, ".init"))
core_plts += count_plts(syms, rels, numrels,
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 4d7879484cec..462f9a9cc44b 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -13,12 +13,15 @@
#include <asm/sysreg.h>
#include <asm/virt.h>
+#include <clocksource/arm_arch_timer.h>
+
#include <linux/acpi.h>
#include <linux/clocksource.h>
#include <linux/kvm_host.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
+#include <linux/sched_clock.h>
#include <linux/smp.h>
/* ARMv8 Cortex-A53 specific event types. */
@@ -155,7 +158,7 @@ armv8pmu_events_sysfs_show(struct device *dev,
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
- return sprintf(page, "event=0x%03llx\n", pmu_attr->id);
+ return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
}
#define ARMV8_EVENT_ATTR(name, config) \
@@ -222,10 +225,29 @@ static struct attribute *armv8_pmuv3_event_attrs[] = {
ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD),
ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD),
ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD),
+ ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD),
+ ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED),
+ ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC),
+ ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL),
+ ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND),
+ ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND),
+ ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT),
ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP),
ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED),
ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE),
ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION),
+ ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES),
+ ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM),
+ ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS),
+ ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD),
+ ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS),
+ ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD),
+ ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT),
+ ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT),
+ ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT),
+ ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED),
+ ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD),
+ ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR),
NULL,
};
@@ -244,10 +266,13 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj,
test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap))
return attr->mode;
- pmu_attr->id -= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE;
- if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
- test_bit(pmu_attr->id, cpu_pmu->pmceid_ext_bitmap))
- return attr->mode;
+ if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) {
+ u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE;
+
+ if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
+ test_bit(id, cpu_pmu->pmceid_ext_bitmap))
+ return attr->mode;
+ }
return 0;
}
@@ -1165,28 +1190,54 @@ device_initcall(armv8_pmu_driver_init)
void arch_perf_update_userpage(struct perf_event *event,
struct perf_event_mmap_page *userpg, u64 now)
{
- u32 freq;
- u32 shift;
+ struct clock_read_data *rd;
+ unsigned int seq;
+ u64 ns;
- /*
- * Internal timekeeping for enabled/running/stopped times
- * is always computed with the sched_clock.
- */
- freq = arch_timer_get_rate();
- userpg->cap_user_time = 1;
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_time_short = 0;
+
+ do {
+ rd = sched_clock_read_begin(&seq);
+
+ if (rd->read_sched_clock != arch_timer_read_counter)
+ return;
+
+ userpg->time_mult = rd->mult;
+ userpg->time_shift = rd->shift;
+ userpg->time_zero = rd->epoch_ns;
+ userpg->time_cycles = rd->epoch_cyc;
+ userpg->time_mask = rd->sched_clock_mask;
+
+ /*
+ * Subtract the cycle base, such that software that
+ * doesn't know about cap_user_time_short still 'works'
+ * assuming no wraps.
+ */
+ ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+ userpg->time_zero -= ns;
+
+ } while (sched_clock_read_retry(seq));
+
+ userpg->time_offset = userpg->time_zero - now;
- clocks_calc_mult_shift(&userpg->time_mult, &shift, freq,
- NSEC_PER_SEC, 0);
/*
* time_shift is not expected to be greater than 31 due to
* the original published conversion algorithm shifting a
* 32-bit value (now specifies a 64-bit value) - refer
* perf_event_mmap_page documentation in perf_event.h.
*/
- if (shift == 32) {
- shift = 31;
+ if (userpg->time_shift == 32) {
+ userpg->time_shift = 31;
userpg->time_mult >>= 1;
}
- userpg->time_shift = (u16)shift;
- userpg->time_offset = -now;
+
+ /*
+ * Internal timekeeping for enabled/running/stopped times
+ * is always computed with the sched_clock.
+ */
+ userpg->cap_user_time = 1;
+ userpg->cap_user_time_zero = 1;
+ userpg->cap_user_time_short = 1;
}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 93b3844cf442..c793276ec7ad 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -400,11 +400,7 @@ static int __init topology_init(void)
}
subsys_initcall(topology_init);
-/*
- * Dump out kernel offset information on panic.
- */
-static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
- void *p)
+static void dump_kernel_offset(void)
{
const unsigned long offset = kaslr_offset();
@@ -415,17 +411,25 @@ static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
} else {
pr_emerg("Kernel Offset: disabled\n");
}
+}
+
+static int arm64_panic_block_dump(struct notifier_block *self,
+ unsigned long v, void *p)
+{
+ dump_kernel_offset();
+ dump_cpu_features();
+ dump_mem_limit();
return 0;
}
-static struct notifier_block kernel_offset_notifier = {
- .notifier_call = dump_kernel_offset
+static struct notifier_block arm64_panic_block = {
+ .notifier_call = arm64_panic_block_dump
};
-static int __init register_kernel_offset_dumper(void)
+static int __init register_arm64_panic_block(void)
{
atomic_notifier_chain_register(&panic_notifier_list,
- &kernel_offset_notifier);
+ &arm64_panic_block);
return 0;
}
-__initcall(register_kernel_offset_dumper);
+device_initcall(register_arm64_panic_block);
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 139679c745bf..2dd8e3b8b94b 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -199,12 +199,12 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
put_task_stack(tsk);
}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
__save_stack_trace(tsk, trace, 1);
}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
void save_stack_trace(struct stack_trace *trace)
{
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 47f651df781c..13ebd5ca2070 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -855,7 +855,7 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
pr_emerg("Task stack: [0x%016lx..0x%016lx]\n",
tsk_stk, tsk_stk + THREAD_SIZE);
pr_emerg("IRQ stack: [0x%016lx..0x%016lx]\n",
- irq_stk, irq_stk + THREAD_SIZE);
+ irq_stk, irq_stk + IRQ_STACK_SIZE);
pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n",
ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE);
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index e546df0efefb..d4202a32abc9 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
+#include <linux/time_namespace.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
@@ -40,6 +41,12 @@ enum vdso_abi {
#endif /* CONFIG_COMPAT_VDSO */
};
+enum vvar_pages {
+ VVAR_DATA_PAGE_OFFSET,
+ VVAR_TIMENS_PAGE_OFFSET,
+ VVAR_NR_PAGES,
+};
+
struct vdso_abi_info {
const char *name;
const char *vdso_code_start;
@@ -107,25 +114,122 @@ static int __vdso_init(enum vdso_abi abi)
vdso_info[abi].vdso_code_start) >>
PAGE_SHIFT;
- /* Allocate the vDSO pagelist, plus a page for the data. */
- vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages + 1,
+ vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
sizeof(struct page *),
GFP_KERNEL);
if (vdso_pagelist == NULL)
return -ENOMEM;
- /* Grab the vDSO data page. */
- vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
-
-
/* Grab the vDSO code pages. */
pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);
for (i = 0; i < vdso_info[abi].vdso_pages; i++)
- vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+ vdso_pagelist[i] = pfn_to_page(pfn + i);
+
+ vdso_info[abi].cm->pages = vdso_pagelist;
+
+ return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+ return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+
+ mmap_read_lock(mm);
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long size = vma->vm_end - vma->vm_start;
- vdso_info[abi].dm->pages = &vdso_pagelist[0];
- vdso_info[abi].cm->pages = &vdso_pagelist[1];
+ if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
+ zap_page_range(vma, vma->vm_start, size);
+#ifdef CONFIG_COMPAT_VDSO
+ if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
+ zap_page_range(vma, vma->vm_start, size);
+#endif
+ }
+
+ mmap_read_unlock(mm);
+ return 0;
+}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_mm == current->mm))
+ return current->nsproxy->time_ns->vvar_page;
+
+ /*
+ * VM_PFNMAP | VM_IO protect .fault() handler from being called
+ * through interfaces like /proc/$pid/mem or
+ * process_vm_{readv,writev}() as long as there's no .access()
+ * in special_mapping_vmops.
+ * For more details check_vma_flags() and __access_remote_vm()
+ */
+ WARN(1, "vvar_page accessed remotely");
+
+ return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page *timens_page = find_timens_vvar_page(vma);
+ unsigned long pfn;
+
+ switch (vmf->pgoff) {
+ case VVAR_DATA_PAGE_OFFSET:
+ if (timens_page)
+ pfn = page_to_pfn(timens_page);
+ else
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#ifdef CONFIG_TIME_NS
+ case VVAR_TIMENS_PAGE_OFFSET:
+ /*
+ * If a task belongs to a time namespace then a namespace
+ * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+ * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+ * offset.
+ * See also the comment near timens_setup_vdso_data().
+ */
+ if (!timens_page)
+ return VM_FAULT_SIGBUS;
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#endif /* CONFIG_TIME_NS */
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+
+ return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+static int vvar_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+
+ if (new_size != VVAR_NR_PAGES * PAGE_SIZE)
+ return -EINVAL;
return 0;
}
@@ -139,9 +243,11 @@ static int __setup_additional_pages(enum vdso_abi abi,
unsigned long gp_flags = 0;
void *ret;
+ BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+
vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
- vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+ vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
@@ -149,8 +255,8 @@ static int __setup_additional_pages(enum vdso_abi abi,
goto up_fail;
}
- ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
- VM_READ|VM_MAYREAD,
+ ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
+ VM_READ|VM_MAYREAD|VM_PFNMAP,
vdso_info[abi].dm);
if (IS_ERR(ret))
goto up_fail;
@@ -158,7 +264,7 @@ static int __setup_additional_pages(enum vdso_abi abi,
if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
gp_flags = VM_ARM64_BTI;
- vdso_base += PAGE_SIZE;
+ vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
mm->context.vdso = (void *)vdso_base;
ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
VM_READ|VM_EXEC|gp_flags|
@@ -206,6 +312,8 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
#ifdef CONFIG_COMPAT_VDSO
[AA32_MAP_VVAR] = {
.name = "[vvar]",
+ .fault = vvar_fault,
+ .mremap = vvar_mremap,
},
[AA32_MAP_VDSO] = {
.name = "[vdso]",
@@ -371,6 +479,8 @@ enum aarch64_map {
static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
[AA64_MAP_VVAR] = {
.name = "[vvar]",
+ .fault = vvar_fault,
+ .mremap = vvar_mremap,
},
[AA64_MAP_VDSO] = {
.name = "[vdso]",
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index 7ad2d3a0cd48..d808ad31e01f 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -17,7 +17,10 @@ OUTPUT_ARCH(aarch64)
SECTIONS
{
- PROVIDE(_vdso_data = . - PAGE_SIZE);
+ PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+ PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
. = VDSO_LBASE + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
index 337d03522048..3348ce5ea306 100644
--- a/arch/arm64/kernel/vdso32/vdso.lds.S
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -17,7 +17,10 @@ OUTPUT_ARCH(arm)
SECTIONS
{
- PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE);
+ PROVIDE_HIDDEN(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+ PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
. = VDSO_LBASE + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 5423ffe0a987..ec8e894684a7 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -10,7 +10,6 @@
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#include <asm/kernel-pgtable.h>
-#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/page.h>