Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--  arch/arm64/kernel/acpi.c              75
-rw-r--r--  arch/arm64/kernel/cpufeature.c       149
-rw-r--r--  arch/arm64/kernel/cpuinfo.c            1
-rw-r--r--  arch/arm64/kernel/crash_core.c        10
-rw-r--r--  arch/arm64/kernel/entry.S             96
-rw-r--r--  arch/arm64/kernel/module-plts.c       46
-rw-r--r--  arch/arm64/kernel/perf_event.c        89
-rw-r--r--  arch/arm64/kernel/setup.c             24
-rw-r--r--  arch/arm64/kernel/stacktrace.c         2
-rw-r--r--  arch/arm64/kernel/traps.c              2
-rw-r--r--  arch/arm64/kernel/vdso.c             136
-rw-r--r--  arch/arm64/kernel/vdso/vdso.lds.S      5
-rw-r--r--  arch/arm64/kernel/vdso32/vdso.lds.S    5
-rw-r--r--  arch/arm64/kernel/vmlinux.lds.S        1
14 files changed, 500 insertions, 141 deletions
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index a7586a4db142..455966401102 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -261,6 +261,81 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) return __pgprot(PROT_DEVICE_nGnRnE); } +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +{ + efi_memory_desc_t *md, *region = NULL; + pgprot_t prot; + + if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP))) + return NULL; + + for_each_efi_memory_desc(md) { + u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + + if (phys < md->phys_addr || phys >= end) + continue; + + if (phys + size > end) { + pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n"); + return NULL; + } + region = md; + break; + } + + /* + * It is fine for AML to remap regions that are not represented in the + * EFI memory map at all, as it only describes normal memory, and MMIO + * regions that require a virtual mapping to make them accessible to + * the EFI runtime services. + */ + prot = __pgprot(PROT_DEVICE_nGnRnE); + if (region) { + switch (region->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + case EFI_PERSISTENT_MEMORY: + pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys); + return NULL; + + case EFI_RUNTIME_SERVICES_CODE: + /* + * This would be unusual, but not problematic per se, + * as long as we take care not to create a writable + * mapping for executable code. + */ + prot = PAGE_KERNEL_RO; + break; + + case EFI_ACPI_RECLAIM_MEMORY: + /* + * ACPI reclaim memory is used to pass firmware tables + * and other data that is intended for consumption by + * the OS only, which may decide it wants to reclaim + * that memory and use it for something else. We never + * do that, but we usually add it to the linear map + * anyway, in which case we should use the existing + * mapping. + */ + if (memblock_is_map_memory(phys)) + return (void __iomem *)__phys_to_virt(phys); + /* fall through */ + + default: + if (region->attribute & EFI_MEMORY_WB) + prot = PAGE_KERNEL; + else if (region->attribute & EFI_MEMORY_WT) + prot = __pgprot(PROT_NORMAL_WT); + else if (region->attribute & EFI_MEMORY_WC) + prot = __pgprot(PROT_NORMAL_NC); + } + } + return __ioremap(phys, size, prot); +} + /* * Claim Synchronous External Aborts as a firmware first notification. 
* diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9fae0efc80c1..a389b999482e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -119,25 +119,12 @@ static inline void finalize_system_capabilities(void) static_branch_enable(&arm64_const_caps_ready); } -static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p) +void dump_cpu_features(void) { /* file-wide pr_fmt adds "CPU features: " prefix */ pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); - return 0; } -static struct notifier_block cpu_hwcaps_notifier = { - .notifier_call = dump_cpu_hwcaps -}; - -static int __init register_cpu_hwcaps_dumper(void) -{ - atomic_notifier_chain_register(&panic_notifier_list, - &cpu_hwcaps_notifier); - return 0; -} -__initcall(register_cpu_hwcaps_dumper); - DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcap_keys); @@ -269,6 +256,9 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0), /* * Page size not being supported at Stage-2 is not fatal. You * just give up KVM if PAGE_SIZE isn't supported there. Go fix @@ -312,6 +302,10 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_XNX_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_SPECSEI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0), @@ -323,8 +317,15 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_BBM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_TTL_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IDS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_ST_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_NV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CCIDX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0), @@ -345,7 +346,7 @@ static const struct arm64_ftr_bits ftr_ctr[] = 
{ * make use of *minLine. * If we have differing I-cache policies, report it as the weakest - VIPT. */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_L1IP_SHIFT, 2, ICACHE_POLICY_VIPT), /* L1Ip */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -356,19 +357,19 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { }; static const struct arm64_ftr_bits ftr_id_mmfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0xf), /* InnerShr */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), /* FCSE */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), /* TCM */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* ShareLvl */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0xf), /* OuterShr */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* PMSA */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* VMSA */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_INNERSHR_SHIFT, 4, 0xf), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_FCSE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_MMFR0_AUXREG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_TCM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_SHARELVL_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_OUTERSHR_SHIFT, 4, 0xf), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_PMSA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_VMSA_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 36, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_DOUBLELOCK_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), @@ -384,14 +385,14 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { }; static const struct arm64_ftr_bits ftr_mvfr2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* FPMisc */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* SIMDMisc */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_FPMISC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_SIMDMISC_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_dczid[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_DZP_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_BS_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -423,7 +424,8 @@ static const struct arm64_ftr_bits ftr_id_mmfr4[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_HPDS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CNP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 
ID_MMFR4_XNX_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* ac2 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_AC2_SHIFT, 4, 0), + /* * SpecSEI = 1 indicates that the PE might generate an SError on an * external abort on speculative read. It is safe to assume that an @@ -465,10 +467,10 @@ static const struct arm64_ftr_bits ftr_id_isar6[] = { static const struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_DIT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_CSV2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* State1 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* State0 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE0_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -492,13 +494,13 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = { static const struct arm64_ftr_bits ftr_id_dfr0[] = { /* [31:28] TraceFilt */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPDBG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPSDBG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPDBG_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -697,11 +699,52 @@ static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, static void __init sort_ftr_regs(void) { - int i; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(arm64_ftr_regs); i++) { + const struct arm64_ftr_reg *ftr_reg = arm64_ftr_regs[i].reg; + const struct arm64_ftr_bits *ftr_bits = ftr_reg->ftr_bits; + unsigned int j = 0; + + /* + * Features here must be sorted in descending order with respect + * to their shift values and should not overlap with each other. + */ + for (; ftr_bits->width != 0; ftr_bits++, j++) { + unsigned int width = ftr_reg->ftr_bits[j].width; + unsigned int shift = ftr_reg->ftr_bits[j].shift; + unsigned int prev_shift; + + WARN((shift + width) > 64, + "%s has invalid feature at shift %d\n", + ftr_reg->name, shift); + + /* + * Skip the first feature. There is nothing to + * compare against for now. 
+ */ + if (j == 0) + continue; + + prev_shift = ftr_reg->ftr_bits[j - 1].shift; + WARN((shift + width) > prev_shift, + "%s has feature overlap at shift %d\n", + ftr_reg->name, shift); + } - /* Check that the array is sorted so that we can do the binary search */ - for (i = 1; i < ARRAY_SIZE(arm64_ftr_regs); i++) + /* + * Skip the first register. There is nothing to + * compare against for now. + */ + if (i == 0) + continue; + /* + * Registers here must be sorted in ascending order with respect + * to sys_id for subsequent binary search in get_arm64_ftr_reg() + * to work correctly. + */ BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id); + } } /* @@ -1884,6 +1927,26 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .cpu_enable = cpu_has_fwb, }, + { + .desc = "ARMv8.4 Translation Table Level", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_HAS_ARMv8_4_TTL, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_TTL_SHIFT, + .min_field_value = 1, + .matches = has_cpuid_feature, + }, + { + .desc = "TLB range maintenance instructions", + .capability = ARM64_HAS_TLB_RANGE, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_TLB_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64ISAR0_TLB_RANGE, + }, #ifdef CONFIG_ARM64_HW_AFDBM { /* diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 86637466daa8..393c6fb1f1cb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -93,6 +93,7 @@ static const char *const hwcap_str[] = { "dgh", "rng", "bti", + /* reserved for "mte" */ NULL }; diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c index 1f646b07e3e9..314391a156ee 100644 --- a/arch/arm64/kernel/crash_core.c +++ b/arch/arm64/kernel/crash_core.c @@ -7,6 +7,14 @@ #include <linux/crash_core.h> #include <asm/cpufeature.h> #include <asm/memory.h> +#include <asm/pgtable-hwdef.h> + +static inline u64 get_tcr_el1_t1sz(void); + +static inline u64 get_tcr_el1_t1sz(void) +{ + return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET; +} void arch_crash_save_vmcoreinfo(void) { @@ -16,6 +24,8 @@ void arch_crash_save_vmcoreinfo(void) kimage_voffset); vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", PHYS_OFFSET); + vmcoreinfo_append_str("NUMBER(TCR_EL1_T1SZ)=0x%llx\n", + get_tcr_el1_t1sz()); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); vmcoreinfo_append_str("NUMBER(KERNELPACMASK)=0x%llx\n", system_supports_address_auth() ? diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 35de8ba60e3d..2646178c8329 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -15,6 +15,7 @@ #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/asm_pointer_auth.h> +#include <asm/bug.h> #include <asm/cpufeature.h> #include <asm/errno.h> #include <asm/esr.h> @@ -226,28 +227,9 @@ alternative_else_nop_endif add x29, sp, #S_STACKFRAME #ifdef CONFIG_ARM64_SW_TTBR0_PAN - /* - * Set the TTBR0 PAN bit in SPSR. When the exception is taken from - * EL0, there is no need to check the state of TTBR0_EL1 since - * accesses are always enabled. - * Note that the meaning of this bit differs from the ARMv8.1 PAN - * feature as all TTBR0_EL1 accesses are disabled, not just those to - * user mappings. 
- */ -alternative_if ARM64_HAS_PAN - b 1f // skip TTBR0 PAN +alternative_if_not ARM64_HAS_PAN + bl __swpan_entry_el\el alternative_else_nop_endif - - .if \el != 0 - mrs x21, ttbr0_el1 - tst x21, #TTBR_ASID_MASK // Check for the reserved ASID - orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR - b.eq 1f // TTBR0 access already disabled - and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR - .endif - - __uaccess_ttbr0_disable x21 -1: #endif stp x22, x23, [sp, #S_PC] @@ -301,34 +283,9 @@ alternative_else_nop_endif .endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN - /* - * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR - * PAN bit checking. - */ -alternative_if ARM64_HAS_PAN - b 2f // skip TTBR0 PAN +alternative_if_not ARM64_HAS_PAN + bl __swpan_exit_el\el alternative_else_nop_endif - - .if \el != 0 - tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set - .endif - - __uaccess_ttbr0_enable x0, x1 - - .if \el == 0 - /* - * Enable errata workarounds only if returning to user. The only - * workaround currently required for TTBR0_EL1 changes are for the - * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache - * corruption). - */ - bl post_ttbr_update_workaround - .endif -1: - .if \el != 0 - and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit - .endif -2: #endif .if \el == 0 @@ -401,6 +358,49 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 sb .endm +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Set the TTBR0 PAN bit in SPSR. When the exception is taken from + * EL0, there is no need to check the state of TTBR0_EL1 since + * accesses are always enabled. + * Note that the meaning of this bit differs from the ARMv8.1 PAN + * feature as all TTBR0_EL1 accesses are disabled, not just those to + * user mappings. + */ +SYM_CODE_START_LOCAL(__swpan_entry_el1) + mrs x21, ttbr0_el1 + tst x21, #TTBR_ASID_MASK // Check for the reserved ASID + orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR + b.eq 1f // TTBR0 access already disabled + and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR +SYM_INNER_LABEL(__swpan_entry_el0, SYM_L_LOCAL) + __uaccess_ttbr0_disable x21 +1: ret +SYM_CODE_END(__swpan_entry_el1) + + /* + * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR + * PAN bit checking. + */ +SYM_CODE_START_LOCAL(__swpan_exit_el1) + tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set + __uaccess_ttbr0_enable x0, x1 +1: and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit + ret +SYM_CODE_END(__swpan_exit_el1) + +SYM_CODE_START_LOCAL(__swpan_exit_el0) + __uaccess_ttbr0_enable x0, x1 + /* + * Enable errata workarounds only if returning to user. The only + * workaround currently required for TTBR0_EL1 changes are for the + * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache + * corruption). 
+ */ + b post_ttbr_update_workaround +SYM_CODE_END(__swpan_exit_el0) +#endif + .macro irq_stack_entry mov x19, sp // preserve the original sp #ifdef CONFIG_SHADOW_CALL_STACK diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 65b08a74aec6..0ce3a28e3347 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -253,6 +253,40 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, return ret; } +static bool branch_rela_needs_plt(Elf64_Sym *syms, Elf64_Rela *rela, + Elf64_Word dstidx) +{ + + Elf64_Sym *s = syms + ELF64_R_SYM(rela->r_info); + + if (s->st_shndx == dstidx) + return false; + + return ELF64_R_TYPE(rela->r_info) == R_AARCH64_JUMP26 || + ELF64_R_TYPE(rela->r_info) == R_AARCH64_CALL26; +} + +/* Group branch PLT relas at the front end of the array. */ +static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela, + int numrels, Elf64_Word dstidx) +{ + int i = 0, j = numrels - 1; + + if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + return 0; + + while (i < j) { + if (branch_rela_needs_plt(syms, &rela[i], dstidx)) + i++; + else if (branch_rela_needs_plt(syms, &rela[j], dstidx)) + swap(rela[i], rela[j]); + else + j--; + } + + return i; +} + int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { @@ -290,7 +324,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, for (i = 0; i < ehdr->e_shnum; i++) { Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset; - int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela); + int nents, numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela); Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info; if (sechdrs[i].sh_type != SHT_RELA) @@ -300,8 +334,14 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, if (!(dstsec->sh_flags & SHF_EXECINSTR)) continue; - /* sort by type, symbol index and addend */ - sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL); + /* + * sort branch relocations requiring a PLT by type, symbol index + * and addend + */ + nents = partition_branch_plt_relas(syms, rels, numrels, + sechdrs[i].sh_info); + if (nents) + sort(rels, nents, sizeof(Elf64_Rela), cmp_rela, NULL); if (!str_has_prefix(secstrings + dstsec->sh_name, ".init")) core_plts += count_plts(syms, rels, numrels, diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 4d7879484cec..462f9a9cc44b 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -13,12 +13,15 @@ #include <asm/sysreg.h> #include <asm/virt.h> +#include <clocksource/arm_arch_timer.h> + #include <linux/acpi.h> #include <linux/clocksource.h> #include <linux/kvm_host.h> #include <linux/of.h> #include <linux/perf/arm_pmu.h> #include <linux/platform_device.h> +#include <linux/sched_clock.h> #include <linux/smp.h> /* ARMv8 Cortex-A53 specific event types. 
*/ @@ -155,7 +158,7 @@ armv8pmu_events_sysfs_show(struct device *dev, pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - return sprintf(page, "event=0x%03llx\n", pmu_attr->id); + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); } #define ARMV8_EVENT_ATTR(name, config) \ @@ -222,10 +225,29 @@ static struct attribute *armv8_pmuv3_event_attrs[] = { ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD), ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD), ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD), + ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED), + ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC), + ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL), + ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND), + ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND), + ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT), ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP), ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED), ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE), ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION), + ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES), + ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM), + ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS), + ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS), + ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT), + ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT), + ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT), + ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED), + ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD), + ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR), NULL, }; @@ -244,10 +266,13 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj, test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap)) return attr->mode; - pmu_attr->id -= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE; - if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS && - test_bit(pmu_attr->id, cpu_pmu->pmceid_ext_bitmap)) - return attr->mode; + if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) { + u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE; + + if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS && + test_bit(id, cpu_pmu->pmceid_ext_bitmap)) + return attr->mode; + } return 0; } @@ -1165,28 +1190,54 @@ device_initcall(armv8_pmu_driver_init) void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) { - u32 freq; - u32 shift; + struct clock_read_data *rd; + unsigned int seq; + u64 ns; - /* - * Internal timekeeping for enabled/running/stopped times - * is always computed with the sched_clock. 
- */ - freq = arch_timer_get_rate(); - userpg->cap_user_time = 1; + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_time_short = 0; + + do { + rd = sched_clock_read_begin(&seq); + + if (rd->read_sched_clock != arch_timer_read_counter) + return; + + userpg->time_mult = rd->mult; + userpg->time_shift = rd->shift; + userpg->time_zero = rd->epoch_ns; + userpg->time_cycles = rd->epoch_cyc; + userpg->time_mask = rd->sched_clock_mask; + + /* + * Subtract the cycle base, such that software that + * doesn't know about cap_user_time_short still 'works' + * assuming no wraps. + */ + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); + userpg->time_zero -= ns; + + } while (sched_clock_read_retry(seq)); + + userpg->time_offset = userpg->time_zero - now; - clocks_calc_mult_shift(&userpg->time_mult, &shift, freq, - NSEC_PER_SEC, 0); /* * time_shift is not expected to be greater than 31 due to * the original published conversion algorithm shifting a * 32-bit value (now specifies a 64-bit value) - refer * perf_event_mmap_page documentation in perf_event.h. */ - if (shift == 32) { - shift = 31; + if (userpg->time_shift == 32) { + userpg->time_shift = 31; userpg->time_mult >>= 1; } - userpg->time_shift = (u16)shift; - userpg->time_offset = -now; + + /* + * Internal timekeeping for enabled/running/stopped times + * is always computed with the sched_clock. + */ + userpg->cap_user_time = 1; + userpg->cap_user_time_zero = 1; + userpg->cap_user_time_short = 1; } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 93b3844cf442..c793276ec7ad 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -400,11 +400,7 @@ static int __init topology_init(void) } subsys_initcall(topology_init); -/* - * Dump out kernel offset information on panic. 
- */ -static int dump_kernel_offset(struct notifier_block *self, unsigned long v, - void *p) +static void dump_kernel_offset(void) { const unsigned long offset = kaslr_offset(); @@ -415,17 +411,25 @@ static int dump_kernel_offset(struct notifier_block *self, unsigned long v, } else { pr_emerg("Kernel Offset: disabled\n"); } +} + +static int arm64_panic_block_dump(struct notifier_block *self, + unsigned long v, void *p) +{ + dump_kernel_offset(); + dump_cpu_features(); + dump_mem_limit(); return 0; } -static struct notifier_block kernel_offset_notifier = { - .notifier_call = dump_kernel_offset +static struct notifier_block arm64_panic_block = { + .notifier_call = arm64_panic_block_dump }; -static int __init register_kernel_offset_dumper(void) +static int __init register_arm64_panic_block(void) { atomic_notifier_chain_register(&panic_notifier_list, - &kernel_offset_notifier); + &arm64_panic_block); return 0; } -__initcall(register_kernel_offset_dumper); +device_initcall(register_arm64_panic_block); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 139679c745bf..2dd8e3b8b94b 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -199,12 +199,12 @@ static noinline void __save_stack_trace(struct task_struct *tsk, put_task_stack(tsk); } -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { __save_stack_trace(tsk, trace, 1); } +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); void save_stack_trace(struct stack_trace *trace) { diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 47f651df781c..13ebd5ca2070 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -855,7 +855,7 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs) pr_emerg("Task stack: [0x%016lx..0x%016lx]\n", tsk_stk, tsk_stk + THREAD_SIZE); pr_emerg("IRQ stack: [0x%016lx..0x%016lx]\n", - irq_stk, irq_stk + THREAD_SIZE); + irq_stk, irq_stk + IRQ_STACK_SIZE); pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n", ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE); diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index e546df0efefb..d4202a32abc9 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -18,6 +18,7 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/slab.h> +#include <linux/time_namespace.h> #include <linux/timekeeper_internal.h> #include <linux/vmalloc.h> #include <vdso/datapage.h> @@ -40,6 +41,12 @@ enum vdso_abi { #endif /* CONFIG_COMPAT_VDSO */ }; +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + struct vdso_abi_info { const char *name; const char *vdso_code_start; @@ -107,25 +114,122 @@ static int __vdso_init(enum vdso_abi abi) vdso_info[abi].vdso_code_start) >> PAGE_SHIFT; - /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages + 1, + vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages, sizeof(struct page *), GFP_KERNEL); if (vdso_pagelist == NULL) return -ENOMEM; - /* Grab the vDSO data page. */ - vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data)); - - /* Grab the vDSO code pages. 
*/ pfn = sym_to_pfn(vdso_info[abi].vdso_code_start); for (i = 0; i < vdso_info[abi].vdso_pages; i++) - vdso_pagelist[i + 1] = pfn_to_page(pfn + i); + vdso_pagelist[i] = pfn_to_page(pfn + i); + + vdso_info[abi].cm->pages = vdso_pagelist; + + return 0; +} + +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page); +} + +/* + * The vvar mapping contains data for a specific time namespace, so when a task + * changes namespace we must unmap its vvar data for the old namespace. + * Subsequent faults will map in data for the new namespace. + * + * For more details see timens_setup_vdso_data(). + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + mmap_read_lock(mm); + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; - vdso_info[abi].dm->pages = &vdso_pagelist[0]; - vdso_info[abi].cm->pages = &vdso_pagelist[1]; + if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm)) + zap_page_range(vma, vma->vm_start, size); +#ifdef CONFIG_COMPAT_VDSO + if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm)) + zap_page_range(vma, vma->vm_start, size); +#endif + } + + mmap_read_unlock(mm); + return 0; +} + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops. + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} +#else +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long pfn; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + if (timens_page) + pfn = page_to_pfn(timens_page); + else + pfn = sym_to_pfn(vdso_data); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). 
+ */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = sym_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } + + return vmf_insert_pfn(vma, vmf->address, pfn); +} + +static int vvar_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + + if (new_size != VVAR_NR_PAGES * PAGE_SIZE) + return -EINVAL; return 0; } @@ -139,9 +243,11 @@ static int __setup_additional_pages(enum vdso_abi abi, unsigned long gp_flags = 0; void *ret; + BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ - vdso_mapping_len = vdso_text_len + PAGE_SIZE; + vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE; vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { @@ -149,8 +255,8 @@ static int __setup_additional_pages(enum vdso_abi abi, goto up_fail; } - ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_MAYREAD, + ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE, + VM_READ|VM_MAYREAD|VM_PFNMAP, vdso_info[abi].dm); if (IS_ERR(ret)) goto up_fail; @@ -158,7 +264,7 @@ static int __setup_additional_pages(enum vdso_abi abi, if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti()) gp_flags = VM_ARM64_BTI; - vdso_base += PAGE_SIZE; + vdso_base += VVAR_NR_PAGES * PAGE_SIZE; mm->context.vdso = (void *)vdso_base; ret = _install_special_mapping(mm, vdso_base, vdso_text_len, VM_READ|VM_EXEC|gp_flags| @@ -206,6 +312,8 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { #ifdef CONFIG_COMPAT_VDSO [AA32_MAP_VVAR] = { .name = "[vvar]", + .fault = vvar_fault, + .mremap = vvar_mremap, }, [AA32_MAP_VDSO] = { .name = "[vdso]", @@ -371,6 +479,8 @@ enum aarch64_map { static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = { [AA64_MAP_VVAR] = { .name = "[vvar]", + .fault = vvar_fault, + .mremap = vvar_mremap, }, [AA64_MAP_VDSO] = { .name = "[vdso]", diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 7ad2d3a0cd48..d808ad31e01f 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -17,7 +17,10 @@ OUTPUT_ARCH(aarch64) SECTIONS { - PROVIDE(_vdso_data = . - PAGE_SIZE); + PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); +#ifdef CONFIG_TIME_NS + PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); +#endif . = VDSO_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S index 337d03522048..3348ce5ea306 100644 --- a/arch/arm64/kernel/vdso32/vdso.lds.S +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -17,7 +17,10 @@ OUTPUT_ARCH(arm) SECTIONS { - PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE); + PROVIDE_HIDDEN(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); +#ifdef CONFIG_TIME_NS + PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE); +#endif . = VDSO_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 5423ffe0a987..ec8e894684a7 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -10,7 +10,6 @@ #include <asm-generic/vmlinux.lds.h> #include <asm/cache.h> #include <asm/kernel-pgtable.h> -#include <asm/thread_info.h> #include <asm/memory.h> #include <asm/page.h> |
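
The arch_perf_update_userpage() change in perf_event.c above publishes the arch-timer-based sched_clock parameters (time_zero, time_mult, time_shift, and the new time_cycles/time_mask pair) in the perf mmap page, so userspace can turn raw CNTVCT_EL0 readings into sched_clock nanoseconds without a syscall. Below is a minimal consumer-side sketch of that conversion, following the algorithm documented for struct perf_event_mmap_page in include/uapi/linux/perf_event.h; the helper name cyc_to_ns is illustrative only, and the seqcount retry loop around pc->lock that a real reader needs is omitted for brevity.

#include <stdint.h>
#include <linux/perf_event.h>

/*
 * Convert a raw counter value (on arm64, CNTVCT_EL0 read with
 * "mrs %0, cntvct_el0") to a sched_clock timestamp in ns, using the
 * fields arch_perf_update_userpage() fills in.  Sketch only: wrap this
 * in the pc->lock seqcount loop described in perf_event.h for real use.
 */
static uint64_t cyc_to_ns(const struct perf_event_mmap_page *pc, uint64_t cyc)
{
	uint64_t quot, rem;

	if (!pc->cap_user_time_zero)
		return 0;	/* kernel did not export conversion parameters */

	/* Counters narrower than 64 bits must be masked/extended first. */
	if (pc->cap_user_time_short)
		cyc = pc->time_cycles + ((cyc - pc->time_cycles) & pc->time_mask);

	/* ns = time_zero + (cyc * time_mult) >> time_shift, split to avoid overflow */
	quot = cyc >> pc->time_shift;
	rem  = cyc & (((uint64_t)1 << pc->time_shift) - 1);

	return pc->time_zero + quot * pc->time_mult +
	       ((rem * pc->time_mult) >> pc->time_shift);
}

With cap_user_time_zero and cap_user_time_short set, the same fields let tooling correlate PMU sample timestamps with the raw counter, which is what this hunk enables on arm64.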