diff options
Diffstat (limited to 'arch/arm64/kernel')
27 files changed, 986 insertions, 402 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1606c6b2a280..1dcb69d3d0e5 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -50,6 +50,9 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ cpu-reset.o +arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o +arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o +arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 06d650f61da7..8840c109c5d6 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -105,11 +105,11 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) return insn; } -static void __apply_alternatives(void *alt_region) +static void __apply_alternatives(void *alt_region, bool use_linear_alias) { struct alt_instr *alt; struct alt_region *region = alt_region; - u32 *origptr, *replptr; + u32 *origptr, *replptr, *updptr; for (alt = region->begin; alt < region->end; alt++) { u32 insn; @@ -124,11 +124,12 @@ static void __apply_alternatives(void *alt_region) origptr = ALT_ORIG_PTR(alt); replptr = ALT_REPL_PTR(alt); + updptr = use_linear_alias ? 
(u32 *)lm_alias(origptr) : origptr; nr_inst = alt->alt_len / sizeof(insn); for (i = 0; i < nr_inst; i++) { insn = get_alt_insn(alt, origptr + i, replptr + i); - *(origptr + i) = cpu_to_le32(insn); + updptr[i] = cpu_to_le32(insn); } flush_icache_range((uintptr_t)origptr, @@ -155,7 +156,7 @@ static int __apply_alternatives_multi_stop(void *unused) isb(); } else { BUG_ON(patched); - __apply_alternatives(&region); + __apply_alternatives(&region, true); /* Barriers provided by the cache flushing */ WRITE_ONCE(patched, 1); } @@ -176,5 +177,5 @@ void apply_alternatives(void *start, size_t length) .end = start + length, }; - __apply_alternatives(&region); + __apply_alternatives(&region, false); } diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 657977e77ec8..f0e6d717885b 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -306,7 +306,8 @@ do { \ _ASM_EXTABLE(0b, 4b) \ _ASM_EXTABLE(1b, 4b) \ : "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \ - : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT), \ + : "r" ((unsigned long)addr), "i" (-EAGAIN), \ + "i" (-EFAULT), \ "i" (__SWP_LL_SC_LOOPS) \ : "memory"); \ uaccess_disable(); \ diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 3f2250fc391b..380f2e2fbed5 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -17,15 +17,9 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <linux/bitops.h> #include <linux/cacheinfo.h> -#include <linux/cpu.h> -#include <linux/compiler.h> #include <linux/of.h> -#include <asm/cachetype.h> -#include <asm/processor.h> - #define MAX_CACHE_LEVEL 7 /* Max 7 level supported */ /* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ #define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) @@ -43,43 +37,11 @@ static inline enum cache_type get_cache_type(int level) return CLIDR_CTYPE(clidr, level); } -/* - * Cache Size Selection Register(CSSELR) selects which Cache Size ID - * Register(CCSIDR) is accessible by specifying the required cache - * level and the cache type. We need to ensure that no one else changes - * CSSELR by calling this in non-preemtible context - */ -u64 __attribute_const__ cache_get_ccsidr(u64 csselr) -{ - u64 ccsidr; - - WARN_ON(preemptible()); - - write_sysreg(csselr, csselr_el1); - isb(); - ccsidr = read_sysreg(ccsidr_el1); - - return ccsidr; -} - static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, unsigned int level) { - bool is_icache = type & CACHE_TYPE_INST; - u64 tmp = cache_get_ccsidr((level - 1) << 1 | is_icache); - this_leaf->level = level; this_leaf->type = type; - this_leaf->coherency_line_size = CACHE_LINESIZE(tmp); - this_leaf->number_of_sets = CACHE_NUMSETS(tmp); - this_leaf->ways_of_associativity = CACHE_ASSOCIATIVITY(tmp); - this_leaf->size = this_leaf->number_of_sets * - this_leaf->coherency_line_size * this_leaf->ways_of_associativity; - this_leaf->attributes = - ((tmp & CCSIDR_EL1_WRITE_THROUGH) ? CACHE_WRITE_THROUGH : 0) | - ((tmp & CCSIDR_EL1_WRITE_BACK) ? CACHE_WRITE_BACK : 0) | - ((tmp & CCSIDR_EL1_READ_ALLOCATE) ? CACHE_READ_ALLOCATE : 0) | - ((tmp & CCSIDR_EL1_WRITE_ALLOCATE) ? 
CACHE_WRITE_ALLOCATE : 0); } static int __init_cache_level(unsigned int cpu) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6eb77ae99b79..817ce3365e20 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -97,6 +97,13 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), @@ -153,9 +160,9 @@ static const struct arm64_ftr_bits ftr_ctr[] = { /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. - * If we have differing I-cache policies, report it as the weakest - AIVIVT. + * If we have differing I-cache policies, report it as the weakest - VIPT. 
*/ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, }; @@ -314,7 +321,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), - ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -585,7 +592,7 @@ void update_cpu_features(int cpu, * If we have AArch32, we care about 32-bit features for compat. * If the system doesn't support AArch32, don't update them. */ - if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) && + if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, @@ -636,7 +643,7 @@ void update_cpu_features(int cpu, "Unsupported CPU feature variation.\n"); } -u64 read_system_reg(u32 id) +u64 read_sanitised_ftr_reg(u32 id) { struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); @@ -649,10 +656,10 @@ u64 read_system_reg(u32 id) case r: return read_sysreg_s(r) /* - * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + * __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated. 
* Read the system register on the current CPU */ -static u64 __raw_read_system_reg(u32 sys_id) +static u64 __read_sysreg_by_encoding(u32 sys_id) { switch (sys_id) { read_sysreg_case(SYS_ID_PFR0_EL1); @@ -709,9 +716,9 @@ has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); if (scope == SCOPE_SYSTEM) - val = read_system_reg(entry->sys_reg); + val = read_sanitised_ftr_reg(entry->sys_reg); else - val = __raw_read_system_reg(entry->sys_reg); + val = __read_sysreg_by_encoding(entry->sys_reg); return feature_matches(val, entry); } @@ -761,7 +768,7 @@ static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused) { - u64 pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1); + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); return cpuid_feature_extract_signed_field(pfr0, ID_AA64PFR0_FP_SHIFT) < 0; @@ -888,6 +895,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), {}, }; @@ -975,8 +985,16 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, */ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { - for (; caps->matches; caps++) - if (caps->enable && cpus_have_cap(caps->capability)) + for (; caps->matches; caps++) { + unsigned int num = caps->capability; + + if 
(!cpus_have_cap(num)) + continue; + + /* Ensure cpus_have_const_cap(num) works */ + static_branch_enable(&cpu_hwcap_keys[num]); + + if (caps->enable) { /* * Use stop_machine() as it schedules the work allowing * us to modify PSTATE, instead of on_each_cpu() which @@ -984,6 +1002,8 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) * we return. */ stop_machine(caps->enable, NULL, cpu_online_mask); + } + } } /* @@ -1086,6 +1106,14 @@ static void __init setup_feature_capabilities(void) enable_cpu_capabilities(arm64_features); } +DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); +EXPORT_SYMBOL(arm64_const_caps_ready); + +static void __init mark_const_caps_ready(void) +{ + static_branch_enable(&arm64_const_caps_ready); +} + /* * Check if the current CPU has a given feature capability. * Should be called from non-preemptible context. @@ -1121,6 +1149,7 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ setup_feature_capabilities(); enable_errata_workarounds(); + mark_const_caps_ready(); setup_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 5b22c687f02a..68b1f364c515 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -15,7 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <asm/arch_timer.h> -#include <asm/cachetype.h> +#include <asm/cache.h> #include <asm/cpu.h> #include <asm/cputype.h> #include <asm/cpufeature.h> @@ -43,10 +43,10 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; static char *icache_policy_str[] = { - [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", - [ICACHE_POLICY_AIVIVT] = "AIVIVT", - [ICACHE_POLICY_VIPT] = "VIPT", - [ICACHE_POLICY_PIPT] = "PIPT", + [0 ... 
ICACHE_POLICY_PIPT] = "RESERVED/UNKNOWN", + [ICACHE_POLICY_VIPT] = "VIPT", + [ICACHE_POLICY_PIPT] = "PIPT", + [ICACHE_POLICY_VPIPT] = "VPIPT", }; unsigned long __icache_flags; @@ -65,6 +65,9 @@ static const char *const hwcap_str[] = { "asimdhp", "cpuid", "asimdrdm", + "jscvt", + "fcma", + "lrcpc", NULL }; @@ -289,20 +292,18 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) unsigned int cpu = smp_processor_id(); u32 l1ip = CTR_L1IP(info->reg_ctr); - if (l1ip != ICACHE_POLICY_PIPT) { - /* - * VIPT caches are non-aliasing if the VA always equals the PA - * in all bit positions that are covered by the index. This is - * the case if the size of a way (# of sets * line size) does - * not exceed PAGE_SIZE. - */ - u32 waysize = icache_get_numsets() * icache_get_linesize(); - - if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE) - set_bit(ICACHEF_ALIASING, &__icache_flags); + switch (l1ip) { + case ICACHE_POLICY_PIPT: + break; + case ICACHE_POLICY_VPIPT: + set_bit(ICACHEF_VPIPT, &__icache_flags); + break; + default: + /* Fallthrough */ + case ICACHE_POLICY_VIPT: + /* Assume aliasing */ + set_bit(ICACHEF_ALIASING, &__icache_flags); } - if (l1ip == ICACHE_POLICY_AIVIVT) - set_bit(ICACHEF_AIVIVT, &__icache_flags); pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); } diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c new file mode 100644 index 000000000000..f46d57c31443 --- /dev/null +++ b/arch/arm64/kernel/crash_dump.c @@ -0,0 +1,71 @@ +/* + * Routines for doing kexec-based kdump + * + * Copyright (C) 2017 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/crash_dump.h> +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/memblock.h> +#include <linux/uaccess.h> +#include <asm/memory.h> + +/** + * copy_oldmem_page() - copy one page from old kernel memory + * @pfn: page frame number to be copied + * @buf: buffer where the copied page is placed + * @csize: number of bytes to copy + * @offset: offset in bytes into the page + * @userbuf: if set, @buf is in a user address space + * + * This function copies one page from old kernel memory into buffer pointed by + * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes + * copied or negative error in case of failure. + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, + int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB); + if (!vaddr) + return -ENOMEM; + + if (userbuf) { + if (copy_to_user((char __user *)buf, vaddr + offset, csize)) { + memunmap(vaddr); + return -EFAULT; + } + } else { + memcpy(buf, vaddr + offset, csize); + } + + memunmap(vaddr); + + return csize; +} + +/** + * elfcorehdr_read - read from ELF core header + * @buf: buffer where the data is placed + * @count: number of bytes to read + * @ppos: address in the memory + * + * This function reads @count bytes from elf core header which exists + * on crash dump kernel's memory. + */ +ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count); + return count; +} diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 32913567da08..d618e25c3de1 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -36,7 +36,7 @@ /* Determine debug architecture. 
*/ u8 debug_monitors_arch(void) { - return cpuid_feature_extract_unsigned_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + return cpuid_feature_extract_unsigned_field(read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1), ID_AA64DFR0_DEBUGVER_SHIFT); } diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S new file mode 100644 index 000000000000..613fc3000677 --- /dev/null +++ b/arch/arm64/kernel/efi-header.S @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2013 - 2017 Linaro, Ltd. + * Copyright (C) 2013, 2014 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/pe.h> +#include <linux/sizes.h> + + .macro __EFI_PE_HEADER + .long PE_MAGIC +coff_header: + .short IMAGE_FILE_MACHINE_ARM64 // Machine + .short section_count // NumberOfSections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 0 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short IMAGE_FILE_DEBUG_STRIPPED | \ + IMAGE_FILE_EXECUTABLE_IMAGE | \ + IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics + +optional_header: + .short PE_OPT_MAGIC_PE32PLUS // PE32+ format + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long __initdata_begin - efi_header_end // SizeOfCode + .long __pecoff_data_size // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long __efistub_entry - _head // AddressOfEntryPoint + .long efi_header_end - _head // BaseOfCode + +extra_header_fields: + .quad 0 // ImageBase + .long SZ_4K // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short 0 // MajorImageVersion + .short 0 // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _end - _head // 
SizeOfImage + + // Everything before the kernel image is considered part of the header + .long efi_header_end - _head // SizeOfHeaders + .long 0 // CheckSum + .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long (section_table - .) / 8 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + +#ifdef CONFIG_DEBUG_EFI + .long efi_debug_table - _head // DebugTable + .long efi_debug_table_size +#endif + + // Section table +section_table: + .ascii ".text\0\0\0" + .long __initdata_begin - efi_header_end // VirtualSize + .long efi_header_end - _head // VirtualAddress + .long __initdata_begin - efi_header_end // SizeOfRawData + .long efi_header_end - _head // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE // Characteristics + + .ascii ".data\0\0\0" + .long __pecoff_data_size // VirtualSize + .long __initdata_begin - _head // VirtualAddress + .long __pecoff_data_rawsize // SizeOfRawData + .long __initdata_begin - _head // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE // Characteristics + + .set section_count, (. - section_table) / 40 + +#ifdef CONFIG_DEBUG_EFI + /* + * The debug table is referenced via its Relative Virtual Address (RVA), + * which is only defined for those parts of the image that are covered + * by a section declaration. 
Since this header is not covered by any + * section, the debug table must be emitted elsewhere. So stick it in + * the .init.rodata section instead. + * + * Note that the EFI debug entry itself may legally have a zero RVA, + * which means we can simply put it right after the section headers. + */ + __INITRODATA + + .align 2 +efi_debug_table: + // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY + .long 0 // Characteristics + .long 0 // TimeDateStamp + .short 0 // MajorVersion + .short 0 // MinorVersion + .long IMAGE_DEBUG_TYPE_CODEVIEW // Type + .long efi_debug_entry_size // SizeOfData + .long 0 // RVA + .long efi_debug_entry - _head // FileOffset + + .set efi_debug_table_size, . - efi_debug_table + .previous + +efi_debug_entry: + // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY + .ascii "NB10" // Signature + .long 0 // Unknown + .long 0 // Unknown2 + .long 0 // Unknown3 + + .asciz VMLINUX_PATH + + .set efi_debug_entry_size, . - efi_debug_entry +#endif + + /* + * EFI will load .text onwards at the 4k section alignment + * described in the PE/COFF header. To ensure that instruction + * sequences using an adrp and a :lo12: immediate will function + * correctly at this alignment, we must ensure that .text is + * placed at a 4k boundary in the Image to begin with. 
+ */ + .align 12 +efi_header_end: + .endm diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 43512d4d7df2..b738880350f9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -428,12 +428,13 @@ el1_da: /* * Data abort handling */ - mrs x0, far_el1 + mrs x3, far_el1 enable_dbg // re-enable interrupts if they were enabled in the aborted context tbnz x23, #7, 1f // PSR_I_BIT enable_irq 1: + clear_address_tag x0, x3 mov x2, sp // struct pt_regs bl do_mem_abort @@ -594,7 +595,7 @@ el0_da: // enable interrupts before calling the main handler enable_dbg_and_irq ct_user_exit - bic x0, x26, #(0xff << 56) + clear_address_tag x0, x26 mov x1, x25 mov x2, sp bl do_mem_abort diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4fb6ccd886d1..973df7de7bf8 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -42,6 +42,8 @@ #include <asm/thread_info.h> #include <asm/virt.h> +#include "efi-header.S" + #define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) #if (TEXT_OFFSET & 0xfff) != 0 @@ -89,166 +91,14 @@ _head: .quad 0 // reserved .quad 0 // reserved .quad 0 // reserved - .byte 0x41 // Magic number, "ARM\x64" - .byte 0x52 - .byte 0x4d - .byte 0x64 + .ascii "ARM\x64" // Magic number #ifdef CONFIG_EFI .long pe_header - _head // Offset to the PE header. -#else - .word 0 // reserved -#endif -#ifdef CONFIG_EFI - .align 3 pe_header: - .ascii "PE" - .short 0 -coff_header: - .short 0xaa64 // AArch64 - .short 2 // nr_sections - .long 0 // TimeDateStamp - .long 0 // PointerToSymbolTable - .long 1 // NumberOfSymbols - .short section_table - optional_header // SizeOfOptionalHeader - .short 0x206 // Characteristics. 
- // IMAGE_FILE_DEBUG_STRIPPED | - // IMAGE_FILE_EXECUTABLE_IMAGE | - // IMAGE_FILE_LINE_NUMS_STRIPPED -optional_header: - .short 0x20b // PE32+ format - .byte 0x02 // MajorLinkerVersion - .byte 0x14 // MinorLinkerVersion - .long _end - efi_header_end // SizeOfCode - .long 0 // SizeOfInitializedData - .long 0 // SizeOfUninitializedData - .long __efistub_entry - _head // AddressOfEntryPoint - .long efi_header_end - _head // BaseOfCode - -extra_header_fields: - .quad 0 // ImageBase - .long 0x1000 // SectionAlignment - .long PECOFF_FILE_ALIGNMENT // FileAlignment - .short 0 // MajorOperatingSystemVersion - .short 0 // MinorOperatingSystemVersion - .short 0 // MajorImageVersion - .short 0 // MinorImageVersion - .short 0 // MajorSubsystemVersion - .short 0 // MinorSubsystemVersion - .long 0 // Win32VersionValue - - .long _end - _head // SizeOfImage - - // Everything before the kernel image is considered part of the header - .long efi_header_end - _head // SizeOfHeaders - .long 0 // CheckSum - .short 0xa // Subsystem (EFI application) - .short 0 // DllCharacteristics - .quad 0 // SizeOfStackReserve - .quad 0 // SizeOfStackCommit - .quad 0 // SizeOfHeapReserve - .quad 0 // SizeOfHeapCommit - .long 0 // LoaderFlags - .long (section_table - .) / 8 // NumberOfRvaAndSizes - - .quad 0 // ExportTable - .quad 0 // ImportTable - .quad 0 // ResourceTable - .quad 0 // ExceptionTable - .quad 0 // CertificationTable - .quad 0 // BaseRelocationTable - -#ifdef CONFIG_DEBUG_EFI - .long efi_debug_table - _head // DebugTable - .long efi_debug_table_size -#endif - - // Section table -section_table: - - /* - * The EFI application loader requires a relocation section - * because EFI applications must be relocatable. This is a - * dummy section as far as we are concerned. 
- */ - .ascii ".reloc" - .byte 0 - .byte 0 // end of 0 padding of section name - .long 0 - .long 0 - .long 0 // SizeOfRawData - .long 0 // PointerToRawData - .long 0 // PointerToRelocations - .long 0 // PointerToLineNumbers - .short 0 // NumberOfRelocations - .short 0 // NumberOfLineNumbers - .long 0x42100040 // Characteristics (section flags) - - - .ascii ".text" - .byte 0 - .byte 0 - .byte 0 // end of 0 padding of section name - .long _end - efi_header_end // VirtualSize - .long efi_header_end - _head // VirtualAddress - .long _edata - efi_header_end // SizeOfRawData - .long efi_header_end - _head // PointerToRawData - - .long 0 // PointerToRelocations (0 for executables) - .long 0 // PointerToLineNumbers (0 for executables) - .short 0 // NumberOfRelocations (0 for executables) - .short 0 // NumberOfLineNumbers (0 for executables) - .long 0xe0500020 // Characteristics (section flags) - -#ifdef CONFIG_DEBUG_EFI - /* - * The debug table is referenced via its Relative Virtual Address (RVA), - * which is only defined for those parts of the image that are covered - * by a section declaration. Since this header is not covered by any - * section, the debug table must be emitted elsewhere. So stick it in - * the .init.rodata section instead. - * - * Note that the EFI debug entry itself may legally have a zero RVA, - * which means we can simply put it right after the section headers. - */ - __INITRODATA - - .align 2 -efi_debug_table: - // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY - .long 0 // Characteristics - .long 0 // TimeDateStamp - .short 0 // MajorVersion - .short 0 // MinorVersion - .long 2 // Type == EFI_IMAGE_DEBUG_TYPE_CODEVIEW - .long efi_debug_entry_size // SizeOfData - .long 0 // RVA - .long efi_debug_entry - _head // FileOffset - - .set efi_debug_table_size, . 
- efi_debug_table - .previous - -efi_debug_entry: - // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY - .ascii "NB10" // Signature - .long 0 // Unknown - .long 0 // Unknown2 - .long 0 // Unknown3 - - .asciz VMLINUX_PATH - - .set efi_debug_entry_size, . - efi_debug_entry -#endif - - /* - * EFI will load .text onwards at the 4k section alignment - * described in the PE/COFF header. To ensure that instruction - * sequences using an adrp and a :lo12: immediate will function - * correctly at this alignment, we must ensure that .text is - * placed at a 4k boundary in the Image to begin with. - */ - .align 12 -efi_header_end: + __EFI_PE_HEADER +#else + .long 0 // reserved #endif __INIT @@ -534,13 +384,8 @@ ENTRY(kimage_vaddr) ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 - b.ne 1f - mrs x0, sctlr_el2 -CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 -CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 - msr sctlr_el2, x0 - b 2f -1: mrs x0, sctlr_el1 + b.eq 1f + mrs x0, sctlr_el1 CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 msr sctlr_el1, x0 @@ -548,7 +393,11 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 isb ret -2: +1: mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + #ifdef CONFIG_ARM64_VHE /* * Check for VHE being present. 
For the rest of the EL2 setup, @@ -594,14 +443,14 @@ set_hcr: cmp x0, #1 b.ne 3f - mrs_s x0, ICC_SRE_EL2 + mrs_s x0, SYS_ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 - msr_s ICC_SRE_EL2, x0 + msr_s SYS_ICC_SRE_EL2, x0 isb // Make sure SRE is now set - mrs_s x0, ICC_SRE_EL2 // Read SRE back, + mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, tbz x0, #0, 3f // and check that it sticks - msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults 3: #endif @@ -612,26 +461,6 @@ set_hcr: msr vpidr_el2, x0 msr vmpidr_el2, x1 - /* - * When VHE is not in use, early init of EL2 and EL1 needs to be - * done here. - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. - */ - cbnz x2, 1f - - /* sctlr_el1 */ - mov x0, #0x0800 // Set/clear RES{1,0} bits -CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems -CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems - msr sctlr_el1, x0 - - /* Coprocessor traps. */ - mov x0, #0x33ff - msr cptr_el2, x0 // Disable copro. traps to EL2 -1: - #ifdef CONFIG_COMPAT msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif @@ -668,6 +497,23 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems ret install_el2_stub: + /* + * When VHE is not in use, early init of EL2 and EL1 needs to be + * done here. + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. 
+ */ + /* sctlr_el1 */ + mov x0, #0x0800 // Set/clear RES{1,0} bits +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems + msr sctlr_el1, x0 + + /* Coprocessor traps. */ + mov x0, #0x33ff + msr cptr_el2, x0 // Disable copro. traps to EL2 + /* Hypervisor stub */ adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 97a7384100f3..a44e13942d30 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -28,6 +28,7 @@ #include <asm/cacheflush.h> #include <asm/cputype.h> #include <asm/irqflags.h> +#include <asm/kexec.h> #include <asm/memory.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> @@ -102,7 +103,8 @@ int pfn_is_nosave(unsigned long pfn) unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin); unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1); - return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn); + return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) || + crash_is_nosave(pfn); } void notrace save_processor_state(void) @@ -286,6 +288,9 @@ int swsusp_arch_suspend(void) local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { + /* make the crash dump kernel image visible/saveable */ + crash_prepare_suspend(); + sleep_cpu = smp_processor_id(); ret = swsusp_save(); } else { @@ -297,6 +302,9 @@ int swsusp_arch_suspend(void) if (el2_reset_needed()) dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); + /* make the crash dump kernel image protected again */ + crash_post_resume(); + /* * Tell the hibernation core that we've just restored * the memory diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 0296e7924240..749f81779420 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -36,6 +36,7 @@ #include <asm/traps.h> #include <asm/cputype.h> #include 
<asm/system_misc.h> +#include <asm/uaccess.h> /* Breakpoint currently in use for each BRP. */ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); @@ -721,6 +722,8 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, u64 wp_low, wp_high; u32 lens, lene; + addr = untagged_addr(addr); + lens = __ffs(ctrl->len); lene = __fls(ctrl->len); diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index d3b5f75e652e..e1261fbaa374 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -55,18 +55,7 @@ ENDPROC(__hyp_stub_vectors) .align 11 el1_sync: - mrs x30, esr_el2 - lsr x30, x30, #ESR_ELx_EC_SHIFT - - cmp x30, #ESR_ELx_EC_HVC64 - b.ne 9f // Not an HVC trap - - cmp x0, #HVC_GET_VECTORS - b.ne 1f - mrs x0, vbar_el2 - b 9f - -1: cmp x0, #HVC_SET_VECTORS + cmp x0, #HVC_SET_VECTORS b.ne 2f msr vbar_el2, x1 b 9f @@ -79,10 +68,15 @@ el1_sync: mov x1, x3 br x4 // no return +3: cmp x0, #HVC_RESET_VECTORS + beq 9f // Nothing to reset! + /* Someone called kvm_call_hyp() against the hyp-stub... */ -3: mov x0, #ARM_EXCEPTION_HYP_GONE + ldr x0, =HVC_STUB_ERR + eret -9: eret +9: mov x0, xzr + eret ENDPROC(el1_sync) .macro invalid_vector label @@ -121,19 +115,15 @@ ENDPROC(\label) * initialisation entry point. */ -ENTRY(__hyp_get_vectors) - str lr, [sp, #-16]! - mov x0, #HVC_GET_VECTORS - hvc #0 - ldr lr, [sp], #16 - ret -ENDPROC(__hyp_get_vectors) - ENTRY(__hyp_set_vectors) - str lr, [sp, #-16]! mov x1, x0 mov x0, #HVC_SET_VECTORS hvc #0 - ldr lr, [sp], #16 ret ENDPROC(__hyp_set_vectors) + +ENTRY(__hyp_reset_vectors) + mov x0, #HVC_RESET_VECTORS + hvc #0 + ret +ENDPROC(__hyp_reset_vectors) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index bc96c8a7fc79..481f54a866c5 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -9,12 +9,19 @@ * published by the Free Software Foundation. 
*/ +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> #include <linux/kexec.h> +#include <linux/page-flags.h> #include <linux/smp.h> #include <asm/cacheflush.h> #include <asm/cpu_ops.h> +#include <asm/memory.h> +#include <asm/mmu.h> #include <asm/mmu_context.h> +#include <asm/page.h> #include "cpu-reset.h" @@ -22,8 +29,6 @@ extern const unsigned char arm64_relocate_new_kernel[]; extern const unsigned long arm64_relocate_new_kernel_size; -static unsigned long kimage_start; - /** * kexec_image_info - For debugging output. */ @@ -64,8 +69,6 @@ void machine_kexec_cleanup(struct kimage *kimage) */ int machine_kexec_prepare(struct kimage *kimage) { - kimage_start = kimage->start; - kexec_image_info(kimage); if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { @@ -144,11 +147,15 @@ void machine_kexec(struct kimage *kimage) { phys_addr_t reboot_code_buffer_phys; void *reboot_code_buffer; + bool in_kexec_crash = (kimage == kexec_crash_image); + bool stuck_cpus = cpus_are_stuck_in_kernel(); /* * New cpus may have become stuck_in_kernel after we loaded the image. */ - BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1)); + BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1))); + WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), + "Some CPUs may be stale, kdump will be unreliable.\n"); reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys); @@ -183,7 +190,7 @@ void machine_kexec(struct kimage *kimage) kexec_list_flush(kimage); /* Flush the new image if already in place. */ - if (kimage->head & IND_DONE) + if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE)) kexec_segment_flush(kimage); pr_info("Bye!\n"); @@ -200,13 +207,158 @@ void machine_kexec(struct kimage *kimage) * relocation is complete. 
*/ - cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head, - kimage_start, 0); + cpu_soft_restart(kimage != kexec_crash_image, + reboot_code_buffer_phys, kimage->head, kimage->start, 0); BUG(); /* Should never get here. */ } +static void machine_kexec_mask_interrupts(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int ret; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + /* + * First try to remove the active state. If this + * fails, try to EOI the interrupt. + */ + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + + if (ret && irqd_irq_inprogress(&desc->irq_data) && + chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } +} + +/** + * machine_crash_shutdown - shutdown non-crashing cpus and save registers + */ void machine_crash_shutdown(struct pt_regs *regs) { - /* Empty routine needed to avoid build errors. 
*/ + local_irq_disable(); + + /* shutdown non-crashing cpus */ + smp_send_crash_stop(); + + /* for crashing cpu */ + crash_save_cpu(regs, smp_processor_id()); + machine_kexec_mask_interrupts(); + + pr_info("Starting crashdump kernel...\n"); +} + +void arch_kexec_protect_crashkres(void) +{ + int i; + + kexec_segment_flush(kexec_crash_image); + + for (i = 0; i < kexec_crash_image->nr_segments; i++) + set_memory_valid( + __phys_to_virt(kexec_crash_image->segment[i].mem), + kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0); +} + +void arch_kexec_unprotect_crashkres(void) +{ + int i; + + for (i = 0; i < kexec_crash_image->nr_segments; i++) + set_memory_valid( + __phys_to_virt(kexec_crash_image->segment[i].mem), + kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1); +} + +#ifdef CONFIG_HIBERNATION +/* + * To preserve the crash dump kernel image, the relevant memory segments + * should be mapped again around the hibernation. + */ +void crash_prepare_suspend(void) +{ + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); +} + +void crash_post_resume(void) +{ + if (kexec_crash_image) + arch_kexec_protect_crashkres(); +} + +/* + * crash_is_nosave + * + * Return true only if a page is part of reserved memory for crash dump kernel, + * but does not hold any data of loaded kernel image. + * + * Note that all the pages in crash dump kernel memory have been initially + * marked as Reserved in kexec_reserve_crashkres_pages(). + * + * In hibernation, the pages which are Reserved and yet "nosave" are excluded + * from the hibernation image. crash_is_nosave() does this check for crash + * dump kernel and will reduce the total size of hibernation image. + */ + +bool crash_is_nosave(unsigned long pfn) +{ + int i; + phys_addr_t addr; + + if (!crashk_res.end) + return false; + + /* in reserved memory? 
*/ + addr = __pfn_to_phys(pfn); + if ((addr < crashk_res.start) || (crashk_res.end < addr)) + return false; + + if (!kexec_crash_image) + return true; + + /* not part of loaded kernel image? */ + for (i = 0; i < kexec_crash_image->nr_segments; i++) + if (addr >= kexec_crash_image->segment[i].mem && + addr < (kexec_crash_image->segment[i].mem + + kexec_crash_image->segment[i].memsz)) + return false; + + return true; +} + +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) +{ + unsigned long addr; + struct page *page; + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + page = phys_to_page(addr); + ClearPageReserved(page); + free_reserved_page(page); + } +} +#endif /* CONFIG_HIBERNATION */ + +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_NUMBER(VA_BITS); + /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ + vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", + kimage_voffset); + vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", + PHYS_OFFSET); } diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 1ce90d8450ae..d05dbe658409 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2014-2017 Linaro Ltd. 
<ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -26,35 +26,21 @@ struct plt_entry { __le32 br; /* br x16 */ }; -u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, +static bool in_init(const struct module *mod, void *loc) +{ + return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; +} + +u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, Elf64_Sym *sym) { - struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr; - int i = mod->arch.plt_num_entries; + struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : + &mod->arch.init; + struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr; + int i = pltsec->plt_num_entries; u64 val = sym->st_value + rela->r_addend; /* - * We only emit PLT entries against undefined (SHN_UNDEF) symbols, - * which are listed in the ELF symtab section, but without a type - * or a size. - * So, similar to how the module loader uses the Elf64_Sym::st_value - * field to store the resolved addresses of undefined symbols, let's - * borrow the Elf64_Sym::st_size field (whose value is never used by - * the module loader, even for symbols that are defined) to record - * the address of a symbol's associated PLT entry as we emit it for a - * zero addend relocation (which is the only kind we have to deal with - * in practice). This allows us to find duplicates without having to - * go through the table every time. 
- */ - if (rela->r_addend == 0 && sym->st_size != 0) { - BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]); - return sym->st_size; - } - - mod->arch.plt_num_entries++; - BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries); - - /* * MOVK/MOVN/MOVZ opcode: * +--------+------------+--------+-----------+-------------+---------+ * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | @@ -72,8 +58,19 @@ u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, cpu_to_le32(0xd61f0200) }; - if (rela->r_addend == 0) - sym->st_size = (u64)&plt[i]; + /* + * Check if the entry we just created is a duplicate. Given that the + * relocations are sorted, this will be the last entry we allocated. + * (if one exists). + */ + if (i > 0 && + plt[i].mov0 == plt[i - 1].mov0 && + plt[i].mov1 == plt[i - 1].mov1 && + plt[i].mov2 == plt[i - 1].mov2) + return (u64)&plt[i - 1]; + + pltsec->plt_num_entries++; + BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries); return (u64)&plt[i]; } @@ -104,7 +101,8 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num) return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0; } -static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) +static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, + Elf64_Word dstidx) { unsigned int ret = 0; Elf64_Sym *s; @@ -116,13 +114,17 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) case R_AARCH64_CALL26: /* * We only have to consider branch targets that resolve - * to undefined symbols. This is not simply a heuristic, - * it is a fundamental limitation, since the PLT itself - * is part of the module, and needs to be within 128 MB - * as well, so modules can never grow beyond that limit. + * to symbols that are defined in a different section. 
+ * This is not simply a heuristic, it is a fundamental + * limitation, since there is no guaranteed way to emit + * PLT entries sufficiently close to the branch if the + * section size exceeds the range of a branch + * instruction. So ignore relocations against defined + * symbols if they live in the same section as the + * relocation target. */ s = syms + ELF64_R_SYM(rela[i].r_info); - if (s->st_shndx != SHN_UNDEF) + if (s->st_shndx == dstidx) break; /* @@ -149,7 +151,8 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned long plt_max_entries = 0; + unsigned long core_plts = 0; + unsigned long init_plts = 0; Elf64_Sym *syms = NULL; int i; @@ -158,14 +161,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, * entries. Record the symtab address as well. */ for (i = 0; i < ehdr->e_shnum; i++) { - if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0) - mod->arch.plt = sechdrs + i; + if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) + mod->arch.core.plt = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) + mod->arch.init.plt = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) syms = (Elf64_Sym *)sechdrs[i].sh_addr; } - if (!mod->arch.plt) { - pr_err("%s: module PLT section missing\n", mod->name); + if (!mod->arch.core.plt || !mod->arch.init.plt) { + pr_err("%s: module PLT section(s) missing\n", mod->name); return -ENOEXEC; } if (!syms) { @@ -188,14 +193,27 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, /* sort by type, symbol index and addend */ sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL); - plt_max_entries += count_plts(syms, rels, numrels); + if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0) + core_plts += count_plts(syms, rels, numrels, + sechdrs[i].sh_info); + else + init_plts += count_plts(syms, rels, numrels, + 
sechdrs[i].sh_info); } - mod->arch.plt->sh_type = SHT_NOBITS; - mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.plt->sh_addralign = L1_CACHE_BYTES; - mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry); - mod->arch.plt_num_entries = 0; - mod->arch.plt_max_entries = plt_max_entries; + mod->arch.core.plt->sh_type = SHT_NOBITS; + mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.core.plt->sh_size = (core_plts + 1) * sizeof(struct plt_entry); + mod->arch.core.plt_num_entries = 0; + mod->arch.core.plt_max_entries = core_plts; + + mod->arch.init.plt->sh_type = SHT_NOBITS; + mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.init.plt->sh_size = (init_plts + 1) * sizeof(struct plt_entry); + mod->arch.init.plt_num_entries = 0; + mod->arch.init.plt_max_entries = init_plts; + return 0; } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 7f316982ce00..f035ff6fb223 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -32,11 +32,16 @@ void *module_alloc(unsigned long size) { + gfp_t gfp_mask = GFP_KERNEL; void *p; + /* Silence the initial allocation */ + if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + gfp_mask |= __GFP_NOWARN; + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, module_alloc_base + MODULES_VSIZE, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && @@ -380,7 +385,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && ovf == -ERANGE) { - val = module_emit_plt_entry(me, &rel[i], sym); + val = module_emit_plt_entry(me, loc, &rel[i], sym); ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); } diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds 
index 8949f6c6f729..f7c9781a9d48 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,3 +1,4 @@ SECTIONS { .plt (NOLOAD) : { BYTE(0) } + .init.plt (NOLOAD) : { BYTE(0) } } diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 4f0e3ebfea4b..c7e3e6387a49 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -191,8 +191,10 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node); - if (!root_ops) + if (!root_ops) { + kfree(ri); return NULL; + } ri->cfg = pci_acpi_setup_ecam_mapping(root); if (!ri->cfg) { diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 57ae9d9ed9bb..83a1b1ad189f 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -290,6 +290,12 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, @@ -871,15 +877,24 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, if (attr->exclude_idle) return -EPERM; - if (is_kernel_in_hyp_mode() && - attr->exclude_kernel != attr->exclude_hv) - return -EINVAL; + + /* + * If we're running in hyp mode, then we *are* the hypervisor. 
+ * Therefore we ignore exclude_hv in this configuration, since + * there's no hypervisor to sample anyway. This is consistent + * with other architectures (x86 and Power). + */ + if (is_kernel_in_hyp_mode()) { + if (!attr->exclude_kernel) + config_base |= ARMV8_PMU_INCLUDE_EL2; + } else { + if (attr->exclude_kernel) + config_base |= ARMV8_PMU_EXCLUDE_EL1; + if (!attr->exclude_hv) + config_base |= ARMV8_PMU_INCLUDE_EL2; + } if (attr->exclude_user) config_base |= ARMV8_PMU_EXCLUDE_EL0; - if (!is_kernel_in_hyp_mode() && attr->exclude_kernel) - config_base |= ARMV8_PMU_EXCLUDE_EL1; - if (!attr->exclude_hv) - config_base |= ARMV8_PMU_INCLUDE_EL2; /* * Install the filter into config_base as this is used to @@ -957,10 +972,26 @@ static int armv8_vulcan_map_event(struct perf_event *event) ARMV8_PMU_EVTYPE_EVENT); } +struct armv8pmu_probe_info { + struct arm_pmu *pmu; + bool present; +}; + static void __armv8pmu_probe_pmu(void *info) { - struct arm_pmu *cpu_pmu = info; + struct armv8pmu_probe_info *probe = info; + struct arm_pmu *cpu_pmu = probe->pmu; + u64 dfr0; u32 pmceid[2]; + int pmuver; + + dfr0 = read_sysreg(id_aa64dfr0_el1); + pmuver = cpuid_feature_extract_signed_field(dfr0, + ID_AA64DFR0_PMUVER_SHIFT); + if (pmuver < 1) + return; + + probe->present = true; /* Read the nb of CNTx counters supported from PMNC */ cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) @@ -979,13 +1010,27 @@ static void __armv8pmu_probe_pmu(void *info) static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) { - return smp_call_function_any(&cpu_pmu->supported_cpus, + struct armv8pmu_probe_info probe = { + .pmu = cpu_pmu, + .present = false, + }; + int ret; + + ret = smp_call_function_any(&cpu_pmu->supported_cpus, __armv8pmu_probe_pmu, - cpu_pmu, 1); + &probe, 1); + if (ret) + return ret; + + return probe.present ? 
0 : -ENODEV; } -static void armv8_pmu_init(struct arm_pmu *cpu_pmu) +static int armv8_pmu_init(struct arm_pmu *cpu_pmu) { + int ret = armv8pmu_probe_pmu(cpu_pmu); + if (ret) + return ret; + cpu_pmu->handle_irq = armv8pmu_handle_irq, cpu_pmu->enable = armv8pmu_enable_event, cpu_pmu->disable = armv8pmu_disable_event, @@ -997,78 +1042,104 @@ static void armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->reset = armv8pmu_reset, cpu_pmu->max_period = (1LLU << 32) - 1, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + + return 0; } static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + cpu_pmu->name = "armv8_pmuv3"; cpu_pmu->map_event = armv8_pmuv3_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + cpu_pmu->name = "armv8_cortex_a53"; cpu_pmu->map_event = armv8_a53_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + cpu_pmu->name = "armv8_cortex_a57"; cpu_pmu->map_event = armv8_a57_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + 
return ret; + cpu_pmu->name = "armv8_cortex_a72"; cpu_pmu->map_event = armv8_a57_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + cpu_pmu->name = "armv8_cavium_thunder"; cpu_pmu->map_event = armv8_thunder_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + cpu_pmu->name = "armv8_brcm_vulcan"; cpu_pmu->map_event = armv8_vulcan_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static const struct of_device_id armv8_pmu_of_device_ids[] = { @@ -1081,24 +1152,9 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {}, }; -/* - * Non DT systems have their micro/arch events probed at run-time. - * A fairly complete list of generic events are provided and ones that - * aren't supported by the current PMU are disabled. 
- */ -static const struct pmu_probe_info armv8_pmu_probe_table[] = { - PMU_PROBE(0, 0, armv8_pmuv3_init), /* enable all defined counters */ - { /* sentinel value */ } -}; - static int armv8_pmu_device_probe(struct platform_device *pdev) { - if (acpi_disabled) - return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, - NULL); - - return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, - armv8_pmu_probe_table); + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); } static struct platform_driver armv8_pmu_driver = { @@ -1109,4 +1165,11 @@ static struct platform_driver armv8_pmu_driver = { .probe = armv8_pmu_device_probe, }; -builtin_platform_driver(armv8_pmu_driver); +static int __init armv8_pmu_driver_init(void) +{ + if (acpi_disabled) + return platform_driver_register(&armv8_pmu_driver); + else + return arm_pmu_acpi_probe(armv8_pmuv3_init); +} +device_initcall(armv8_pmu_driver_init) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 043d373b8369..ae2a835898d7 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -205,12 +205,10 @@ void __show_regs(struct pt_regs *regs) pr_cont("\n"); } - printk("\n"); } void show_regs(struct pt_regs * regs) { - printk("\n"); __show_regs(regs); } diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c new file mode 100644 index 000000000000..c124752a8bd3 --- /dev/null +++ b/arch/arm64/kernel/reloc_test_core.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include <linux/module.h> + +int sym64_rel; + +#define SYM64_ABS_VAL 0xffff880000cccccc +#define SYM32_ABS_VAL 0xf800cccc +#define SYM16_ABS_VAL 0xf8cc + +#define __SET_ABS(name, val) asm(".globl " #name "; .set "#name ", " #val) +#define SET_ABS(name, val) __SET_ABS(name, val) + +SET_ABS(sym64_abs, SYM64_ABS_VAL); +SET_ABS(sym32_abs, SYM32_ABS_VAL); +SET_ABS(sym16_abs, SYM16_ABS_VAL); + +asmlinkage u64 absolute_data64(void); +asmlinkage u64 absolute_data32(void); +asmlinkage u64 absolute_data16(void); +asmlinkage u64 signed_movw(void); +asmlinkage u64 unsigned_movw(void); +asmlinkage u64 relative_adrp(void); +asmlinkage u64 relative_adr(void); +asmlinkage u64 relative_data64(void); +asmlinkage u64 relative_data32(void); +asmlinkage u64 relative_data16(void); + +static struct { + char name[32]; + u64 (*f)(void); + u64 expect; +} const funcs[] = { + { "R_AARCH64_ABS64", absolute_data64, UL(SYM64_ABS_VAL) }, + { "R_AARCH64_ABS32", absolute_data32, UL(SYM32_ABS_VAL) }, + { "R_AARCH64_ABS16", absolute_data16, UL(SYM16_ABS_VAL) }, + { "R_AARCH64_MOVW_SABS_Gn", signed_movw, UL(SYM64_ABS_VAL) }, + { "R_AARCH64_MOVW_UABS_Gn", unsigned_movw, UL(SYM64_ABS_VAL) }, +#ifndef CONFIG_ARM64_ERRATUM_843419 + { "R_AARCH64_ADR_PREL_PG_HI21", relative_adrp, (u64)&sym64_rel }, +#endif + { "R_AARCH64_ADR_PREL_LO21", relative_adr, (u64)&sym64_rel }, + { "R_AARCH64_PREL64", relative_data64, (u64)&sym64_rel }, + { "R_AARCH64_PREL32", relative_data32, (u64)&sym64_rel }, + { "R_AARCH64_PREL16", relative_data16, (u64)&sym64_rel }, +}; + +static int reloc_test_init(void) +{ + int i; + + pr_info("Relocation test:\n"); + pr_info("-------------------------------------------------------\n"); + + for (i = 0; i < ARRAY_SIZE(funcs); i++) { + u64 ret = funcs[i].f(); + + pr_info("%-31s 0x%016llx %s\n", funcs[i].name, ret, + ret == funcs[i].expect ? 
"pass" : "fail"); + if (ret != funcs[i].expect) + pr_err("Relocation failed, expected 0x%016llx, not 0x%016llx\n", + funcs[i].expect, ret); + } + return 0; +} + +static void reloc_test_exit(void) +{ +} + +module_init(reloc_test_init); +module_exit(reloc_test_exit); + +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S new file mode 100644 index 000000000000..e1edcefeb02d --- /dev/null +++ b/arch/arm64/kernel/reloc_test_syms.S @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/linkage.h> + +ENTRY(absolute_data64) + ldr x0, 0f + ret +0: .quad sym64_abs +ENDPROC(absolute_data64) + +ENTRY(absolute_data32) + ldr w0, 0f + ret +0: .long sym32_abs +ENDPROC(absolute_data32) + +ENTRY(absolute_data16) + adr x0, 0f + ldrh w0, [x0] + ret +0: .short sym16_abs, 0 +ENDPROC(absolute_data16) + +ENTRY(signed_movw) + movz x0, #:abs_g2_s:sym64_abs + movk x0, #:abs_g1_nc:sym64_abs + movk x0, #:abs_g0_nc:sym64_abs + ret +ENDPROC(signed_movw) + +ENTRY(unsigned_movw) + movz x0, #:abs_g3:sym64_abs + movk x0, #:abs_g2_nc:sym64_abs + movk x0, #:abs_g1_nc:sym64_abs + movk x0, #:abs_g0_nc:sym64_abs + ret +ENDPROC(unsigned_movw) + +#ifndef CONFIG_ARM64_ERRATUM_843419 + +ENTRY(relative_adrp) + adrp x0, sym64_rel + add x0, x0, #:lo12:sym64_rel + ret +ENDPROC(relative_adrp) + +#endif + +ENTRY(relative_adr) + adr x0, sym64_rel + ret +ENDPROC(relative_adr) + +ENTRY(relative_data64) + adr x1, 0f + ldr x0, [x1] + add x0, x0, x1 + ret +0: .quad sym64_rel - . +ENDPROC(relative_data64) + +ENTRY(relative_data32) + adr x1, 0f + ldr w0, [x1] + add x0, x0, x1 + ret +0: .long sym64_rel - . 
+ENDPROC(relative_data32) + +ENTRY(relative_data16) + adr x1, 0f + ldrsh w0, [x1] + add x0, x0, x1 + ret +0: .short sym64_rel - ., 0 +ENDPROC(relative_data16) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 42274bda0ccb..2c822ef94f34 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -31,7 +31,6 @@ #include <linux/screen_info.h> #include <linux/init.h> #include <linux/kexec.h> -#include <linux/crash_dump.h> #include <linux/root_dev.h> #include <linux/cpu.h> #include <linux/interrupt.h> @@ -181,6 +180,7 @@ static void __init smp_build_mpidr_hash(void) static void __init setup_machine_fdt(phys_addr_t dt_phys) { void *dt_virt = fixmap_remap_fdt(dt_phys); + const char *name; if (!dt_virt || !early_init_dt_scan(dt_virt)) { pr_crit("\n" @@ -193,7 +193,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) cpu_relax(); } - dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name()); + name = of_flat_dt_get_machine_name(); + pr_info("Machine model: %s\n", name); + dump_stack_set_arch_desc("%s (DT)", name); } static void __init request_standard_resources(void) @@ -226,6 +228,12 @@ static void __init request_standard_resources(void) if (kernel_data.start >= res->start && kernel_data.end <= res->end) request_resource(res, &kernel_data); +#ifdef CONFIG_KEXEC_CORE + /* Userspace will find "Crash kernel" region in /proc/iomem. 
*/ + if (crashk_res.end && crashk_res.start >= res->start && + crashk_res.end <= res->end) + request_resource(res, &crashk_res); +#endif } } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 9b1036570586..6e0e16a3a7d4 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <linux/completion.h> #include <linux/of.h> #include <linux/irq_work.h> +#include <linux/kexec.h> #include <asm/alternative.h> #include <asm/atomic.h> @@ -76,6 +77,7 @@ enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP, + IPI_CPU_CRASH_STOP, IPI_TIMER, IPI_IRQ_WORK, IPI_WAKEUP @@ -434,6 +436,7 @@ void __init smp_cpus_done(unsigned int max_cpus) setup_cpu_features(); hyp_mode_check(); apply_alternatives_all(); + mark_linear_text_alias_ro(); } void __init smp_prepare_boot_cpu(void) @@ -518,6 +521,13 @@ static bool bootcpu_valid __initdata; static unsigned int cpu_count = 1; #ifdef CONFIG_ACPI +static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS]; + +struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu) +{ + return &cpu_madt_gicc[cpu]; +} + /* * acpi_map_gic_cpu_interface - parse processor MADT entry * @@ -552,6 +562,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) return; } bootcpu_valid = true; + cpu_madt_gicc[0] = *processor; early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid)); return; } @@ -562,6 +573,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) /* map the logical cpu id to cpu MPIDR */ cpu_logical_map(cpu_count) = hwid; + cpu_madt_gicc[cpu_count] = *processor; + /* * Set-up the ACPI parking protocol cpu entries * while initializing the cpu_logical_map to @@ -755,6 +768,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_RESCHEDULE, "Rescheduling interrupts"), S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"), 
S(IPI_TIMER, "Timer broadcast interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), S(IPI_WAKEUP, "CPU wake-up interrupts"), @@ -829,6 +843,29 @@ static void ipi_cpu_stop(unsigned int cpu) cpu_relax(); } +#ifdef CONFIG_KEXEC_CORE +static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); +#endif + +static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ +#ifdef CONFIG_KEXEC_CORE + crash_save_cpu(regs, cpu); + + atomic_dec(&waiting_for_crash_ipi); + + local_irq_disable(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_ops[cpu]->cpu_die) + cpu_ops[cpu]->cpu_die(cpu); +#endif + + /* just in case */ + cpu_park_loop(); +#endif +} + /* * Main handler for inter-processor interrupts */ @@ -859,6 +896,15 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; + case IPI_CPU_CRASH_STOP: + if (IS_ENABLED(CONFIG_KEXEC_CORE)) { + irq_enter(); + ipi_cpu_crash_stop(cpu, regs); + + unreachable(); + } + break; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST case IPI_TIMER: irq_enter(); @@ -931,6 +977,39 @@ void smp_send_stop(void) cpumask_pr_args(cpu_online_mask)); } +#ifdef CONFIG_KEXEC_CORE +void smp_send_crash_stop(void) +{ + cpumask_t mask; + unsigned long timeout; + + if (num_online_cpus() == 1) + return; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + + pr_crit("SMP: stopping secondary CPUs\n"); + smp_cross_call(&mask, IPI_CPU_CRASH_STOP); + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) + udelay(1); + + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); +} + +bool smp_crash_stop_failed(void) +{ + return (atomic_read(&waiting_for_crash_ipi) > 0); +} +#endif + /* * not supported here */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 
1de444e6c669..0805b44f986a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -443,7 +443,7 @@ int cpu_enable_cache_maint_trap(void *__unused) } #define __user_cache_maint(insn, address, res) \ - if (untagged_addr(address) >= user_addr_max()) { \ + if (address >= user_addr_max()) { \ res = -EFAULT; \ } else { \ uaccess_ttbr0_enable(); \ @@ -469,7 +469,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; int ret = 0; - address = pt_regs_read_reg(regs, rt); + address = untagged_addr(pt_regs_read_reg(regs, rt)); switch (crm) { case ESR_ELx_SYS64_ISS_CRM_DC_CVAU: /* DC CVAU, gets promoted */ @@ -513,6 +513,14 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) regs->pc += 4; } +static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + + pt_regs_write_reg(regs, rt, read_sysreg(cntfrq_el0)); + regs->pc += 4; +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -537,6 +545,12 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCT, .handler = cntvct_read_handler, }, + { + /* Trap read access to CNTFRQ_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, + .handler = cntfrq_read_handler, + }, {}, }; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index b8deffa9e1bf..987a00ee446c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -143,12 +143,27 @@ SECTIONS . = ALIGN(SEGMENT_ALIGN); __init_begin = .; + __inittext_begin = .; INIT_TEXT_SECTION(8) .exit.text : { ARM_EXIT_KEEP(EXIT_TEXT) } + . = ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .altinstr_replacement : { + *(.altinstr_replacement) + } + + . 
= ALIGN(PAGE_SIZE); + __inittext_end = .; + __initdata_begin = .; + .init.data : { INIT_DATA INIT_SETUP(16) @@ -164,15 +179,6 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(4); - .altinstructions : { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - .altinstr_replacement : { - *(.altinstr_replacement) - } .rela : ALIGN(8) { *(.rela .rela*) } @@ -181,6 +187,7 @@ SECTIONS __rela_size = SIZEOF(.rela); . = ALIGN(SEGMENT_ALIGN); + __initdata_end = .; __init_end = .; _data = .; @@ -206,6 +213,7 @@ SECTIONS } PECOFF_EDATA_PADDING + __pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin); _edata = .; BSS_SECTION(0, 0, 0) @@ -221,6 +229,7 @@ SECTIONS . += RESERVED_TTBR0_SIZE; #endif + __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; STABS_DEBUG |