59 files changed, 1942 insertions, 780 deletions
diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index 61ca21ebef1a..d1c97f9f51cc 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -169,6 +169,18 @@ infrastructure: as available on the CPU where it is fetched and is not a system wide safe value. + 4) ID_AA64ISAR1_EL1 - Instruction set attribute register 1 + + x--------------------------------------------------x + | Name | bits | visible | + |--------------------------------------------------| + | LRCPC | [23-20] | y | + |--------------------------------------------------| + | FCMA | [19-16] | y | + |--------------------------------------------------| + | JSCVT | [15-12] | y | + x--------------------------------------------------x + Appendix I: Example ---------------------------
diff --git a/Documentation/devicetree/bindings/chosen.txt b/Documentation/devicetree/bindings/chosen.txt index 6ae9d82d4c37..b5e39af4ddc0 100644 --- a/Documentation/devicetree/bindings/chosen.txt +++ b/Documentation/devicetree/bindings/chosen.txt @@ -52,3 +52,48 @@ This property is set (currently only on PowerPC, and only needed on book3e) by some versions of kexec-tools to tell the new kernel that it is being booted by kexec, as the booting environment may differ (e.g. a different secondary CPU release mechanism) + +linux,usable-memory-range +------------------------- + +This property (arm64 only) holds a base address and size, describing a +limited region in which memory may be considered available for use by +the kernel. Memory outside of this range is not available for use. + +This property describes a limitation: memory within this range is only +valid when also described through another mechanism that the kernel +would otherwise use to determine available memory (e.g. memory nodes +or the EFI memory map). Valid memory may be sparse within the range. +e.g. + +/ { + chosen { + linux,usable-memory-range = <0x9 0xf0000000 0x0 0x10000000>; + }; +}; + +The main use is for a crash dump kernel to identify its own usable +memory and exclude, at its boot time, any other memory areas that are +part of the panicked kernel's memory. + +While this property does not represent real hardware, the address +and the size are expressed in #address-cells and #size-cells, +respectively, of the root node. + +linux,elfcorehdr +---------------- + +This property (currently used only on arm64) holds the memory range, +the address and the size, of the ELF core header, which mainly describes +the panicked kernel's memory layout as PT_LOAD segments of ELF format. +e.g. + +/ { + chosen { + linux,elfcorehdr = <0x9 0xfffff000 0x0 0x800>; + }; +}; + +While this property does not represent real hardware, the address +and the size are expressed in #address-cells and #size-cells, +respectively, of the root node.
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index b0eb27b956d9..615434d81108 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to a remote system. Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64, -s390x and arm architectures. +s390x, arm and arm64 architectures. When the system kernel boots, it reserves a small section of memory for the dump-capture kernel.
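To make the linux,usable-memory-range binding above concrete, here is a minimal sketch of how an early-boot FDT scan could pick the property up using the generic of_fdt helpers. The function name and the length check are illustrative assumptions, not necessarily the parsing code this series adds:

	/*
	 * Illustrative sketch only: find "linux,usable-memory-range" in
	 * /chosen during early FDT scanning. dt_root_addr_cells and
	 * dt_root_size_cells come from <linux/of_fdt.h>; the callback
	 * signature matches of_scan_flat_dt().
	 */
	#include <linux/memblock.h>
	#include <linux/of_fdt.h>
	#include <linux/string.h>

	static int __init early_init_dt_scan_usablemem(unsigned long node,
			const char *uname, int depth, void *data)
	{
		struct memblock_region *usablemem = data;
		const __be32 *reg;
		int len;

		if (depth != 1 || strcmp(uname, "chosen") != 0)
			return 0;

		reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len);
		if (!reg || len < (int)sizeof(__be32) *
				(dt_root_addr_cells + dt_root_size_cells))
			return 1;

		/* Address and size use the root node's #address-cells/#size-cells. */
		usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
		usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);

		return 1;	/* non-zero stops the scan */
	}

Passing this callback to of_scan_flat_dt() from early setup code would yield the range; the crash dump kernel can then cap its view of memory to that region (e.g. via a memblock_cap_memory_range()-style call) before adding memory to memblock.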
This ensures that ongoing Direct Memory Access @@ -249,6 +249,13 @@ Dump-capture kernel config options (Arch Dependent, arm) AUTO_ZRELADDR=y +Dump-capture kernel config options (Arch Dependent, arm64) +---------------------------------------------------------- + +- Please note that KVM in the dump-capture kernel will not be enabled + on non-VHE systems even if it is configured. This is because the CPU + will not be reset to EL2 on panic. + Extended crashkernel syntax =========================== @@ -305,6 +312,8 @@ Boot into System Kernel kernel will automatically locate the crash kernel image within the first 512MB of RAM if X is not given. + On arm64, use "crashkernel=Y[@X]". Note that the start address of + the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000). Load the Dump-capture Kernel ============================ @@ -327,6 +336,8 @@ For s390x: - Use image or bzImage For arm: - Use zImage +For arm64: + - Use vmlinux or Image If you are using an uncompressed vmlinux image, use the following command to load the dump-capture kernel. @@ -370,6 +381,9 @@ For s390x: For arm: "1 maxcpus=1 reset_devices" +For arm64: + "1 maxcpus=1 reset_devices" + Notes on loading the dump-capture kernel: * By default, the ELF headers are stored in ELF64 format to support
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3741859765cf..e7f043efff41 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -736,6 +736,17 @@ config KEXEC but it is independent of the system firmware. And like a reboot you can start any kernel with it, not just Linux. +config CRASH_DUMP + bool "Build kdump crash kernel" + help + Generate a crash dump after being started by kexec. This should + normally only be set in special crash dump kernels which are + loaded in the main kernel with kexec-tools into a specially + reserved region and then later executed after a crash by + kdump/kexec.
+ + For more details see Documentation/kdump/kdump.txt + config XEN_DOM0 def_bool y depends on XEN diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index fca2f02cde68..cc6bd559af85 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -92,6 +92,10 @@ config DEBUG_EFI the kernel that are only useful when using a debug build of the UEFI firmware +config ARM64_RELOC_TEST + depends on m + tristate "Relocation testing module" + source "drivers/hwtracing/coresight/Kconfig" endmenu diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 7c48028ec64a..927ee18bbdf2 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -82,6 +82,7 @@ CONFIG_CMA=y CONFIG_SECCOMP=y CONFIG_XEN=y CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y CONFIG_CPU_IDLE=y diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index f37e3a21f6e7..1a98bc8602a2 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -20,69 +20,14 @@ #include <asm/sysreg.h> -#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) -#define ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) -#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) -#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) -#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) -#define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) -#define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) -#define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) -#define ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) - -#define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) - -/* - * System register definitions - */ -#define ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) -#define ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) -#define ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) -#define ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) -#define ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) -#define ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) -#define ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) - -#define __LR0_EL2(x) sys_reg(3, 4, 12, 12, x) -#define __LR8_EL2(x) sys_reg(3, 4, 12, 13, x) - -#define ICH_LR0_EL2 __LR0_EL2(0) -#define ICH_LR1_EL2 __LR0_EL2(1) -#define ICH_LR2_EL2 __LR0_EL2(2) -#define ICH_LR3_EL2 __LR0_EL2(3) -#define ICH_LR4_EL2 __LR0_EL2(4) -#define ICH_LR5_EL2 __LR0_EL2(5) -#define ICH_LR6_EL2 __LR0_EL2(6) -#define ICH_LR7_EL2 __LR0_EL2(7) -#define ICH_LR8_EL2 __LR8_EL2(0) -#define ICH_LR9_EL2 __LR8_EL2(1) -#define ICH_LR10_EL2 __LR8_EL2(2) -#define ICH_LR11_EL2 __LR8_EL2(3) -#define ICH_LR12_EL2 __LR8_EL2(4) -#define ICH_LR13_EL2 __LR8_EL2(5) -#define ICH_LR14_EL2 __LR8_EL2(6) -#define ICH_LR15_EL2 __LR8_EL2(7) - -#define __AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) -#define ICH_AP0R0_EL2 __AP0Rx_EL2(0) -#define ICH_AP0R1_EL2 __AP0Rx_EL2(1) -#define ICH_AP0R2_EL2 __AP0Rx_EL2(2) -#define ICH_AP0R3_EL2 __AP0Rx_EL2(3) - -#define __AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) -#define ICH_AP1R0_EL2 __AP1Rx_EL2(0) -#define ICH_AP1R1_EL2 __AP1Rx_EL2(1) -#define ICH_AP1R2_EL2 __AP1Rx_EL2(2) -#define ICH_AP1R3_EL2 __AP1Rx_EL2(3) - #ifndef __ASSEMBLY__ #include <linux/stringify.h> #include <asm/barrier.h> #include <asm/cacheflush.h> -#define read_gicreg read_sysreg_s -#define write_gicreg write_sysreg_s +#define read_gicreg(r) read_sysreg_s(SYS_ ## r) +#define write_gicreg(v, r) write_sysreg_s(v, SYS_ ## r) /* * Low-level accessors @@ -93,13 +38,13 @@ static inline void gic_write_eoir(u32 irq) { - write_sysreg_s(irq, ICC_EOIR1_EL1); + write_sysreg_s(irq, SYS_ICC_EOIR1_EL1); isb(); } static inline void gic_write_dir(u32 irq) { - write_sysreg_s(irq, 
ICC_DIR_EL1); + write_sysreg_s(irq, SYS_ICC_DIR_EL1); isb(); } @@ -107,7 +52,7 @@ static inline u64 gic_read_iar_common(void) { u64 irqstat; - irqstat = read_sysreg_s(ICC_IAR1_EL1); + irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); dsb(sy); return irqstat; } @@ -124,7 +69,7 @@ static inline u64 gic_read_iar_cavium_thunderx(void) u64 irqstat; nops(8); - irqstat = read_sysreg_s(ICC_IAR1_EL1); + irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); nops(4); mb(); @@ -133,40 +78,40 @@ static inline u64 gic_read_iar_cavium_thunderx(void) static inline void gic_write_pmr(u32 val) { - write_sysreg_s(val, ICC_PMR_EL1); + write_sysreg_s(val, SYS_ICC_PMR_EL1); } static inline void gic_write_ctlr(u32 val) { - write_sysreg_s(val, ICC_CTLR_EL1); + write_sysreg_s(val, SYS_ICC_CTLR_EL1); isb(); } static inline void gic_write_grpen1(u32 val) { - write_sysreg_s(val, ICC_GRPEN1_EL1); + write_sysreg_s(val, SYS_ICC_GRPEN1_EL1); isb(); } static inline void gic_write_sgi1r(u64 val) { - write_sysreg_s(val, ICC_SGI1R_EL1); + write_sysreg_s(val, SYS_ICC_SGI1R_EL1); } static inline u32 gic_read_sre(void) { - return read_sysreg_s(ICC_SRE_EL1); + return read_sysreg_s(SYS_ICC_SRE_EL1); } static inline void gic_write_sre(u32 val) { - write_sysreg_s(val, ICC_SRE_EL1); + write_sysreg_s(val, SYS_ICC_SRE_EL1); isb(); } static inline void gic_write_bpr1(u32 val) { - asm volatile("msr_s " __stringify(ICC_BPR1_EL1) ", %0" : : "r" (val)); + write_sysreg_s(val, SYS_ICC_BPR1_EL1); } #define gic_read_typer(c) readq_relaxed(c) diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h index 561190d15881..0bfe1df12b19 100644 --- a/arch/arm64/include/asm/bug.h +++ b/arch/arm64/include/asm/bug.h @@ -20,9 +20,6 @@ #include <asm/brk-imm.h> -#ifdef CONFIG_GENERIC_BUG -#define HAVE_ARCH_BUG - #ifdef CONFIG_DEBUG_BUGVERBOSE #define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) #define __BUGVERBOSE_LOCATION(file, line) \ @@ -36,28 +33,36 @@ #define _BUGVERBOSE_LOCATION(file, line) #endif -#define _BUG_FLAGS(flags) __BUG_FLAGS(flags) +#ifdef CONFIG_GENERIC_BUG -#define __BUG_FLAGS(flags) asm volatile ( \ +#define __BUG_ENTRY(flags) \ ".pushsection __bug_table,\"a\"\n\t" \ ".align 2\n\t" \ "0: .long 1f - 0b\n\t" \ _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ ".short " #flags "\n\t" \ ".popsection\n" \ - \ - "1: brk %[imm]" \ - :: [imm] "i" (BUG_BRK_IMM) \ -) + "1: " +#else +#define __BUG_ENTRY(flags) "" +#endif + +#define __BUG_FLAGS(flags) \ + asm volatile ( \ + __BUG_ENTRY(flags) \ + "brk %[imm]" :: [imm] "i" (BUG_BRK_IMM) \ + ); -#define BUG() do { \ - _BUG_FLAGS(0); \ - unreachable(); \ + +#define BUG() do { \ + __BUG_FLAGS(0); \ + unreachable(); \ } while (0) -#define __WARN_TAINT(taint) _BUG_FLAGS(BUGFLAG_TAINT(taint)) +#define __WARN_TAINT(taint) \ + __BUG_FLAGS(BUGFLAG_TAINT(taint)) -#endif /* ! 
CONFIG_GENERIC_BUG */ +#define HAVE_ARCH_BUG #include <asm-generic/bug.h> diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 5082b30bc2c0..ea9bb4e0e9bb 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -16,7 +16,18 @@ #ifndef __ASM_CACHE_H #define __ASM_CACHE_H -#include <asm/cachetype.h> +#include <asm/cputype.h> + +#define CTR_L1IP_SHIFT 14 +#define CTR_L1IP_MASK 3 +#define CTR_CWG_SHIFT 24 +#define CTR_CWG_MASK 15 + +#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) + +#define ICACHE_POLICY_VPIPT 0 +#define ICACHE_POLICY_VIPT 2 +#define ICACHE_POLICY_PIPT 3 #define L1_CACHE_SHIFT 7 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) @@ -32,6 +43,31 @@ #ifndef __ASSEMBLY__ +#include <linux/bitops.h> + +#define ICACHEF_ALIASING 0 +#define ICACHEF_VPIPT 1 +extern unsigned long __icache_flags; + +/* + * Whilst the D-side always behaves as PIPT on AArch64, aliasing is + * permitted in the I-cache. + */ +static inline int icache_is_aliasing(void) +{ + return test_bit(ICACHEF_ALIASING, &__icache_flags); +} + +static inline int icache_is_vpipt(void) +{ + return test_bit(ICACHEF_VPIPT, &__icache_flags); +} + +static inline u32 cache_type_cwg(void) +{ + return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; +} + #define __read_mostly __attribute__((__section__(".data..read_mostly"))) static inline int cache_line_size(void) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 5a2a6ee65f65..728f933cef8c 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -154,5 +154,6 @@ int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); +int set_memory_valid(unsigned long addr, unsigned long size, int enable); #endif diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h deleted file mode 100644 index f5588692f1d4..000000000000 --- a/arch/arm64/include/asm/cachetype.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ -#ifndef __ASM_CACHETYPE_H -#define __ASM_CACHETYPE_H - -#include <asm/cputype.h> - -#define CTR_L1IP_SHIFT 14 -#define CTR_L1IP_MASK 3 -#define CTR_CWG_SHIFT 24 -#define CTR_CWG_MASK 15 - -#define ICACHE_POLICY_RESERVED 0 -#define ICACHE_POLICY_AIVIVT 1 -#define ICACHE_POLICY_VIPT 2 -#define ICACHE_POLICY_PIPT 3 - -#ifndef __ASSEMBLY__ - -#include <linux/bitops.h> - -#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) - -#define ICACHEF_ALIASING 0 -#define ICACHEF_AIVIVT 1 - -extern unsigned long __icache_flags; - -/* - * NumSets, bits[27:13] - (Number of sets in cache) - 1 - * Associativity, bits[12:3] - (Associativity of cache) - 1 - * LineSize, bits[2:0] - (Log2(Number of words in cache line)) - 2 - */ -#define CCSIDR_EL1_WRITE_THROUGH BIT(31) -#define CCSIDR_EL1_WRITE_BACK BIT(30) -#define CCSIDR_EL1_READ_ALLOCATE BIT(29) -#define CCSIDR_EL1_WRITE_ALLOCATE BIT(28) -#define CCSIDR_EL1_LINESIZE_MASK 0x7 -#define CCSIDR_EL1_LINESIZE(x) ((x) & CCSIDR_EL1_LINESIZE_MASK) -#define CCSIDR_EL1_ASSOCIATIVITY_SHIFT 3 -#define CCSIDR_EL1_ASSOCIATIVITY_MASK 0x3ff -#define CCSIDR_EL1_ASSOCIATIVITY(x) \ - (((x) >> CCSIDR_EL1_ASSOCIATIVITY_SHIFT) & CCSIDR_EL1_ASSOCIATIVITY_MASK) -#define CCSIDR_EL1_NUMSETS_SHIFT 13 -#define CCSIDR_EL1_NUMSETS_MASK 0x7fff -#define CCSIDR_EL1_NUMSETS(x) \ - (((x) >> CCSIDR_EL1_NUMSETS_SHIFT) & CCSIDR_EL1_NUMSETS_MASK) - -#define CACHE_LINESIZE(x) (16 << CCSIDR_EL1_LINESIZE(x)) -#define CACHE_NUMSETS(x) (CCSIDR_EL1_NUMSETS(x) + 1) -#define CACHE_ASSOCIATIVITY(x) (CCSIDR_EL1_ASSOCIATIVITY(x) + 1) - -extern u64 __attribute_const__ cache_get_ccsidr(u64 csselr); - -/* Helpers for Level 1 Instruction cache csselr = 1L */ -static inline int icache_get_linesize(void) -{ - return CACHE_LINESIZE(cache_get_ccsidr(1L)); -} - -static inline int icache_get_numsets(void) -{ - return CACHE_NUMSETS(cache_get_ccsidr(1L)); -} - -/* - * Whilst the D-side always behaves as PIPT on AArch64, aliasing is - * permitted in the I-cache. 
- */ -static inline int icache_is_aliasing(void) -{ - return test_bit(ICACHEF_ALIASING, &__icache_flags); -} - -static inline int icache_is_aivivt(void) -{ - return test_bit(ICACHEF_AIVIVT, &__icache_flags); -} - -static inline u32 cache_type_cwg(void) -{ - return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; -} - -#endif /* __ASSEMBLY__ */ - -#endif /* __ASM_CACHETYPE_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f31c48d0cd68..e7f84a7b4465 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -226,7 +226,7 @@ void update_cpu_errata_workarounds(void); void __init enable_errata_workarounds(void); void verify_local_cpu_errata_workarounds(void); -u64 read_system_reg(u32 id); +u64 read_sanitised_ftr_reg(u32 id); static inline bool cpu_supports_mixed_endian_el0(void) { @@ -240,7 +240,7 @@ static inline bool system_supports_32bit_el0(void) static inline bool system_supports_mixed_endian_el0(void) { - return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); + return id_aa64mmfr0_mixed_endian_el0(read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1)); } static inline bool system_supports_fpsimd(void) diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 8740297dac77..1473fc2f7ab7 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 6 +#define NR_IPI 7 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index b6b167ac082b..41770766d964 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -149,7 +149,7 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task) /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { - u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1); + u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_BRPS_SHIFT); @@ -158,7 +158,7 @@ static inline int get_num_brps(void) /* Determine number of WRP registers available. */ static inline int get_num_wrps(void) { - u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1); + u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_WRPS_SHIFT); diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 04744dc5fb61..e17f0529a882 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -40,9 +40,59 @@ static inline void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) { - /* Empty routine needed to avoid build errors. 
*/ + if (oldregs) { + memcpy(newregs, oldregs, sizeof(*newregs)); + } else { + u64 tmp1, tmp2; + + __asm__ __volatile__ ( + "stp x0, x1, [%2, #16 * 0]\n" + "stp x2, x3, [%2, #16 * 1]\n" + "stp x4, x5, [%2, #16 * 2]\n" + "stp x6, x7, [%2, #16 * 3]\n" + "stp x8, x9, [%2, #16 * 4]\n" + "stp x10, x11, [%2, #16 * 5]\n" + "stp x12, x13, [%2, #16 * 6]\n" + "stp x14, x15, [%2, #16 * 7]\n" + "stp x16, x17, [%2, #16 * 8]\n" + "stp x18, x19, [%2, #16 * 9]\n" + "stp x20, x21, [%2, #16 * 10]\n" + "stp x22, x23, [%2, #16 * 11]\n" + "stp x24, x25, [%2, #16 * 12]\n" + "stp x26, x27, [%2, #16 * 13]\n" + "stp x28, x29, [%2, #16 * 14]\n" + "mov %0, sp\n" + "stp x30, %0, [%2, #16 * 15]\n" + + "/* faked current PSTATE */\n" + "mrs %0, CurrentEL\n" + "mrs %1, SPSEL\n" + "orr %0, %0, %1\n" + "mrs %1, DAIF\n" + "orr %0, %0, %1\n" + "mrs %1, NZCV\n" + "orr %0, %0, %1\n" + /* pc */ + "adr %1, 1f\n" + "1:\n" + "stp %1, %0, [%2, #16 * 16]\n" + : "=&r" (tmp1), "=&r" (tmp2) + : "r" (newregs) + : "memory" + ); + } } +#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION) +extern bool crash_is_nosave(unsigned long pfn); +extern void crash_prepare_suspend(void); +extern void crash_post_resume(void); +#else +static inline bool crash_is_nosave(unsigned long pfn) {return false; } +static inline void crash_prepare_suspend(void) {} +static inline void crash_post_resume(void) {} +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index ed1246014901..2bc6ffa7b89b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -108,7 +108,7 @@ alternative_else_nop_endif #else #include <asm/pgalloc.h> -#include <asm/cachetype.h> +#include <asm/cache.h> #include <asm/cacheflush.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> @@ -242,12 +242,13 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_flush_dcache_to_poc(va, size); - if (!icache_is_aliasing()) { /* PIPT */ - flush_icache_range((unsigned long)va, - (unsigned long)va + size); - } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ + if (icache_is_aliasing()) { /* any kind of VIPT cache */ __flush_icache_all(); + } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { + /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ + flush_icache_range((unsigned long)va, + (unsigned long)va + size); } } @@ -307,7 +308,7 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, static inline unsigned int kvm_get_vmid_bits(void) { - int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1); + int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 
16 : 8; } diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 47619411f0ff..5468c834b072 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -37,5 +37,6 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only); extern void *fixmap_remap_fdt(phys_addr_t dt_phys); +extern void mark_linear_text_alias_ro(void); #endif diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 06ff7fd9e81f..e12af6754634 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -17,7 +17,6 @@ #define __ASM_MODULE_H #include <asm-generic/module.h> -#include <asm/memory.h> #define MODULE_ARCH_VERMAGIC "aarch64" @@ -33,10 +32,6 @@ u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, Elf64_Sym *sym); #ifdef CONFIG_RANDOMIZE_BASE -#ifdef CONFIG_MODVERSIONS -#define ARCH_RELOCATES_KCRCTAB -#define reloc_start (kimage_vaddr - KIMAGE_VADDR) -#endif extern u64 module_alloc_base; #else #define module_alloc_base ((u64)_etext - MODULES_VSIZE) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0eef6064bf3b..c213fdbd056c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -74,6 +74,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) +#define pte_cont_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK; \ + (__boundary - 1 < (end) - 1) ? __boundary : (end); \ +}) + +#define pmd_cont_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + CONT_PMD_SIZE) & CONT_PMD_MASK; \ + (__boundary - 1 < (end) - 1) ? __boundary : (end); \ +}) + #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) #else diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index c97b8bd2acba..9428b93fefb2 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -50,6 +50,7 @@ extern phys_addr_t arm64_dma_phys_limit; #define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) struct debug_info { +#ifdef CONFIG_HAVE_HW_BREAKPOINT /* Have we suspended stepping by a debugger? */ int suspended_step; /* Allow breakpoints and watchpoints to be disabled for this thread. */ @@ -58,6 +59,7 @@ struct debug_info { /* Hardware breakpoints pinned to this task. 
*/ struct perf_event *hbp_break[ARM_MAX_BRP]; struct perf_event *hbp_watch[ARM_MAX_WRP]; +#endif }; struct cpu_context { diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 4e7e7067afdb..941267caa39c 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -24,6 +24,8 @@ extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; extern char __idmap_text_start[], __idmap_text_end[]; +extern char __initdata_begin[], __initdata_end[]; +extern char __inittext_begin[], __inittext_end[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index d050d720a1b4..55f08c5acfad 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -148,6 +148,9 @@ static inline void cpu_panic_kernel(void) */ bool cpus_are_stuck_in_kernel(void); +extern void smp_send_crash_stop(void); +extern bool smp_crash_stop_failed(void); + #endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ac24b6e798b1..15c142ce991c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -48,6 +48,8 @@ ((crn) << CRn_shift) | ((crm) << CRm_shift) | \ ((op2) << Op2_shift)) +#define sys_insn sys_reg + #define sys_reg_Op0(id) (((id) >> Op0_shift) & Op0_mask) #define sys_reg_Op1(id) (((id) >> Op1_shift) & Op1_mask) #define sys_reg_CRn(id) (((id) >> CRn_shift) & CRn_mask) @@ -81,6 +83,41 @@ #endif /* CONFIG_BROKEN_GAS_INST */ +#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) +#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) + +#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ + (!!x)<<8 | 0x1f) +#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ + (!!x)<<8 | 0x1f) + +#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) +#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) +#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) + +#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) +#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) +#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) +#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2) +#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2) +#define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4) +#define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5) +#define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6) +#define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7) +#define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0) +#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4) +#define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4) +#define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4) +#define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4) +#define SYS_DBGCLAIMSET_EL1 sys_reg(2, 0, 7, 8, 6) +#define SYS_DBGCLAIMCLR_EL1 sys_reg(2, 0, 7, 9, 6) +#define SYS_DBGAUTHSTATUS_EL1 sys_reg(2, 0, 7, 14, 6) +#define SYS_MDCCSR_EL0 sys_reg(2, 3, 0, 1, 0) +#define SYS_DBGDTR_EL0 sys_reg(2, 3, 0, 4, 0) +#define SYS_DBGDTRRX_EL0 sys_reg(2, 3, 0, 5, 0) +#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0) +#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -88,6 +125,7 @@ #define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) #define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) #define SYS_ID_DFR0_EL1 
sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) #define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) #define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) @@ -118,17 +156,127 @@ #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) #define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) -#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0) +#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) +#define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2) + +#define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0) +#define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1) +#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) + +#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) + +#define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0) +#define SYS_AFSR1_EL1 sys_reg(3, 0, 5, 1, 1) +#define SYS_ESR_EL1 sys_reg(3, 0, 5, 2, 0) +#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) +#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) + +#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) +#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) + +#define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) +#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) + +#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) + +#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) +#define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) +#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) +#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) +#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) +#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) +#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) + +#define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) +#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) + +#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) + +#define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) +#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) + +#define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) + #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) #define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) +#define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) +#define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) +#define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) +#define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3) +#define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4) +#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5) +#define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6) +#define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7) +#define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0) +#define SYS_PMXEVTYPER_EL0 sys_reg(3, 3, 9, 13, 1) +#define SYS_PMXEVCNTR_EL0 sys_reg(3, 3, 9, 13, 2) +#define SYS_PMUSERENR_EL0 sys_reg(3, 3, 9, 14, 0) +#define SYS_PMOVSSET_EL0 sys_reg(3, 3, 9, 14, 3) + +#define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) +#define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) -#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ - (!!x)<<8 | 0x1f) +#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) + +#define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0) +#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) +#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) + +#define __PMEV_op2(n) ((n) & 0x7) +#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) +#define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n)) +#define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3)) +#define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n)) + +#define SYS_PMCCFILTR_EL0 
sys_reg (3, 3, 14, 15, 7) + +#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) +#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) +#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) + +#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) +#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0) +#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1) +#define SYS_ICH_AP0R2_EL2 __SYS__AP0Rx_EL2(2) +#define SYS_ICH_AP0R3_EL2 __SYS__AP0Rx_EL2(3) + +#define __SYS__AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) +#define SYS_ICH_AP1R0_EL2 __SYS__AP1Rx_EL2(0) +#define SYS_ICH_AP1R1_EL2 __SYS__AP1Rx_EL2(1) +#define SYS_ICH_AP1R2_EL2 __SYS__AP1Rx_EL2(2) +#define SYS_ICH_AP1R3_EL2 __SYS__AP1Rx_EL2(3) + +#define SYS_ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) +#define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) +#define SYS_ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) +#define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) +#define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) +#define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) +#define SYS_ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) + +#define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x) +#define SYS_ICH_LR0_EL2 __SYS__LR0_EL2(0) +#define SYS_ICH_LR1_EL2 __SYS__LR0_EL2(1) +#define SYS_ICH_LR2_EL2 __SYS__LR0_EL2(2) +#define SYS_ICH_LR3_EL2 __SYS__LR0_EL2(3) +#define SYS_ICH_LR4_EL2 __SYS__LR0_EL2(4) +#define SYS_ICH_LR5_EL2 __SYS__LR0_EL2(5) +#define SYS_ICH_LR6_EL2 __SYS__LR0_EL2(6) +#define SYS_ICH_LR7_EL2 __SYS__LR0_EL2(7) + +#define __SYS__LR8_EL2(x) sys_reg(3, 4, 12, 13, x) +#define SYS_ICH_LR8_EL2 __SYS__LR8_EL2(0) +#define SYS_ICH_LR9_EL2 __SYS__LR8_EL2(1) +#define SYS_ICH_LR10_EL2 __SYS__LR8_EL2(2) +#define SYS_ICH_LR11_EL2 __SYS__LR8_EL2(3) +#define SYS_ICH_LR12_EL2 __SYS__LR8_EL2(4) +#define SYS_ICH_LR13_EL2 __SYS__LR8_EL2(5) +#define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6) +#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* Common SCTLR_ELx flags. 
*/ #define SCTLR_ELx_EE (1 << 25) @@ -156,6 +304,11 @@ #define ID_AA64ISAR0_SHA1_SHIFT 8 #define ID_AA64ISAR0_AES_SHIFT 4 +/* id_aa64isar1 */ +#define ID_AA64ISAR1_LRCPC_SHIFT 20 +#define ID_AA64ISAR1_FCMA_SHIFT 16 +#define ID_AA64ISAR1_JSCVT_SHIFT 12 + /* id_aa64pfr0 */ #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_ASIMD_SHIFT 20 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 61c263cba272..4e187ce2a811 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -32,5 +32,8 @@ #define HWCAP_ASIMDHP (1 << 10) #define HWCAP_CPUID (1 << 11) #define HWCAP_ASIMDRDM (1 << 12) +#define HWCAP_JSCVT (1 << 13) +#define HWCAP_FCMA (1 << 14) +#define HWCAP_LRCPC (1 << 15) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1606c6b2a280..1dcb69d3d0e5 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -50,6 +50,9 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ cpu-reset.o +arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o +arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o +arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 06d650f61da7..8840c109c5d6 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -105,11 +105,11 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) return insn; } -static void __apply_alternatives(void *alt_region) +static void __apply_alternatives(void *alt_region, bool use_linear_alias) { struct alt_instr *alt; struct alt_region *region = alt_region; - u32 *origptr, *replptr; + u32 *origptr, *replptr, *updptr; for (alt = region->begin; alt < region->end; alt++) { u32 insn; @@ -124,11 +124,12 @@ static void __apply_alternatives(void *alt_region) origptr = ALT_ORIG_PTR(alt); replptr = ALT_REPL_PTR(alt); + updptr = use_linear_alias ? (u32 *)lm_alias(origptr) : origptr; nr_inst = alt->alt_len / sizeof(insn); for (i = 0; i < nr_inst; i++) { insn = get_alt_insn(alt, origptr + i, replptr + i); - *(origptr + i) = cpu_to_le32(insn); + updptr[i] = cpu_to_le32(insn); } flush_icache_range((uintptr_t)origptr, @@ -155,7 +156,7 @@ static int __apply_alternatives_multi_stop(void *unused) isb(); } else { BUG_ON(patched); - __apply_alternatives(®ion); + __apply_alternatives(®ion, true); /* Barriers provided by the cache flushing */ WRITE_ONCE(patched, 1); } @@ -176,5 +177,5 @@ void apply_alternatives(void *start, size_t length) .end = start + length, }; - __apply_alternatives(®ion); + __apply_alternatives(®ion, false); } diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 3f2250fc391b..380f2e2fbed5 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -17,15 +17,9 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <linux/bitops.h> #include <linux/cacheinfo.h> -#include <linux/cpu.h> -#include <linux/compiler.h> #include <linux/of.h> -#include <asm/cachetype.h> -#include <asm/processor.h> - #define MAX_CACHE_LEVEL 7 /* Max 7 level supported */ /* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ #define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) @@ -43,43 +37,11 @@ static inline enum cache_type get_cache_type(int level) return CLIDR_CTYPE(clidr, level); } -/* - * Cache Size Selection Register(CSSELR) selects which Cache Size ID - * Register(CCSIDR) is accessible by specifying the required cache - * level and the cache type. We need to ensure that no one else changes - * CSSELR by calling this in non-preemtible context - */ -u64 __attribute_const__ cache_get_ccsidr(u64 csselr) -{ - u64 ccsidr; - - WARN_ON(preemptible()); - - write_sysreg(csselr, csselr_el1); - isb(); - ccsidr = read_sysreg(ccsidr_el1); - - return ccsidr; -} - static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, unsigned int level) { - bool is_icache = type & CACHE_TYPE_INST; - u64 tmp = cache_get_ccsidr((level - 1) << 1 | is_icache); - this_leaf->level = level; this_leaf->type = type; - this_leaf->coherency_line_size = CACHE_LINESIZE(tmp); - this_leaf->number_of_sets = CACHE_NUMSETS(tmp); - this_leaf->ways_of_associativity = CACHE_ASSOCIATIVITY(tmp); - this_leaf->size = this_leaf->number_of_sets * - this_leaf->coherency_line_size * this_leaf->ways_of_associativity; - this_leaf->attributes = - ((tmp & CCSIDR_EL1_WRITE_THROUGH) ? CACHE_WRITE_THROUGH : 0) | - ((tmp & CCSIDR_EL1_WRITE_BACK) ? CACHE_WRITE_BACK : 0) | - ((tmp & CCSIDR_EL1_READ_ALLOCATE) ? CACHE_READ_ALLOCATE : 0) | - ((tmp & CCSIDR_EL1_WRITE_ALLOCATE) ? CACHE_WRITE_ALLOCATE : 0); } static int __init_cache_level(unsigned int cpu) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6eb77ae99b79..94b8f7fc3310 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -97,6 +97,13 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), @@ -153,9 +160,9 @@ static const struct arm64_ftr_bits ftr_ctr[] = { /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. - * If we have differing I-cache policies, report it as the weakest - AIVIVT. + * If we have differing I-cache policies, report it as the weakest - VIPT. 
*/ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, }; @@ -314,7 +321,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), - ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -585,7 +592,7 @@ void update_cpu_features(int cpu, * If we have AArch32, we care about 32-bit features for compat. * If the system doesn't support AArch32, don't update them. */ - if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) && + if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, @@ -636,7 +643,7 @@ void update_cpu_features(int cpu, "Unsupported CPU feature variation.\n"); } -u64 read_system_reg(u32 id) +u64 read_sanitised_ftr_reg(u32 id) { struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); @@ -649,10 +656,10 @@ u64 read_system_reg(u32 id) case r: return read_sysreg_s(r) /* - * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + * __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated. * Read the system register on the current CPU */ -static u64 __raw_read_system_reg(u32 sys_id) +static u64 __read_sysreg_by_encoding(u32 sys_id) { switch (sys_id) { read_sysreg_case(SYS_ID_PFR0_EL1); @@ -709,9 +716,9 @@ has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); if (scope == SCOPE_SYSTEM) - val = read_system_reg(entry->sys_reg); + val = read_sanitised_ftr_reg(entry->sys_reg); else - val = __raw_read_system_reg(entry->sys_reg); + val = __read_sysreg_by_encoding(entry->sys_reg); return feature_matches(val, entry); } @@ -761,7 +768,7 @@ static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused) { - u64 pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1); + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); return cpuid_feature_extract_signed_field(pfr0, ID_AA64PFR0_FP_SHIFT) < 0; @@ -888,6 +895,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 5b22c687f02a..68b1f364c515 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -15,7 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #include <asm/arch_timer.h> -#include <asm/cachetype.h> +#include <asm/cache.h> #include <asm/cpu.h> #include <asm/cputype.h> #include <asm/cpufeature.h> @@ -43,10 +43,10 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; static char *icache_policy_str[] = { - [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", - [ICACHE_POLICY_AIVIVT] = "AIVIVT", - [ICACHE_POLICY_VIPT] = "VIPT", - [ICACHE_POLICY_PIPT] = "PIPT", + [0 ... ICACHE_POLICY_PIPT] = "RESERVED/UNKNOWN", + [ICACHE_POLICY_VIPT] = "VIPT", + [ICACHE_POLICY_PIPT] = "PIPT", + [ICACHE_POLICY_VPIPT] = "VPIPT", }; unsigned long __icache_flags; @@ -65,6 +65,9 @@ static const char *const hwcap_str[] = { "asimdhp", "cpuid", "asimdrdm", + "jscvt", + "fcma", + "lrcpc", NULL }; @@ -289,20 +292,18 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) unsigned int cpu = smp_processor_id(); u32 l1ip = CTR_L1IP(info->reg_ctr); - if (l1ip != ICACHE_POLICY_PIPT) { - /* - * VIPT caches are non-aliasing if the VA always equals the PA - * in all bit positions that are covered by the index. This is - * the case if the size of a way (# of sets * line size) does - * not exceed PAGE_SIZE. - */ - u32 waysize = icache_get_numsets() * icache_get_linesize(); - - if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE) - set_bit(ICACHEF_ALIASING, &__icache_flags); + switch (l1ip) { + case ICACHE_POLICY_PIPT: + break; + case ICACHE_POLICY_VPIPT: + set_bit(ICACHEF_VPIPT, &__icache_flags); + break; + default: + /* Fallthrough */ + case ICACHE_POLICY_VIPT: + /* Assume aliasing */ + set_bit(ICACHEF_ALIASING, &__icache_flags); } - if (l1ip == ICACHE_POLICY_AIVIVT) - set_bit(ICACHEF_AIVIVT, &__icache_flags); pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); } diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c new file mode 100644 index 000000000000..f46d57c31443 --- /dev/null +++ b/arch/arm64/kernel/crash_dump.c @@ -0,0 +1,71 @@ +/* + * Routines for doing kexec-based kdump + * + * Copyright (C) 2017 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/crash_dump.h> +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/memblock.h> +#include <linux/uaccess.h> +#include <asm/memory.h> + +/** + * copy_oldmem_page() - copy one page from old kernel memory + * @pfn: page frame number to be copied + * @buf: buffer where the copied page is placed + * @csize: number of bytes to copy + * @offset: offset in bytes into the page + * @userbuf: if set, @buf is in a user address space + * + * This function copies one page from old kernel memory into buffer pointed by + * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes + * copied or negative error in case of failure. 
+ */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, + int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB); + if (!vaddr) + return -ENOMEM; + + if (userbuf) { + if (copy_to_user((char __user *)buf, vaddr + offset, csize)) { + memunmap(vaddr); + return -EFAULT; + } + } else { + memcpy(buf, vaddr + offset, csize); + } + + memunmap(vaddr); + + return csize; +} + +/** + * elfcorehdr_read - read from ELF core header + * @buf: buffer where the data is placed + * @count: number of bytes to read + * @ppos: address in the memory + * + * This function reads @count bytes from the ELF core header, which exists + * in the crash dump kernel's memory. + */ +ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count); + return count; +} diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 32913567da08..d618e25c3de1 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -36,7 +36,7 @@ /* Determine debug architecture. */ u8 debug_monitors_arch(void) { - return cpuid_feature_extract_unsigned_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + return cpuid_feature_extract_unsigned_field(read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1), ID_AA64DFR0_DEBUGVER_SHIFT); } diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S new file mode 100644 index 000000000000..613fc3000677 --- /dev/null +++ b/arch/arm64/kernel/efi-header.S @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2013 - 2017 Linaro, Ltd. + * Copyright (C) 2013, 2014 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/pe.h> +#include <linux/sizes.h> + + .macro __EFI_PE_HEADER + .long PE_MAGIC +coff_header: + .short IMAGE_FILE_MACHINE_ARM64 // Machine + .short section_count // NumberOfSections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 0 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short IMAGE_FILE_DEBUG_STRIPPED | \ + IMAGE_FILE_EXECUTABLE_IMAGE | \ + IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics + +optional_header: + .short PE_OPT_MAGIC_PE32PLUS // PE32+ format + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long __initdata_begin - efi_header_end // SizeOfCode + .long __pecoff_data_size // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long __efistub_entry - _head // AddressOfEntryPoint + .long efi_header_end - _head // BaseOfCode + +extra_header_fields: + .quad 0 // ImageBase + .long SZ_4K // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short 0 // MajorImageVersion + .short 0 // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _end - _head // SizeOfImage + + // Everything before the kernel image is considered part of the header + .long efi_header_end - _head // SizeOfHeaders + .long 0 // CheckSum + .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long (section_table - .) / 8 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + +#ifdef CONFIG_DEBUG_EFI + .long efi_debug_table - _head // DebugTable + .long efi_debug_table_size +#endif + + // Section table +section_table: + .ascii ".text\0\0\0" + .long __initdata_begin - efi_header_end // VirtualSize + .long efi_header_end - _head // VirtualAddress + .long __initdata_begin - efi_header_end // SizeOfRawData + .long efi_header_end - _head // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE // Characteristics + + .ascii ".data\0\0\0" + .long __pecoff_data_size // VirtualSize + .long __initdata_begin - _head // VirtualAddress + .long __pecoff_data_rawsize // SizeOfRawData + .long __initdata_begin - _head // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE // Characteristics + + .set section_count, (. - section_table) / 40 + +#ifdef CONFIG_DEBUG_EFI + /* + * The debug table is referenced via its Relative Virtual Address (RVA), + * which is only defined for those parts of the image that are covered + * by a section declaration. Since this header is not covered by any + * section, the debug table must be emitted elsewhere. So stick it in + * the .init.rodata section instead. + * + * Note that the EFI debug entry itself may legally have a zero RVA, + * which means we can simply put it right after the section headers. 
+ */ + __INITRODATA + + .align 2 +efi_debug_table: + // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY + .long 0 // Characteristics + .long 0 // TimeDateStamp + .short 0 // MajorVersion + .short 0 // MinorVersion + .long IMAGE_DEBUG_TYPE_CODEVIEW // Type + .long efi_debug_entry_size // SizeOfData + .long 0 // RVA + .long efi_debug_entry - _head // FileOffset + + .set efi_debug_table_size, . - efi_debug_table + .previous + +efi_debug_entry: + // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY + .ascii "NB10" // Signature + .long 0 // Unknown + .long 0 // Unknown2 + .long 0 // Unknown3 + + .asciz VMLINUX_PATH + + .set efi_debug_entry_size, . - efi_debug_entry +#endif + + /* + * EFI will load .text onwards at the 4k section alignment + * described in the PE/COFF header. To ensure that instruction + * sequences using an adrp and a :lo12: immediate will function + * correctly at this alignment, we must ensure that .text is + * placed at a 4k boundary in the Image to begin with. + */ + .align 12 +efi_header_end: + .endm diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4fb6ccd886d1..973df7de7bf8 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -42,6 +42,8 @@ #include <asm/thread_info.h> #include <asm/virt.h> +#include "efi-header.S" + #define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) #if (TEXT_OFFSET & 0xfff) != 0 @@ -89,166 +91,14 @@ _head: .quad 0 // reserved .quad 0 // reserved .quad 0 // reserved - .byte 0x41 // Magic number, "ARM\x64" - .byte 0x52 - .byte 0x4d - .byte 0x64 + .ascii "ARM\x64" // Magic number #ifdef CONFIG_EFI .long pe_header - _head // Offset to the PE header. -#else - .word 0 // reserved -#endif -#ifdef CONFIG_EFI - .align 3 pe_header: - .ascii "PE" - .short 0 -coff_header: - .short 0xaa64 // AArch64 - .short 2 // nr_sections - .long 0 // TimeDateStamp - .long 0 // PointerToSymbolTable - .long 1 // NumberOfSymbols - .short section_table - optional_header // SizeOfOptionalHeader - .short 0x206 // Characteristics. - // IMAGE_FILE_DEBUG_STRIPPED | - // IMAGE_FILE_EXECUTABLE_IMAGE | - // IMAGE_FILE_LINE_NUMS_STRIPPED -optional_header: - .short 0x20b // PE32+ format - .byte 0x02 // MajorLinkerVersion - .byte 0x14 // MinorLinkerVersion - .long _end - efi_header_end // SizeOfCode - .long 0 // SizeOfInitializedData - .long 0 // SizeOfUninitializedData - .long __efistub_entry - _head // AddressOfEntryPoint - .long efi_header_end - _head // BaseOfCode - -extra_header_fields: - .quad 0 // ImageBase - .long 0x1000 // SectionAlignment - .long PECOFF_FILE_ALIGNMENT // FileAlignment - .short 0 // MajorOperatingSystemVersion - .short 0 // MinorOperatingSystemVersion - .short 0 // MajorImageVersion - .short 0 // MinorImageVersion - .short 0 // MajorSubsystemVersion - .short 0 // MinorSubsystemVersion - .long 0 // Win32VersionValue - - .long _end - _head // SizeOfImage - - // Everything before the kernel image is considered part of the header - .long efi_header_end - _head // SizeOfHeaders - .long 0 // CheckSum - .short 0xa // Subsystem (EFI application) - .short 0 // DllCharacteristics - .quad 0 // SizeOfStackReserve - .quad 0 // SizeOfStackCommit - .quad 0 // SizeOfHeapReserve - .quad 0 // SizeOfHeapCommit - .long 0 // LoaderFlags - .long (section_table - .) 
/ 8 // NumberOfRvaAndSizes - - .quad 0 // ExportTable - .quad 0 // ImportTable - .quad 0 // ResourceTable - .quad 0 // ExceptionTable - .quad 0 // CertificationTable - .quad 0 // BaseRelocationTable - -#ifdef CONFIG_DEBUG_EFI - .long efi_debug_table - _head // DebugTable - .long efi_debug_table_size -#endif - - // Section table -section_table: - - /* - * The EFI application loader requires a relocation section - * because EFI applications must be relocatable. This is a - * dummy section as far as we are concerned. - */ - .ascii ".reloc" - .byte 0 - .byte 0 // end of 0 padding of section name - .long 0 - .long 0 - .long 0 // SizeOfRawData - .long 0 // PointerToRawData - .long 0 // PointerToRelocations - .long 0 // PointerToLineNumbers - .short 0 // NumberOfRelocations - .short 0 // NumberOfLineNumbers - .long 0x42100040 // Characteristics (section flags) - - - .ascii ".text" - .byte 0 - .byte 0 - .byte 0 // end of 0 padding of section name - .long _end - efi_header_end // VirtualSize - .long efi_header_end - _head // VirtualAddress - .long _edata - efi_header_end // SizeOfRawData - .long efi_header_end - _head // PointerToRawData - - .long 0 // PointerToRelocations (0 for executables) - .long 0 // PointerToLineNumbers (0 for executables) - .short 0 // NumberOfRelocations (0 for executables) - .short 0 // NumberOfLineNumbers (0 for executables) - .long 0xe0500020 // Characteristics (section flags) - -#ifdef CONFIG_DEBUG_EFI - /* - * The debug table is referenced via its Relative Virtual Address (RVA), - * which is only defined for those parts of the image that are covered - * by a section declaration. Since this header is not covered by any - * section, the debug table must be emitted elsewhere. So stick it in - * the .init.rodata section instead. - * - * Note that the EFI debug entry itself may legally have a zero RVA, - * which means we can simply put it right after the section headers. - */ - __INITRODATA - - .align 2 -efi_debug_table: - // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY - .long 0 // Characteristics - .long 0 // TimeDateStamp - .short 0 // MajorVersion - .short 0 // MinorVersion - .long 2 // Type == EFI_IMAGE_DEBUG_TYPE_CODEVIEW - .long efi_debug_entry_size // SizeOfData - .long 0 // RVA - .long efi_debug_entry - _head // FileOffset - - .set efi_debug_table_size, . - efi_debug_table - .previous - -efi_debug_entry: - // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY - .ascii "NB10" // Signature - .long 0 // Unknown - .long 0 // Unknown2 - .long 0 // Unknown3 - - .asciz VMLINUX_PATH - - .set efi_debug_entry_size, . - efi_debug_entry -#endif - - /* - * EFI will load .text onwards at the 4k section alignment - * described in the PE/COFF header. To ensure that instruction - * sequences using an adrp and a :lo12: immediate will function - * correctly at this alignment, we must ensure that .text is - * placed at a 4k boundary in the Image to begin with. 
- */ - .align 12 -efi_header_end: + __EFI_PE_HEADER +#else + .long 0 // reserved #endif __INIT @@ -534,13 +384,8 @@ ENTRY(kimage_vaddr) ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 - b.ne 1f - mrs x0, sctlr_el2 -CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 -CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 - msr sctlr_el2, x0 - b 2f -1: mrs x0, sctlr_el1 + b.eq 1f + mrs x0, sctlr_el1 CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 msr sctlr_el1, x0 @@ -548,7 +393,11 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 isb ret -2: +1: mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + #ifdef CONFIG_ARM64_VHE /* * Check for VHE being present. For the rest of the EL2 setup, @@ -594,14 +443,14 @@ set_hcr: cmp x0, #1 b.ne 3f - mrs_s x0, ICC_SRE_EL2 + mrs_s x0, SYS_ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 - msr_s ICC_SRE_EL2, x0 + msr_s SYS_ICC_SRE_EL2, x0 isb // Make sure SRE is now set - mrs_s x0, ICC_SRE_EL2 // Read SRE back, + mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, tbz x0, #0, 3f // and check that it sticks - msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults 3: #endif @@ -612,26 +461,6 @@ set_hcr: msr vpidr_el2, x0 msr vmpidr_el2, x1 - /* - * When VHE is not in use, early init of EL2 and EL1 needs to be - * done here. - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. - */ - cbnz x2, 1f - - /* sctlr_el1 */ - mov x0, #0x0800 // Set/clear RES{1,0} bits -CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems -CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems - msr sctlr_el1, x0 - - /* Coprocessor traps. */ - mov x0, #0x33ff - msr cptr_el2, x0 // Disable copro. traps to EL2 -1: - #ifdef CONFIG_COMPAT msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif @@ -668,6 +497,23 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems ret install_el2_stub: + /* + * When VHE is not in use, early init of EL2 and EL1 needs to be + * done here. + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. + */ + /* sctlr_el1 */ + mov x0, #0x0800 // Set/clear RES{1,0} bits +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems + msr sctlr_el1, x0 + + /* Coprocessor traps. */ + mov x0, #0x33ff + msr cptr_el2, x0 // Disable copro. 
traps to EL2 + /* Hypervisor stub */ adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 97a7384100f3..a44e13942d30 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -28,6 +28,7 @@ #include <asm/cacheflush.h> #include <asm/cputype.h> #include <asm/irqflags.h> +#include <asm/kexec.h> #include <asm/memory.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> @@ -102,7 +103,8 @@ int pfn_is_nosave(unsigned long pfn) unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin); unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1); - return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn); + return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) || + crash_is_nosave(pfn); } void notrace save_processor_state(void) @@ -286,6 +288,9 @@ int swsusp_arch_suspend(void) local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { + /* make the crash dump kernel image visible/saveable */ + crash_prepare_suspend(); + sleep_cpu = smp_processor_id(); ret = swsusp_save(); } else { @@ -297,6 +302,9 @@ int swsusp_arch_suspend(void) if (el2_reset_needed()) dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); + /* make the crash dump kernel image protected again */ + crash_post_resume(); + /* * Tell the hibernation core that we've just restored * the memory diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index bc96c8a7fc79..481f54a866c5 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -9,12 +9,19 @@ * published by the Free Software Foundation. */ +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> #include <linux/kexec.h> +#include <linux/page-flags.h> #include <linux/smp.h> #include <asm/cacheflush.h> #include <asm/cpu_ops.h> +#include <asm/memory.h> +#include <asm/mmu.h> #include <asm/mmu_context.h> +#include <asm/page.h> #include "cpu-reset.h" @@ -22,8 +29,6 @@ extern const unsigned char arm64_relocate_new_kernel[]; extern const unsigned long arm64_relocate_new_kernel_size; -static unsigned long kimage_start; - /** * kexec_image_info - For debugging output. */ @@ -64,8 +69,6 @@ void machine_kexec_cleanup(struct kimage *kimage) */ int machine_kexec_prepare(struct kimage *kimage) { - kimage_start = kimage->start; - kexec_image_info(kimage); if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { @@ -144,11 +147,15 @@ void machine_kexec(struct kimage *kimage) { phys_addr_t reboot_code_buffer_phys; void *reboot_code_buffer; + bool in_kexec_crash = (kimage == kexec_crash_image); + bool stuck_cpus = cpus_are_stuck_in_kernel(); /* * New cpus may have become stuck_in_kernel after we loaded the image. */ - BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1)); + BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1))); + WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), + "Some CPUs may be stale, kdump will be unreliable.\n"); reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys); @@ -183,7 +190,7 @@ void machine_kexec(struct kimage *kimage) kexec_list_flush(kimage); /* Flush the new image if already in place. */ - if (kimage->head & IND_DONE) + if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE)) kexec_segment_flush(kimage); pr_info("Bye!\n"); @@ -200,13 +207,158 @@ void machine_kexec(struct kimage *kimage) * relocation is complete. 
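
A minimal sketch (illustrative only, not part of the patch) of what the new cpu_soft_restart() call below evaluates to, assuming the signature cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) from arch/arm64/kernel/cpu-reset.h:

	/*
	 * Sketch only: regular kexec (el2_switch == 1) asks the reset code
	 * to switch back to EL2 before entering the new kernel; the crash
	 * path (el2_switch == 0) enters it at the current exception level.
	 */
	unsigned long el2_switch = (kimage != kexec_crash_image) ? 1 : 0;

	cpu_soft_restart(el2_switch, reboot_code_buffer_phys,
			 kimage->head, kimage->start, 0);
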
*/ - cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head, - kimage_start, 0); + cpu_soft_restart(kimage != kexec_crash_image, + reboot_code_buffer_phys, kimage->head, kimage->start, 0); BUG(); /* Should never get here. */ } +static void machine_kexec_mask_interrupts(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int ret; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + /* + * First try to remove the active state. If this + * fails, try to EOI the interrupt. + */ + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + + if (ret && irqd_irq_inprogress(&desc->irq_data) && + chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } +} + +/** + * machine_crash_shutdown - shutdown non-crashing cpus and save registers + */ void machine_crash_shutdown(struct pt_regs *regs) { - /* Empty routine needed to avoid build errors. */ + local_irq_disable(); + + /* shutdown non-crashing cpus */ + smp_send_crash_stop(); + + /* for crashing cpu */ + crash_save_cpu(regs, smp_processor_id()); + machine_kexec_mask_interrupts(); + + pr_info("Starting crashdump kernel...\n"); +} + +void arch_kexec_protect_crashkres(void) +{ + int i; + + kexec_segment_flush(kexec_crash_image); + + for (i = 0; i < kexec_crash_image->nr_segments; i++) + set_memory_valid( + __phys_to_virt(kexec_crash_image->segment[i].mem), + kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0); +} + +void arch_kexec_unprotect_crashkres(void) +{ + int i; + + for (i = 0; i < kexec_crash_image->nr_segments; i++) + set_memory_valid( + __phys_to_virt(kexec_crash_image->segment[i].mem), + kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1); +} + +#ifdef CONFIG_HIBERNATION +/* + * To preserve the crash dump kernel image, the relevant memory segments + * should be mapped again around the hibernation. + */ +void crash_prepare_suspend(void) +{ + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); +} + +void crash_post_resume(void) +{ + if (kexec_crash_image) + arch_kexec_protect_crashkres(); +} + +/* + * crash_is_nosave + * + * Return true only if a page is part of reserved memory for crash dump kernel, + * but does not hold any data of loaded kernel image. + * + * Note that all the pages in crash dump kernel memory have been initially + * marked as Reserved in kexec_reserve_crashkres_pages(). + * + * In hibernation, the pages which are Reserved and yet "nosave" are excluded + * from the hibernation image. crash_is_nosave() does this check for crash + * dump kernel and will reduce the total size of hibernation image. + */ + +bool crash_is_nosave(unsigned long pfn) +{ + int i; + phys_addr_t addr; + + if (!crashk_res.end) + return false; + + /* in reserved memory? */ + addr = __pfn_to_phys(pfn); + if ((addr < crashk_res.start) || (crashk_res.end < addr)) + return false; + + if (!kexec_crash_image) + return true; + + /* not part of loaded kernel image?
*/ + for (i = 0; i < kexec_crash_image->nr_segments; i++) + if (addr >= kexec_crash_image->segment[i].mem && + addr < (kexec_crash_image->segment[i].mem + + kexec_crash_image->segment[i].memsz)) + return false; + + return true; +} + +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) +{ + unsigned long addr; + struct page *page; + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + page = phys_to_page(addr); + ClearPageReserved(page); + free_reserved_page(page); + } +} +#endif /* CONFIG_HIBERNATION */ + +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_NUMBER(VA_BITS); + /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ + vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", + kimage_voffset); + vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", + PHYS_OFFSET); } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 043d373b8369..ae2a835898d7 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -205,12 +205,10 @@ void __show_regs(struct pt_regs *regs) pr_cont("\n"); } - printk("\n"); } void show_regs(struct pt_regs * regs) { - printk("\n"); __show_regs(regs); } diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c new file mode 100644 index 000000000000..c124752a8bd3 --- /dev/null +++ b/arch/arm64/kernel/reloc_test_core.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> + +int sym64_rel; + +#define SYM64_ABS_VAL 0xffff880000cccccc +#define SYM32_ABS_VAL 0xf800cccc +#define SYM16_ABS_VAL 0xf8cc + +#define __SET_ABS(name, val) asm(".globl " #name "; .set "#name ", " #val) +#define SET_ABS(name, val) __SET_ABS(name, val) + +SET_ABS(sym64_abs, SYM64_ABS_VAL); +SET_ABS(sym32_abs, SYM32_ABS_VAL); +SET_ABS(sym16_abs, SYM16_ABS_VAL); + +asmlinkage u64 absolute_data64(void); +asmlinkage u64 absolute_data32(void); +asmlinkage u64 absolute_data16(void); +asmlinkage u64 signed_movw(void); +asmlinkage u64 unsigned_movw(void); +asmlinkage u64 relative_adrp(void); +asmlinkage u64 relative_adr(void); +asmlinkage u64 relative_data64(void); +asmlinkage u64 relative_data32(void); +asmlinkage u64 relative_data16(void); + +static struct { + char name[32]; + u64 (*f)(void); + u64 expect; +} const funcs[] = { + { "R_AARCH64_ABS64", absolute_data64, UL(SYM64_ABS_VAL) }, + { "R_AARCH64_ABS32", absolute_data32, UL(SYM32_ABS_VAL) }, + { "R_AARCH64_ABS16", absolute_data16, UL(SYM16_ABS_VAL) }, + { "R_AARCH64_MOVW_SABS_Gn", signed_movw, UL(SYM64_ABS_VAL) }, + { "R_AARCH64_MOVW_UABS_Gn", unsigned_movw, UL(SYM64_ABS_VAL) }, +#ifndef CONFIG_ARM64_ERRATUM_843419 + { "R_AARCH64_ADR_PREL_PG_HI21", relative_adrp, (u64)&sym64_rel }, +#endif + { "R_AARCH64_ADR_PREL_LO21", relative_adr, (u64)&sym64_rel }, + { "R_AARCH64_PREL64", relative_data64, (u64)&sym64_rel }, + { "R_AARCH64_PREL32", relative_data32, (u64)&sym64_rel }, + { "R_AARCH64_PREL16", relative_data16, (u64)&sym64_rel }, +}; + +static int reloc_test_init(void) +{ + int i; + + pr_info("Relocation test:\n"); + pr_info("-------------------------------------------------------\n"); + + for (i = 0; i < ARRAY_SIZE(funcs); i++) { + u64 ret = funcs[i].f(); + + pr_info("%-31s 0x%016llx %s\n", funcs[i].name, ret, + ret == funcs[i].expect ? 
"pass" : "fail"); + if (ret != funcs[i].expect) + pr_err("Relocation failed, expected 0x%016llx, not 0x%016llx\n", + funcs[i].expect, ret); + } + return 0; +} + +static void reloc_test_exit(void) +{ +} + +module_init(reloc_test_init); +module_exit(reloc_test_exit); + +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S new file mode 100644 index 000000000000..e1edcefeb02d --- /dev/null +++ b/arch/arm64/kernel/reloc_test_syms.S @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/linkage.h> + +ENTRY(absolute_data64) + ldr x0, 0f + ret +0: .quad sym64_abs +ENDPROC(absolute_data64) + +ENTRY(absolute_data32) + ldr w0, 0f + ret +0: .long sym32_abs +ENDPROC(absolute_data32) + +ENTRY(absolute_data16) + adr x0, 0f + ldrh w0, [x0] + ret +0: .short sym16_abs, 0 +ENDPROC(absolute_data16) + +ENTRY(signed_movw) + movz x0, #:abs_g2_s:sym64_abs + movk x0, #:abs_g1_nc:sym64_abs + movk x0, #:abs_g0_nc:sym64_abs + ret +ENDPROC(signed_movw) + +ENTRY(unsigned_movw) + movz x0, #:abs_g3:sym64_abs + movk x0, #:abs_g2_nc:sym64_abs + movk x0, #:abs_g1_nc:sym64_abs + movk x0, #:abs_g0_nc:sym64_abs + ret +ENDPROC(unsigned_movw) + +#ifndef CONFIG_ARM64_ERRATUM_843419 + +ENTRY(relative_adrp) + adrp x0, sym64_rel + add x0, x0, #:lo12:sym64_rel + ret +ENDPROC(relative_adrp) + +#endif + +ENTRY(relative_adr) + adr x0, sym64_rel + ret +ENDPROC(relative_adr) + +ENTRY(relative_data64) + adr x1, 0f + ldr x0, [x1] + add x0, x0, x1 + ret +0: .quad sym64_rel - . +ENDPROC(relative_data64) + +ENTRY(relative_data32) + adr x1, 0f + ldr w0, [x1] + add x0, x0, x1 + ret +0: .long sym64_rel - . +ENDPROC(relative_data32) + +ENTRY(relative_data16) + adr x1, 0f + ldrsh w0, [x1] + add x0, x0, x1 + ret +0: .short sym64_rel - ., 0 +ENDPROC(relative_data16) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 42274bda0ccb..28855ec1be95 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -31,7 +31,6 @@ #include <linux/screen_info.h> #include <linux/init.h> #include <linux/kexec.h> -#include <linux/crash_dump.h> #include <linux/root_dev.h> #include <linux/cpu.h> #include <linux/interrupt.h> @@ -226,6 +225,12 @@ static void __init request_standard_resources(void) if (kernel_data.start >= res->start && kernel_data.end <= res->end) request_resource(res, &kernel_data); +#ifdef CONFIG_KEXEC_CORE + /* Userspace will find "Crash kernel" region in /proc/iomem. 
*/ + if (crashk_res.end && crashk_res.start >= res->start && + crashk_res.end <= res->end) + request_resource(res, &crashk_res); +#endif } } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ef1caae02110..ffee4e454ac5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <linux/completion.h> #include <linux/of.h> #include <linux/irq_work.h> +#include <linux/kexec.h> #include <asm/alternative.h> #include <asm/atomic.h> @@ -76,6 +77,7 @@ enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP, + IPI_CPU_CRASH_STOP, IPI_TIMER, IPI_IRQ_WORK, IPI_WAKEUP @@ -434,6 +436,7 @@ void __init smp_cpus_done(unsigned int max_cpus) setup_cpu_features(); hyp_mode_check(); apply_alternatives_all(); + mark_linear_text_alias_ro(); } void __init smp_prepare_boot_cpu(void) @@ -755,6 +758,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_RESCHEDULE, "Rescheduling interrupts"), S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), S(IPI_WAKEUP, "CPU wake-up interrupts"), @@ -829,6 +833,29 @@ static void ipi_cpu_stop(unsigned int cpu) cpu_relax(); } +#ifdef CONFIG_KEXEC_CORE +static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); +#endif + +static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ +#ifdef CONFIG_KEXEC_CORE + crash_save_cpu(regs, cpu); + + atomic_dec(&waiting_for_crash_ipi); + + local_irq_disable(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_ops[cpu]->cpu_die) + cpu_ops[cpu]->cpu_die(cpu); +#endif + + /* just in case */ + cpu_park_loop(); +#endif +} + /* * Main handler for inter-processor interrupts */ @@ -859,6 +886,15 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; + case IPI_CPU_CRASH_STOP: + if (IS_ENABLED(CONFIG_KEXEC_CORE)) { + irq_enter(); + ipi_cpu_crash_stop(cpu, regs); + + unreachable(); + } + break; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST case IPI_TIMER: irq_enter(); @@ -931,6 +967,39 @@ void smp_send_stop(void) cpumask_pr_args(cpu_online_mask)); } +#ifdef CONFIG_KEXEC_CORE +void smp_send_crash_stop(void) +{ + cpumask_t mask; + unsigned long timeout; + + if (num_online_cpus() == 1) + return; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + + pr_crit("SMP: stopping secondary CPUs\n"); + smp_cross_call(&mask, IPI_CPU_CRASH_STOP); + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) + udelay(1); + + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); +} + +bool smp_crash_stop_failed(void) +{ + return (atomic_read(&waiting_for_crash_ipi) > 0); +} +#endif + /* * not supported here */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index b8deffa9e1bf..987a00ee446c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -143,12 +143,27 @@ SECTIONS . = ALIGN(SEGMENT_ALIGN); __init_begin = .; + __inittext_begin = .; INIT_TEXT_SECTION(8) .exit.text : { ARM_EXIT_KEEP(EXIT_TEXT) } + . 
= ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .altinstr_replacement : { + *(.altinstr_replacement) + } + + . = ALIGN(PAGE_SIZE); + __inittext_end = .; + __initdata_begin = .; + .init.data : { INIT_DATA INIT_SETUP(16) @@ -164,15 +179,6 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(4); - .altinstructions : { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - .altinstr_replacement : { - *(.altinstr_replacement) - } .rela : ALIGN(8) { *(.rela .rela*) } @@ -181,6 +187,7 @@ SECTIONS __rela_size = SIZEOF(.rela); . = ALIGN(SEGMENT_ALIGN); + __initdata_end = .; __init_end = .; _data = .; @@ -206,6 +213,7 @@ SECTIONS } PECOFF_EDATA_PADDING + __pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin); _edata = .; BSS_SECTION(0, 0, 0) @@ -221,6 +229,7 @@ SECTIONS . += RESERVED_TTBR0_SIZE; #endif + __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; STABS_DEBUG diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 9e1d2b75eecd..73464a96c365 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -94,6 +94,28 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) dsb(ish); isb(); + /* + * If the host is running at EL1 and we have a VPIPT I-cache, + * then we must perform I-cache maintenance at EL2 in order for + * it to have an effect on the guest. Since the guest cannot hit + * I-cache lines allocated with a different VMID, we don't need + * to worry about junk out of guest reset (we nuke the I-cache on + * VMID rollover), but we do need to be careful when remapping + * executable pages for the same guest. This can happen when KSM + * takes a CoW fault on an executable page, copies the page into + * a page that was previously mapped in the guest and then needs + * to invalidate the guest view of the I-cache for that page + * from EL1. To solve this, we invalidate the entire I-cache when + * unmapping a page from a guest if we have a VPIPT I-cache but + * the host is running at EL1. As above, we could do better if + * we had the VA. + * + * The moral of this story is: if you have a VPIPT I-cache, then + * you should be running with VHE enabled. 
+ */ + if (!has_vhe() && icache_is_vpipt()) + __flush_icache_all(); + __tlb_switch_to_host()(kvm); } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d9e9697de1b2..561badf93de8 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -60,7 +60,7 @@ static bool cpu_has_32bit_el1(void) { u64 pfr0; - pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1); + pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); return !!(pfr0 & 0x20); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0e26f8c2b56f..26b0e77878b5 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1183,8 +1183,8 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, if (p->is_write) { return ignore_write(vcpu, p); } else { - u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1); - u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1); + u64 dfr = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); + u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT); p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 68634c630cdd..ab9f5f0fb2c7 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -119,9 +119,6 @@ static void flush_context(unsigned int cpu) /* Queue a TLB invalidate and flush the I-cache if necessary. */ cpumask_setall(&tlb_flush_pending); - - if (icache_is_aivivt()) - __flush_icache_all(); } static bool check_update_reserved_asid(u64 asid, u64 newasid) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 81cdb2e844ed..f7b54019ef55 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -584,20 +584,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, */ gfp |= __GFP_ZERO; - if (gfpflags_allow_blocking(gfp)) { - struct page **pages; - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); - - pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, - handle, flush_page); - if (!pages) - return NULL; - - addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, - __builtin_return_address(0)); - if (!addr) - iommu_dma_free(dev, pages, iosize, handle); - } else { + if (!gfpflags_allow_blocking(gfp)) { struct page *page; /* * In atomic context we can't remap anything, so we'll only @@ -621,6 +608,45 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, __free_from_pool(addr, size); addr = NULL; } + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + struct page *page; + + page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, + get_order(size), gfp); + if (!page) + return NULL; + + *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); + if (iommu_dma_mapping_error(dev, *handle)) { + dma_release_from_contiguous(dev, page, + size >> PAGE_SHIFT); + return NULL; + } + if (!coherent) + __dma_flush_area(page_to_virt(page), iosize); + + addr = dma_common_contiguous_remap(page, size, VM_USERMAP, + prot, + __builtin_return_address(0)); + if (!addr) { + iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs); + dma_release_from_contiguous(dev, page, + size >> PAGE_SHIFT); + } + } else { + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + struct page **pages; + + pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, + handle, flush_page); + if (!pages) + return NULL; + + addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, + 
__builtin_return_address(0)); + if (!addr) + iommu_dma_free(dev, pages, iosize, handle); } return addr; } @@ -632,7 +658,8 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, size = PAGE_ALIGN(size); /* - * @cpu_addr will be one of 3 things depending on how it was allocated: + * @cpu_addr will be one of 4 things depending on how it was allocated: + * - A remapped array of pages for contiguous allocations. * - A remapped array of pages from iommu_dma_alloc(), for all * non-atomic allocations. * - A non-cacheable alias from the atomic pool, for atomic @@ -644,6 +671,12 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, if (__in_atomic_pool(cpu_addr, size)) { iommu_dma_unmap_page(dev, handle, iosize, 0, 0); __free_from_pool(cpu_addr, size); + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + struct page *page = vmalloc_to_page(cpu_addr); + + iommu_dma_unmap_page(dev, handle, iosize, 0, attrs); + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); + dma_common_free_remap(cpu_addr, size, VM_USERMAP); } else if (is_vmalloc_addr(cpu_addr)){ struct vm_struct *area = find_vm_area(cpu_addr); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4bf899fb451b..61114b7b6b9e 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -161,12 +161,33 @@ static bool is_el1_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; } +static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs, + unsigned long addr) +{ + unsigned int ec = ESR_ELx_EC(esr); + unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; + + if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR) + return false; + + if (fsc_type == ESR_ELx_FSC_PERM) + return true; + + if (addr < USER_DS && system_uses_ttbr0_pan()) + return fsc_type == ESR_ELx_FSC_FAULT && + (regs->pstate & PSR_PAN_BIT); + + return false; +} + /* * The kernel tried to access some page that wasn't present. */ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int esr, struct pt_regs *regs) { + const char *msg; + /* * Are we prepared to handle this kernel fault? * We are almost certainly not prepared to handle instruction faults. @@ -178,9 +199,20 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, * No handler, we'll have to terminate things with extreme prejudice. */ bust_spinlocks(1); - pr_alert("Unable to handle kernel %s at virtual address %08lx\n", - (addr < PAGE_SIZE) ? 
"NULL pointer dereference" : - "paging request", addr); + + if (is_permission_fault(esr, regs, addr)) { + if (esr & ESR_ELx_WNR) + msg = "write to read-only memory"; + else + msg = "read from unreadable memory"; + } else if (addr < PAGE_SIZE) { + msg = "NULL pointer dereference"; + } else { + msg = "paging request"; + } + + pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg, + addr); show_pte(mm, addr); die("Oops", regs, esr); @@ -270,21 +302,6 @@ out: return fault; } -static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs) -{ - unsigned int ec = ESR_ELx_EC(esr); - unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; - - if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR) - return false; - - if (system_uses_ttbr0_pan()) - return fsc_type == ESR_ELx_FSC_FAULT && - (regs->pstate & PSR_PAN_BIT); - else - return fsc_type == ESR_ELx_FSC_PERM; -} - static bool is_el0_instruction_abort(unsigned int esr) { return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; @@ -322,7 +339,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < USER_DS && is_permission_fault(esr, regs)) { + if (addr < USER_DS && is_permission_fault(esr, regs, addr)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 554a2558c12e..21a8d828cbf4 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -22,7 +22,7 @@ #include <linux/pagemap.h> #include <asm/cacheflush.h> -#include <asm/cachetype.h> +#include <asm/cache.h> #include <asm/tlbflush.h> void sync_icache_aliases(void *kaddr, unsigned long len) @@ -65,8 +65,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) if (!test_and_set_bit(PG_dcache_clean, &page->flags)) sync_icache_aliases(page_address(page), PAGE_SIZE << compound_order(page)); - else if (icache_is_aivivt()) - __flush_icache_all(); } /* diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index e19e06593e37..5960bef0170d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -30,6 +30,7 @@ #include <linux/gfp.h> #include <linux/memblock.h> #include <linux/sort.h> +#include <linux/of.h> #include <linux/of_fdt.h> #include <linux/dma-mapping.h> #include <linux/dma-contiguous.h> @@ -37,6 +38,8 @@ #include <linux/swiotlb.h> #include <linux/vmalloc.h> #include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> #include <asm/boot.h> #include <asm/fixmap.h> @@ -77,6 +80,142 @@ static int __init early_initrd(char *p) early_param("initrd", early_initrd); #endif +#ifdef CONFIG_KEXEC_CORE +/* + * reserve_crashkernel() - reserves memory for crash kernel + * + * This function reserves memory area given in "crashkernel=" kernel command + * line parameter. The memory reserved is used by dump capture kernel when + * primary kernel is crashing. 
+ */ +static void __init reserve_crashkernel(void) +{ + unsigned long long crash_base, crash_size; + int ret; + + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), + &crash_size, &crash_base); + /* no crashkernel= or invalid value specified */ + if (ret || !crash_size) + return; + + crash_size = PAGE_ALIGN(crash_size); + + if (crash_base == 0) { + /* Current arm64 boot protocol requires 2MB alignment */ + crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT, + crash_size, SZ_2M); + if (crash_base == 0) { + pr_warn("cannot allocate crashkernel (size:0x%llx)\n", + crash_size); + return; + } + } else { + /* User specifies base address explicitly. */ + if (!memblock_is_region_memory(crash_base, crash_size)) { + pr_warn("cannot reserve crashkernel: region is not memory\n"); + return; + } + + if (memblock_is_region_reserved(crash_base, crash_size)) { + pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n"); + return; + } + + if (!IS_ALIGNED(crash_base, SZ_2M)) { + pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n"); + return; + } + } + memblock_reserve(crash_base, crash_size); + + pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", + crash_base, crash_base + crash_size, crash_size >> 20); + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; +} + +static void __init kexec_reserve_crashkres_pages(void) +{ +#ifdef CONFIG_HIBERNATION + phys_addr_t addr; + struct page *page; + + if (!crashk_res.end) + return; + + /* + * To reduce the size of hibernation image, all the pages are + * marked as Reserved initially. + */ + for (addr = crashk_res.start; addr < (crashk_res.end + 1); + addr += PAGE_SIZE) { + page = phys_to_page(addr); + SetPageReserved(page); + } +#endif +} +#else +static void __init reserve_crashkernel(void) +{ +} + +static void __init kexec_reserve_crashkres_pages(void) +{ +} +#endif /* CONFIG_KEXEC_CORE */ + +#ifdef CONFIG_CRASH_DUMP +static int __init early_init_dt_scan_elfcorehdr(unsigned long node, + const char *uname, int depth, void *data) +{ + const __be32 *reg; + int len; + + if (depth != 1 || strcmp(uname, "chosen") != 0) + return 0; + + reg = of_get_flat_dt_prop(node, "linux,elfcorehdr", &len); + if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) + return 1; + + elfcorehdr_addr = dt_mem_next_cell(dt_root_addr_cells, ®); + elfcorehdr_size = dt_mem_next_cell(dt_root_size_cells, ®); + + return 1; +} + +/* + * reserve_elfcorehdr() - reserves memory for elf core header + * + * This function reserves the memory occupied by an elf core header + * described in the device tree. This region contains all the + * information about primary kernel's core image and is used by a dump + * capture kernel to access the system memory on primary kernel. + */ +static void __init reserve_elfcorehdr(void) +{ + of_scan_flat_dt(early_init_dt_scan_elfcorehdr, NULL); + + if (!elfcorehdr_size) + return; + + if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) { + pr_warn("elfcorehdr is overlapped\n"); + return; + } + + memblock_reserve(elfcorehdr_addr, elfcorehdr_size); + + pr_info("Reserving %lldKB of memory at 0x%llx for elfcorehdr\n", + elfcorehdr_size >> 10, elfcorehdr_addr); +} +#else +static void __init reserve_elfcorehdr(void) +{ +} +#endif /* CONFIG_CRASH_DUMP */ /* * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). 
It * currently assumes that for memory starting above 4G, 32-bit devices will @@ -188,10 +327,45 @@ static int __init early_mem(char *p) } early_param("mem", early_mem); +static int __init early_init_dt_scan_usablemem(unsigned long node, + const char *uname, int depth, void *data) +{ + struct memblock_region *usablemem = data; + const __be32 *reg; + int len; + + if (depth != 1 || strcmp(uname, "chosen") != 0) + return 0; + + reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len); + if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) + return 1; + + usablemem->base = dt_mem_next_cell(dt_root_addr_cells, ®); + usablemem->size = dt_mem_next_cell(dt_root_size_cells, ®); + + return 1; +} + +static void __init fdt_enforce_memory_region(void) +{ + struct memblock_region reg = { + .size = 0, + }; + + of_scan_flat_dt(early_init_dt_scan_usablemem, ®); + + if (reg.size) + memblock_cap_memory_range(reg.base, reg.size); +} + void __init arm64_memblock_init(void) { const s64 linear_region_size = -(s64)PAGE_OFFSET; + /* Handle linux,usable-memory-range property */ + fdt_enforce_memory_region(); + /* * Ensure that the linear region takes up exactly half of the kernel * virtual address space. This way, we can distinguish a linear address @@ -297,6 +471,11 @@ void __init arm64_memblock_init(void) arm64_dma_phys_limit = max_zone_dma_phys(); else arm64_dma_phys_limit = PHYS_MASK + 1; + + reserve_crashkernel(); + + reserve_elfcorehdr(); + dma_contiguous_reserve(arm64_dma_phys_limit); memblock_allow_resize(); @@ -416,6 +595,8 @@ void __init mem_init(void) /* this will put all unused low memory onto the freelists */ free_all_bootmem(); + kexec_reserve_crashkres_pages(); + mem_init_print_info(NULL); #define MLK(b, t) b, t, ((t) - (b)) >> 10 diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d28dbcf596b6..0c429ec6fde8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -22,6 +22,8 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/init.h> +#include <linux/ioport.h> +#include <linux/kexec.h> #include <linux/libfdt.h> #include <linux/mman.h> #include <linux/nodemask.h> @@ -43,6 +45,9 @@ #include <asm/mmu_context.h> #include <asm/ptdump.h> +#define NO_BLOCK_MAPPINGS BIT(0) +#define NO_CONT_MAPPINGS BIT(1) + u64 idmap_t0sz = TCR_T0SZ(VA_BITS); u64 kimage_voffset __ro_after_init; @@ -103,33 +108,27 @@ static bool pgattr_change_is_safe(u64 old, u64 new) */ static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE; - return old == 0 || new == 0 || ((old ^ new) & ~mask) == 0; + /* creating or taking down mappings is always safe */ + if (old == 0 || new == 0) + return true; + + /* live contiguous mappings may not be manipulated at all */ + if ((old | new) & PTE_CONT) + return false; + + return ((old ^ new) & ~mask) == 0; } -static void alloc_init_pte(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long pfn, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) +static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot) { pte_t *pte; - BUG_ON(pmd_sect(*pmd)); - if (pmd_none(*pmd)) { - phys_addr_t pte_phys; - BUG_ON(!pgtable_alloc); - pte_phys = pgtable_alloc(); - pte = pte_set_fixmap(pte_phys); - __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); - pte_clear_fixmap(); - } - BUG_ON(pmd_bad(*pmd)); - pte = pte_set_fixmap_offset(pmd, addr); do { pte_t old_pte = *pte; - set_pte(pte, pfn_pte(pfn, prot)); - pfn++; + set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot)); /* * After the PTE entry has been 
populated once, we @@ -137,32 +136,51 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, */ BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte))); + phys += PAGE_SIZE; } while (pte++, addr += PAGE_SIZE, addr != end); pte_clear_fixmap(); } -static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), - bool page_mappings_only) +static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), + int flags) { - pmd_t *pmd; unsigned long next; - /* - * Check for initial section mappings in the pgd/pud and remove them. - */ - BUG_ON(pud_sect(*pud)); - if (pud_none(*pud)) { - phys_addr_t pmd_phys; + BUG_ON(pmd_sect(*pmd)); + if (pmd_none(*pmd)) { + phys_addr_t pte_phys; BUG_ON(!pgtable_alloc); - pmd_phys = pgtable_alloc(); - pmd = pmd_set_fixmap(pmd_phys); - __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); - pmd_clear_fixmap(); + pte_phys = pgtable_alloc(); + __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); } - BUG_ON(pud_bad(*pud)); + BUG_ON(pmd_bad(*pmd)); + + do { + pgprot_t __prot = prot; + + next = pte_cont_addr_end(addr, end); + + /* use a contiguous mapping if the range is suitably aligned */ + if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) && + (flags & NO_CONT_MAPPINGS) == 0) + __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + + init_pte(pmd, addr, next, phys, __prot); + + phys += next - addr; + } while (addr = next, addr != end); +} + +static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), int flags) +{ + unsigned long next; + pmd_t *pmd; pmd = pmd_set_fixmap_offset(pud, addr); do { @@ -172,7 +190,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && - !page_mappings_only) { + (flags & NO_BLOCK_MAPPINGS) == 0) { pmd_set_huge(pmd, phys, prot); /* @@ -182,8 +200,8 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), pmd_val(*pmd))); } else { - alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, pgtable_alloc); + alloc_init_cont_pte(pmd, addr, next, phys, prot, + pgtable_alloc, flags); BUG_ON(pmd_val(old_pmd) != 0 && pmd_val(old_pmd) != pmd_val(*pmd)); @@ -194,6 +212,41 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, pmd_clear_fixmap(); } +static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), int flags) +{ + unsigned long next; + + /* + * Check for initial section mappings in the pgd/pud. 
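
The "(addr | next | phys) & ~CONT_PTE_MASK" test in alloc_init_cont_pte() above (and its PMD counterpart below) is a pure alignment check. A standalone userspace sketch of the same arithmetic, assuming a 4KiB translation granule where a contiguous run is 16 PTEs, i.e. a 64KiB block; the two macros are local stand-ins for the kernel's:

#include <stdint.h>
#include <stdio.h>

#define CONT_PTE_SHIFT 16 /* 4KiB pages: 16 PTEs x 4KiB = 64KiB */
#define CONT_PTE_MASK (~((UINT64_C(1) << CONT_PTE_SHIFT) - 1))

static int can_use_cont_pte(uint64_t addr, uint64_t next, uint64_t phys)
{
	/* VA start, VA end and PA must all sit on a 64KiB boundary. */
	return ((addr | next | phys) & ~CONT_PTE_MASK) == 0;
}

int main(void)
{
	printf("%d\n", can_use_cont_pte(0x10000, 0x20000, 0x40000)); /* 1 */
	printf("%d\n", can_use_cont_pte(0x10000, 0x20000, 0x41000)); /* 0: PA off by 4KiB */
	return 0;
}
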
+ */ + BUG_ON(pud_sect(*pud)); + if (pud_none(*pud)) { + phys_addr_t pmd_phys; + BUG_ON(!pgtable_alloc); + pmd_phys = pgtable_alloc(); + __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); + } + BUG_ON(pud_bad(*pud)); + + do { + pgprot_t __prot = prot; + + next = pmd_cont_addr_end(addr, end); + + /* use a contiguous mapping if the range is suitably aligned */ + if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && + (flags & NO_CONT_MAPPINGS) == 0) + __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + + init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags); + + phys += next - addr; + } while (addr = next, addr != end); +} + static inline bool use_1G_block(unsigned long addr, unsigned long next, unsigned long phys) { @@ -209,7 +262,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), - bool page_mappings_only) + int flags) { pud_t *pud; unsigned long next; @@ -231,7 +284,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys) && !page_mappings_only) { + if (use_1G_block(addr, next, phys) && + (flags & NO_BLOCK_MAPPINGS) == 0) { pud_set_huge(pud, phys, prot); /* @@ -241,8 +295,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), pud_val(*pud))); } else { - alloc_init_pmd(pud, addr, next, phys, prot, - pgtable_alloc, page_mappings_only); + alloc_init_cont_pmd(pud, addr, next, phys, prot, + pgtable_alloc, flags); BUG_ON(pud_val(old_pud) != 0 && pud_val(old_pud) != pud_val(*pud)); @@ -257,7 +311,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), - bool page_mappings_only) + int flags) { unsigned long addr, length, end, next; pgd_t *pgd = pgd_offset_raw(pgdir, virt); @@ -277,7 +331,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, do { next = pgd_addr_end(addr, end); alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc, - page_mappings_only); + flags); phys += next - addr; } while (pgd++, addr = next, addr != end); } @@ -306,82 +360,80 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, false); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only) { + int flags = 0; + BUG_ON(mm == &init_mm); + if (page_mappings_only) + flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - pgd_pgtable_alloc, page_mappings_only); + pgd_pgtable_alloc, flags); } -static void create_mapping_late(phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot) +static void update_mapping_prot(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { - pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - NULL, 
debug_pagealloc_enabled()); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); + + /* flush the TLBs after updating live kernel mappings */ + flush_tlb_kernel_range(virt, virt + size); } -static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) +static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, + phys_addr_t end, pgprot_t prot, int flags) { - phys_addr_t kernel_start = __pa_symbol(_text); - phys_addr_t kernel_end = __pa_symbol(__init_begin); - - /* - * Take care not to create a writable alias for the - * read-only text and rodata sections of the kernel image. - */ - - /* No overlap with the kernel text/rodata */ - if (end < kernel_start || start >= kernel_end) { - __create_pgd_mapping(pgd, start, __phys_to_virt(start), - end - start, PAGE_KERNEL, - early_pgtable_alloc, - debug_pagealloc_enabled()); - return; - } - - /* - * This block overlaps the kernel text/rodata mappings. - * Map the portion(s) which don't overlap. - */ - if (start < kernel_start) - __create_pgd_mapping(pgd, start, - __phys_to_virt(start), - kernel_start - start, PAGE_KERNEL, - early_pgtable_alloc, - debug_pagealloc_enabled()); - if (kernel_end < end) - __create_pgd_mapping(pgd, kernel_end, - __phys_to_virt(kernel_end), - end - kernel_end, PAGE_KERNEL, - early_pgtable_alloc, - debug_pagealloc_enabled()); + __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, + prot, early_pgtable_alloc, flags); +} +void __init mark_linear_text_alias_ro(void) +{ /* - * Map the linear alias of the [_text, __init_begin) interval as - * read-only/non-executable. This makes the contents of the - * region accessible to subsystems such as hibernate, but - * protects it from inadvertent modification or execution. + * Remove the write permissions from the linear alias of .text/.rodata */ - __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start), - kernel_end - kernel_start, PAGE_KERNEL_RO, - early_pgtable_alloc, debug_pagealloc_enabled()); + update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text), + (unsigned long)__init_begin - (unsigned long)_text, + PAGE_KERNEL_RO); } static void __init map_mem(pgd_t *pgd) { + phys_addr_t kernel_start = __pa_symbol(_text); + phys_addr_t kernel_end = __pa_symbol(__init_begin); struct memblock_region *reg; + int flags = 0; + + if (debug_pagealloc_enabled()) + flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + + /* + * Take care not to create a writable alias for the + * read-only text and rodata sections of the kernel image. + * So temporarily mark them as NOMAP to skip mappings in + * the following for-loop + */ + memblock_mark_nomap(kernel_start, kernel_end - kernel_start); +#ifdef CONFIG_KEXEC_CORE + if (crashk_res.end) + memblock_mark_nomap(crashk_res.start, + resource_size(&crashk_res)); +#endif /* map all the memory banks */ for_each_memblock(memory, reg) { @@ -393,33 +445,57 @@ static void __init map_mem(pgd_t *pgd) if (memblock_is_nomap(reg)) continue; - __map_memblock(pgd, start, end); + __map_memblock(pgd, start, end, PAGE_KERNEL, flags); + } + + /* + * Map the linear alias of the [_text, __init_begin) interval + * as non-executable now, and remove the write permission in + * mark_linear_text_alias_ro() below (which will be called after + * alternative patching has completed). This makes the contents + * of the region accessible to subsystems such as hibernate, + * but protects it from inadvertent modification or execution. 
+ * Note that contiguous mappings cannot be remapped in this way, + * so we should avoid them here. + */ + __map_memblock(pgd, kernel_start, kernel_end, + PAGE_KERNEL, NO_CONT_MAPPINGS); + memblock_clear_nomap(kernel_start, kernel_end - kernel_start); + +#ifdef CONFIG_KEXEC_CORE + /* + * Use page-level mappings here so that we can shrink the region + * in page granularity and put back unused memory to buddy system + * through /sys/kernel/kexec_crash_size interface. + */ + if (crashk_res.end) { + __map_memblock(pgd, crashk_res.start, crashk_res.end + 1, + PAGE_KERNEL, + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_res.start, + resource_size(&crashk_res)); } +#endif } void mark_rodata_ro(void) { unsigned long section_size; - section_size = (unsigned long)_etext - (unsigned long)_text; - create_mapping_late(__pa_symbol(_text), (unsigned long)_text, - section_size, PAGE_KERNEL_ROX); /* * mark .rodata as read only. Use __init_begin rather than __end_rodata * to cover NOTES and EXCEPTION_TABLE. */ section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; - create_mapping_late(__pa_symbol(__start_rodata), (unsigned long)__start_rodata, + update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata, section_size, PAGE_KERNEL_RO); - /* flush the TLBs after updating live kernel mappings */ - flush_tlb_all(); - debug_checkwx(); } static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, - pgprot_t prot, struct vm_struct *vma) + pgprot_t prot, struct vm_struct *vma, + int flags) { phys_addr_t pa_start = __pa_symbol(va_start); unsigned long size = va_end - va_start; @@ -428,7 +504,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, BUG_ON(!PAGE_ALIGNED(size)); __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, - early_pgtable_alloc, debug_pagealloc_enabled()); + early_pgtable_alloc, flags); vma->addr = va_start; vma->phys_addr = pa_start; @@ -439,18 +515,39 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, vm_area_add_early(vma); } +static int __init parse_rodata(char *arg) +{ + return strtobool(arg, &rodata_enabled); +} +early_param("rodata", parse_rodata); + /* * Create fine-grained mappings for the kernel. */ static void __init map_kernel(pgd_t *pgd) { - static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data; + static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext, + vmlinux_initdata, vmlinux_data; - map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text); - map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata); - map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, - &vmlinux_init); - map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); + /* + * External debuggers may need to write directly to the text + * mapping to install SW breakpoints. Allow this (only) when + * explicitly requested with rodata=off. + */ + pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + + /* + * Only rodata will be remapped with different permissions later on, + * all other segments are allowed to use contiguous mappings. 
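
text_prot above is chosen from rodata_enabled, which parse_rodata() fills in via strtobool(); illustrative command-line usage (strtobool() accepts several spellings, on/off shown here):

  rodata=on   kernel text mapped PAGE_KERNEL_ROX (read-only)
  rodata=off  kernel text mapped PAGE_KERNEL_EXEC, writable, so an
              external debugger can install software breakpoints
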
+ */ + map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0); + map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL, + &vmlinux_rodata, NO_CONT_MAPPINGS); + map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot, + &vmlinux_inittext, 0); + map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL, + &vmlinux_initdata, 0); + map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0); if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { /* diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 8def55e7249b..3212ee0558f6 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -125,20 +125,23 @@ int set_memory_x(unsigned long addr, int numpages) } EXPORT_SYMBOL_GPL(set_memory_x); -#ifdef CONFIG_DEBUG_PAGEALLOC -void __kernel_map_pages(struct page *page, int numpages, int enable) +int set_memory_valid(unsigned long addr, int numpages, int enable) { - unsigned long addr = (unsigned long) page_address(page); - if (enable) - __change_memory_common(addr, PAGE_SIZE * numpages, + return __change_memory_common(addr, PAGE_SIZE * numpages, __pgprot(PTE_VALID), __pgprot(0)); else - __change_memory_common(addr, PAGE_SIZE * numpages, + return __change_memory_common(addr, PAGE_SIZE * numpages, __pgprot(0), __pgprot(PTE_VALID)); } + +#ifdef CONFIG_DEBUG_PAGEALLOC +void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + set_memory_valid((unsigned long)page_address(page), numpages, enable); +} #ifdef CONFIG_HIBERNATION /* * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 4a5bb967250b..22e08d272db7 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -225,7 +225,7 @@ static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type, if (iort_node->type == type && ACPI_SUCCESS(callback(iort_node, context))) - return iort_node; + return iort_node; iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort_node, iort_node->length); @@ -253,17 +253,15 @@ static acpi_status iort_match_node_callback(struct acpi_iort_node *node, void *context) { struct device *dev = context; - acpi_status status; + acpi_status status = AE_NOT_FOUND; if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT) { struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_device *adev = to_acpi_device_node(dev->fwnode); struct acpi_iort_named_component *ncomp; - if (!adev) { - status = AE_NOT_FOUND; + if (!adev) goto out; - } status = acpi_get_name(adev->handle, ACPI_FULL_PATHNAME, &buf); if (ACPI_FAILURE(status)) { @@ -289,8 +287,6 @@ static acpi_status iort_match_node_callback(struct acpi_iort_node *node, */ status = pci_rc->pci_segment_number == pci_domain_nr(bus) ? 
AE_OK : AE_NOT_FOUND; - } else { - status = AE_NOT_FOUND; } out: return status; @@ -322,8 +318,7 @@ static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in, static struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node, - u32 *id_out, u8 type_mask, - int index) + u32 *id_out, int index) { struct acpi_iort_node *parent; struct acpi_iort_id_mapping *map; @@ -345,9 +340,6 @@ struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node, parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table, map->output_reference); - if (!(IORT_TYPE_MASK(parent->type) & type_mask)) - return NULL; - if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) { if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT || node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) { @@ -359,11 +351,11 @@ struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node, return NULL; } -static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node, - u32 rid_in, u32 *rid_out, - u8 type_mask) +static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node, + u32 id_in, u32 *id_out, + u8 type_mask) { - u32 rid = rid_in; + u32 id = id_in; /* Parse the ID mapping tree to find specified node type */ while (node) { @@ -371,8 +363,8 @@ static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node, int i; if (IORT_TYPE_MASK(node->type) & type_mask) { - if (rid_out) - *rid_out = rid; + if (id_out) + *id_out = id; return node; } @@ -389,9 +381,9 @@ static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node, goto fail_map; } - /* Do the RID translation */ + /* Do the ID translation */ for (i = 0; i < node->mapping_count; i++, map++) { - if (!iort_id_map(map, node->type, rid, &rid)) + if (!iort_id_map(map, node->type, id, &id)) break; } @@ -403,13 +395,41 @@ static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node, } fail_map: - /* Map input RID to output RID unchanged on mapping failure*/ - if (rid_out) - *rid_out = rid_in; + /* Map input ID to output ID unchanged on mapping failure */ + if (id_out) + *id_out = id_in; return NULL; } +static +struct acpi_iort_node *iort_node_map_platform_id(struct acpi_iort_node *node, + u32 *id_out, u8 type_mask, + int index) +{ + struct acpi_iort_node *parent; + u32 id; + + /* step 1: retrieve the initial dev id */ + parent = iort_node_get_id(node, &id, index); + if (!parent) + return NULL; + + /* + * optional step 2: map the initial dev id if its parent is not + * the target type we want, map it again for the use cases such + * as NC (named component) -> SMMU -> ITS. If the type is matched, + * return the initial dev id and its parent pointer directly. + */ + if (!(IORT_TYPE_MASK(parent->type) & type_mask)) + parent = iort_node_map_id(parent, id, id_out, type_mask); + else + if (id_out) + *id_out = id; + + return parent; +} + static struct acpi_iort_node *iort_find_dev_node(struct device *dev) { struct pci_bus *pbus; @@ -443,13 +463,38 @@ u32 iort_msi_map_rid(struct device *dev, u32 req_id) if (!node) return req_id; - iort_node_map_rid(node, req_id, &dev_id, IORT_MSI_TYPE); + iort_node_map_id(node, req_id, &dev_id, IORT_MSI_TYPE); return dev_id; } /** + * iort_pmsi_get_dev_id() - Get the device id for a device + * @dev: The device for which the mapping is to be done. + * @dev_id: The device ID found. 
+ * + * Returns: 0 if a dev id is found, -ENODEV on error + */ +int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id) +{ + int i; + struct acpi_iort_node *node; + + node = iort_find_dev_node(dev); + if (!node) + return -ENODEV; + + for (i = 0; i < node->mapping_count; i++) { + if (iort_node_map_platform_id(node, dev_id, IORT_MSI_TYPE, i)) + return 0; + } + + return -ENODEV; +} + +/** * iort_dev_find_its_id() - Find the ITS identifier for a device * @dev: The device. + * @req_id: Device's requester ID * @idx: Index of the ITS identifier list. * @its_id: ITS identifier. * @@ -465,7 +510,7 @@ static int iort_dev_find_its_id(struct device *dev, u32 req_id, if (!node) return -ENXIO; - node = iort_node_map_rid(node, req_id, NULL, IORT_MSI_TYPE); + node = iort_node_map_id(node, req_id, NULL, IORT_MSI_TYPE); if (!node) return -ENXIO; @@ -503,6 +548,56 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id) return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI); } +/** + * iort_get_platform_device_domain() - Find MSI domain related to a + * platform device + * @dev: the dev pointer associated with the platform device + * + * Returns: the MSI domain for this device, NULL otherwise + */ +static struct irq_domain *iort_get_platform_device_domain(struct device *dev) +{ + struct acpi_iort_node *node, *msi_parent; + struct fwnode_handle *iort_fwnode; + struct acpi_iort_its_group *its; + int i; + + /* find its associated iort node */ + node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT, + iort_match_node_callback, dev); + if (!node) + return NULL; + + /* then find its msi parent node */ + for (i = 0; i < node->mapping_count; i++) { + msi_parent = iort_node_map_platform_id(node, NULL, + IORT_MSI_TYPE, i); + if (msi_parent) + break; + } + + if (!msi_parent) + return NULL; + + /* Move to ITS specific data */ + its = (struct acpi_iort_its_group *)msi_parent->node_data; + + iort_fwnode = iort_find_domain_token(its->identifiers[0]); + if (!iort_fwnode) + return NULL; + + return irq_find_matching_fwnode(iort_fwnode, DOMAIN_BUS_PLATFORM_MSI); +} + +void acpi_configure_pmsi_domain(struct device *dev) +{ + struct irq_domain *msi_domain; + + msi_domain = iort_get_platform_device_domain(dev); + if (msi_domain) + dev_set_msi_domain(dev, msi_domain); +} + static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data) { u32 *rid = data; @@ -594,8 +689,8 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) if (!node) return NULL; - parent = iort_node_map_rid(node, rid, &streamid, - IORT_IOMMU_TYPE); + parent = iort_node_map_id(node, rid, &streamid, + IORT_IOMMU_TYPE); ops = iort_iommu_xlate(dev, parent, streamid); @@ -607,14 +702,15 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) if (!node) return NULL; - parent = iort_node_get_id(node, &streamid, - IORT_IOMMU_TYPE, i++); + parent = iort_node_map_platform_id(node, &streamid, + IORT_IOMMU_TYPE, i++); while (parent) { ops = iort_iommu_xlate(dev, parent, streamid); - parent = iort_node_get_id(node, &streamid, - IORT_IOMMU_TYPE, i++); + parent = iort_node_map_platform_id(node, &streamid, + IORT_IOMMU_TYPE, + i++); } } diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index fb19e1cdb641..ec31b439b4c8 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -6,6 +6,8 @@ * * This file is released under the GPLv2.
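
Taken together with the glue.c hunk that follows, the platform-device MSI wiring amounts to this call chain, a sketch assembled from the functions added in this patch (taken for devices on the platform bus):

  acpi_platform_notify(dev)                 /* drivers/acpi/glue.c */
    -> acpi_configure_pmsi_domain(dev)
         -> iort_get_platform_device_domain(dev)
              -> iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT, ...)
              -> iort_node_map_platform_id(node, NULL, IORT_MSI_TYPE, i)
              -> irq_find_matching_fwnode(..., DOMAIN_BUS_PLATFORM_MSI)
         -> dev_set_msi_domain(dev, msi_domain)
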
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index fb19e1cdb641..ec31b439b4c8 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -6,6 +6,8 @@
  *
  * This file is released under the GPLv2.
  */
+
+#include <linux/acpi_iort.h>
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/list.h>
@@ -14,6 +16,7 @@
 #include <linux/rwsem.h>
 #include <linux/acpi.h>
 #include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
 
 #include "internal.h"
 
@@ -322,6 +325,9 @@ static int acpi_platform_notify(struct device *dev)
 	if (!adev)
 		goto out;
 
+	if (dev->bus == &platform_bus_type)
+		acpi_configure_pmsi_domain(dev);
+
 	if (type && type->setup)
 		type->setup(dev);
 	else if (adev->handler && adev->handler->bind)
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 260c4b4b492e..82973b86efe4 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -16,6 +16,22 @@
 
 #include "efistub.h"
 
+#define EFI_DT_ADDR_CELLS_DEFAULT 2
+#define EFI_DT_SIZE_CELLS_DEFAULT 2
+
+static void fdt_update_cell_size(efi_system_table_t *sys_table, void *fdt)
+{
+	int offset;
+
+	offset = fdt_path_offset(fdt, "/");
+	/* Set the #address-cells and #size-cells values for an empty tree */
+
+	fdt_setprop_u32(fdt, offset, "#address-cells",
+			EFI_DT_ADDR_CELLS_DEFAULT);
+
+	fdt_setprop_u32(fdt, offset, "#size-cells", EFI_DT_SIZE_CELLS_DEFAULT);
+}
+
 static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 			       unsigned long orig_fdt_size,
 			       void *fdt, int new_fdt_size, char *cmdline_ptr,
@@ -42,10 +58,18 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 		}
 	}
 
-	if (orig_fdt)
+	if (orig_fdt) {
 		status = fdt_open_into(orig_fdt, fdt, new_fdt_size);
-	else
+	} else {
 		status = fdt_create_empty_tree(fdt, new_fdt_size);
+		if (status == 0) {
+			/*
+			 * Any failure from the following function is
+			 * non-critical.
+			 */
+			fdt_update_cell_size(sys_table, fdt);
+		}
+	}
 
 	if (status != 0)
 		goto fdt_set_fail;
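For context, the cell-size fix-up can be reproduced outside the stub with
plain libfdt (link with -lfdt; the buffer size and printed message here are
arbitrary). It builds an empty tree and pins the root #address-cells and
#size-cells to 2 so that 64-bit values, such as a crash kernel's usable
memory range, can be expressed unambiguously:

	#include <libfdt.h>
	#include <stdio.h>

	int main(void)
	{
		static char buf[1024];
		int off;

		if (fdt_create_empty_tree(buf, sizeof(buf)) != 0)
			return 1;

		off = fdt_path_offset(buf, "/");	/* root node */
		if (off < 0)
			return 1;

		/* Two cells per address/size, as the stub's defaults do. */
		fdt_setprop_u32(buf, off, "#address-cells", 2);
		fdt_setprop_u32(buf, off, "#size-cells", 2);

		printf("empty tree initialized\n");
		return 0;
	}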
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 77e08099e554..26e25d85eb3e 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -34,6 +34,8 @@ void acpi_iort_init(void);
 bool iort_node_match(u8 type);
 u32 iort_msi_map_rid(struct device *dev, u32 req_id);
 struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
+void acpi_configure_pmsi_domain(struct device *dev);
+int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 /* IOMMU interface */
 void iort_set_dma_mask(struct device *dev);
 const struct iommu_ops *iort_iommu_configure(struct device *dev);
@@ -45,6 +47,7 @@ static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 static inline struct irq_domain *iort_get_device_domain(struct device *dev,
 							 u32 req_id)
 { return NULL; }
+static inline void acpi_configure_pmsi_domain(struct device *dev) { }
 /* IOMMU interface */
 static inline void iort_set_dma_mask(struct device *dev) { }
 static inline
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index bdfc65af4152..4ce24a376262 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -93,6 +93,7 @@ int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
+int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
 ulong choose_memblock_flags(void);
 
 /* Low level functions */
@@ -335,6 +336,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
 phys_addr_t memblock_start_of_DRAM(void);
 phys_addr_t memblock_end_of_DRAM(void);
 void memblock_enforce_memory_limit(phys_addr_t memory_limit);
+void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
 void memblock_mem_limit_remove_map(phys_addr_t limit);
 bool memblock_is_memory(phys_addr_t addr);
 int memblock_is_map_memory(phys_addr_t addr);
diff --git a/include/linux/pe.h b/include/linux/pe.h
index e170b95e763b..143ce75be5f0 100644
--- a/include/linux/pe.h
+++ b/include/linux/pe.h
@@ -23,34 +23,6 @@
 
 #define MZ_MAGIC	0x5a4d	/* "MZ" */
 
-struct mz_hdr {
-	uint16_t magic;		/* MZ_MAGIC */
-	uint16_t lbsize;	/* size of last used block */
-	uint16_t blocks;	/* pages in file, 0x3 */
-	uint16_t relocs;	/* relocations */
-	uint16_t hdrsize;	/* header size in "paragraphs" */
-	uint16_t min_extra_pps;	/* .bss */
-	uint16_t max_extra_pps;	/* runtime limit for the arena size */
-	uint16_t ss;		/* relative stack segment */
-	uint16_t sp;		/* initial %sp register */
-	uint16_t checksum;	/* word checksum */
-	uint16_t ip;		/* initial %ip register */
-	uint16_t cs;		/* initial %cs relative to load segment */
-	uint16_t reloc_table_offset;	/* offset of the first relocation */
-	uint16_t overlay_num;	/* overlay number. set to 0. */
-	uint16_t reserved0[4];	/* reserved */
-	uint16_t oem_id;	/* oem identifier */
-	uint16_t oem_info;	/* oem specific */
-	uint16_t reserved1[10];	/* reserved */
-	uint32_t peaddr;	/* address of pe header */
-	char     message[64];	/* message to print */
-};
-
-struct mz_reloc {
-	uint16_t offset;
-	uint16_t segment;
-};
-
 #define PE_MAGIC		0x00004550	/* "PE\0\0" */
 #define PE_OPT_MAGIC_PE32	0x010b
 #define PE_OPT_MAGIC_PE32_ROM	0x0107
@@ -62,6 +34,7 @@ struct mz_reloc {
 #define IMAGE_FILE_MACHINE_AMD64	0x8664
 #define IMAGE_FILE_MACHINE_ARM		0x01c0
 #define IMAGE_FILE_MACHINE_ARMV7	0x01c4
+#define IMAGE_FILE_MACHINE_ARM64	0xaa64
 #define IMAGE_FILE_MACHINE_EBC		0x0ebc
 #define IMAGE_FILE_MACHINE_I386		0x014c
 #define IMAGE_FILE_MACHINE_IA64		0x0200
@@ -98,17 +71,6 @@ struct mz_reloc {
 #define IMAGE_FILE_UP_SYSTEM_ONLY	0x4000
 #define IMAGE_FILE_BYTES_REVERSED_HI	0x8000
 
-struct pe_hdr {
-	uint32_t magic;		/* PE magic */
-	uint16_t machine;	/* machine type */
-	uint16_t sections;	/* number of sections */
-	uint32_t timestamp;	/* time_t */
-	uint32_t symbol_table;	/* symbol table offset */
-	uint32_t symbols;	/* number of symbols */
-	uint16_t opt_hdr_size;	/* size of optional header */
-	uint16_t flags;		/* flags */
-};
-
 #define IMAGE_FILE_OPT_ROM_MAGIC	0x107
 #define IMAGE_FILE_OPT_PE32_MAGIC	0x10b
 #define IMAGE_FILE_OPT_PE32_PLUS_MAGIC	0x20b
@@ -134,6 +96,95 @@ struct pe_hdr {
 #define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER	0x2000
 #define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE	0x8000
 
+/* they actually defined 0x00000000 as well, but I think we'll skip that one. */
+#define IMAGE_SCN_RESERVED_0	0x00000001
+#define IMAGE_SCN_RESERVED_1	0x00000002
+#define IMAGE_SCN_RESERVED_2	0x00000004
+#define IMAGE_SCN_TYPE_NO_PAD	0x00000008 /* don't pad - obsolete */
+#define IMAGE_SCN_RESERVED_3	0x00000010
+#define IMAGE_SCN_CNT_CODE	0x00000020 /* .text */
+#define IMAGE_SCN_CNT_INITIALIZED_DATA	0x00000040 /* .data */
+#define IMAGE_SCN_CNT_UNINITIALIZED_DATA	0x00000080 /* .bss */
+#define IMAGE_SCN_LNK_OTHER	0x00000100 /* reserved */
+#define IMAGE_SCN_LNK_INFO	0x00000200 /* .drectve comments */
+#define IMAGE_SCN_RESERVED_4	0x00000400
+#define IMAGE_SCN_LNK_REMOVE	0x00000800 /* .o only - scn to be rm'd*/
+#define IMAGE_SCN_LNK_COMDAT	0x00001000 /* .o only - COMDAT data */
+#define IMAGE_SCN_RESERVED_5	0x00002000 /* spec omits this */
+#define IMAGE_SCN_RESERVED_6	0x00004000 /* spec omits this */
+#define IMAGE_SCN_GPREL		0x00008000 /* global pointer referenced data */
+/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */
+#define IMAGE_SCN_MEM_PURGEABLE	0x00010000 /* reserved for "future" use */
+#define IMAGE_SCN_16BIT		0x00020000 /* reserved for "future" use */
+#define IMAGE_SCN_LOCKED	0x00040000 /* reserved for "future" use */
+#define IMAGE_SCN_PRELOAD	0x00080000 /* reserved for "future" use */
+/* and here they just stuck a 1-byte integer in the middle of a bitfield */
+#define IMAGE_SCN_ALIGN_1BYTES	0x00100000 /* it does what it says on the box */
+#define IMAGE_SCN_ALIGN_2BYTES	0x00200000
+#define IMAGE_SCN_ALIGN_4BYTES	0x00300000
+#define IMAGE_SCN_ALIGN_8BYTES	0x00400000
+#define IMAGE_SCN_ALIGN_16BYTES	0x00500000
+#define IMAGE_SCN_ALIGN_32BYTES	0x00600000
+#define IMAGE_SCN_ALIGN_64BYTES	0x00700000
+#define IMAGE_SCN_ALIGN_128BYTES	0x00800000
+#define IMAGE_SCN_ALIGN_256BYTES	0x00900000
+#define IMAGE_SCN_ALIGN_512BYTES	0x00a00000
+#define IMAGE_SCN_ALIGN_1024BYTES	0x00b00000
+#define IMAGE_SCN_ALIGN_2048BYTES	0x00c00000
+#define IMAGE_SCN_ALIGN_4096BYTES	0x00d00000
+#define IMAGE_SCN_ALIGN_8192BYTES	0x00e00000
+#define IMAGE_SCN_LNK_NRELOC_OVFL	0x01000000 /* extended relocations */
+#define IMAGE_SCN_MEM_DISCARDABLE	0x02000000 /* scn can be discarded */
+#define IMAGE_SCN_MEM_NOT_CACHED	0x04000000 /* cannot be cached */
+#define IMAGE_SCN_MEM_NOT_PAGED	0x08000000 /* not pageable */
+#define IMAGE_SCN_MEM_SHARED	0x10000000 /* can be shared */
+#define IMAGE_SCN_MEM_EXECUTE	0x20000000 /* can be executed as code */
+#define IMAGE_SCN_MEM_READ	0x40000000 /* readable */
+#define IMAGE_SCN_MEM_WRITE	0x80000000 /* writeable */
+
+#define IMAGE_DEBUG_TYPE_CODEVIEW	2
+
+#ifndef __ASSEMBLY__
+
+struct mz_hdr {
+	uint16_t magic;		/* MZ_MAGIC */
+	uint16_t lbsize;	/* size of last used block */
+	uint16_t blocks;	/* pages in file, 0x3 */
+	uint16_t relocs;	/* relocations */
+	uint16_t hdrsize;	/* header size in "paragraphs" */
+	uint16_t min_extra_pps;	/* .bss */
+	uint16_t max_extra_pps;	/* runtime limit for the arena size */
+	uint16_t ss;		/* relative stack segment */
+	uint16_t sp;		/* initial %sp register */
+	uint16_t checksum;	/* word checksum */
+	uint16_t ip;		/* initial %ip register */
+	uint16_t cs;		/* initial %cs relative to load segment */
+	uint16_t reloc_table_offset;	/* offset of the first relocation */
+	uint16_t overlay_num;	/* overlay number. set to 0. */
+	uint16_t reserved0[4];	/* reserved */
+	uint16_t oem_id;	/* oem identifier */
+	uint16_t oem_info;	/* oem specific */
+	uint16_t reserved1[10];	/* reserved */
+	uint32_t peaddr;	/* address of pe header */
+	char     message[64];	/* message to print */
+};
+
+struct mz_reloc {
+	uint16_t offset;
+	uint16_t segment;
+};
+
+struct pe_hdr {
+	uint32_t magic;		/* PE magic */
+	uint16_t machine;	/* machine type */
+	uint16_t sections;	/* number of sections */
+	uint32_t timestamp;	/* time_t */
+	uint32_t symbol_table;	/* symbol table offset */
+	uint32_t symbols;	/* number of symbols */
+	uint16_t opt_hdr_size;	/* size of optional header */
+	uint16_t flags;		/* flags */
+};
+
 /* the fact that pe32 isn't padded where pe32+ is 64-bit means union won't
  * work right. vomit. */
 struct pe32_opt_hdr {
@@ -243,52 +294,6 @@ struct section_header {
 	uint32_t flags;
 };
 
-/* they actually defined 0x00000000 as well, but I think we'll skip that one. */
-#define IMAGE_SCN_RESERVED_0	0x00000001
-#define IMAGE_SCN_RESERVED_1	0x00000002
-#define IMAGE_SCN_RESERVED_2	0x00000004
-#define IMAGE_SCN_TYPE_NO_PAD	0x00000008 /* don't pad - obsolete */
-#define IMAGE_SCN_RESERVED_3	0x00000010
-#define IMAGE_SCN_CNT_CODE	0x00000020 /* .text */
-#define IMAGE_SCN_CNT_INITIALIZED_DATA	0x00000040 /* .data */
-#define IMAGE_SCN_CNT_UNINITIALIZED_DATA	0x00000080 /* .bss */
-#define IMAGE_SCN_LNK_OTHER	0x00000100 /* reserved */
-#define IMAGE_SCN_LNK_INFO	0x00000200 /* .drectve comments */
-#define IMAGE_SCN_RESERVED_4	0x00000400
-#define IMAGE_SCN_LNK_REMOVE	0x00000800 /* .o only - scn to be rm'd*/
-#define IMAGE_SCN_LNK_COMDAT	0x00001000 /* .o only - COMDAT data */
-#define IMAGE_SCN_RESERVED_5	0x00002000 /* spec omits this */
-#define IMAGE_SCN_RESERVED_6	0x00004000 /* spec omits this */
-#define IMAGE_SCN_GPREL		0x00008000 /* global pointer referenced data */
-/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */
-#define IMAGE_SCN_MEM_PURGEABLE	0x00010000 /* reserved for "future" use */
-#define IMAGE_SCN_16BIT		0x00020000 /* reserved for "future" use */
-#define IMAGE_SCN_LOCKED	0x00040000 /* reserved for "future" use */
-#define IMAGE_SCN_PRELOAD	0x00080000 /* reserved for "future" use */
-/* and here they just stuck a 1-byte integer in the middle of a bitfield */
-#define IMAGE_SCN_ALIGN_1BYTES	0x00100000 /* it does what it says on the box */
-#define IMAGE_SCN_ALIGN_2BYTES	0x00200000
-#define IMAGE_SCN_ALIGN_4BYTES	0x00300000
-#define IMAGE_SCN_ALIGN_8BYTES	0x00400000
-#define IMAGE_SCN_ALIGN_16BYTES	0x00500000
-#define IMAGE_SCN_ALIGN_32BYTES	0x00600000
-#define IMAGE_SCN_ALIGN_64BYTES	0x00700000
-#define IMAGE_SCN_ALIGN_128BYTES	0x00800000
-#define IMAGE_SCN_ALIGN_256BYTES	0x00900000
-#define IMAGE_SCN_ALIGN_512BYTES	0x00a00000
-#define IMAGE_SCN_ALIGN_1024BYTES	0x00b00000
-#define IMAGE_SCN_ALIGN_2048BYTES	0x00c00000
-#define IMAGE_SCN_ALIGN_4096BYTES	0x00d00000
-#define IMAGE_SCN_ALIGN_8192BYTES	0x00e00000
-#define IMAGE_SCN_LNK_NRELOC_OVFL	0x01000000 /* extended relocations */
-#define IMAGE_SCN_MEM_DISCARDABLE	0x02000000 /* scn can be discarded */
-#define IMAGE_SCN_MEM_NOT_CACHED	0x04000000 /* cannot be cached */
-#define IMAGE_SCN_MEM_NOT_PAGED	0x08000000 /* not pageable */
-#define IMAGE_SCN_MEM_SHARED	0x10000000 /* can be shared */
-#define IMAGE_SCN_MEM_EXECUTE	0x20000000 /* can be executed as code */
-#define IMAGE_SCN_MEM_READ	0x40000000 /* readable */
-#define IMAGE_SCN_MEM_WRITE	0x80000000 /* writeable */
-
 enum x64_coff_reloc_type {
 	IMAGE_REL_AMD64_ABSOLUTE = 0,
 	IMAGE_REL_AMD64_ADDR64,
@@ -445,4 +450,6 @@ struct win_certificate {
 	uint16_t cert_type;
 };
 
+#endif /* !__ASSEMBLY__ */
+
 #endif /* __LINUX_PE_H */
diff --git a/mm/memblock.c b/mm/memblock.c
index 696f06d17c4e..b049c9b2dba8 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -805,6 +805,18 @@ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
 }
 
 /**
+ * memblock_clear_nomap - Clear the MEMBLOCK_NOMAP flag for a specified region.
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * Return 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
+{
+	return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
+}
+
+/**
  * __next_reserved_mem_region - next function for for_each_reserved_region()
  * @idx: pointer to u64 loop variable
  * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL
@@ -1531,11 +1543,37 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
 			      (phys_addr_t)ULLONG_MAX);
 }
 
+void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
+{
+	int start_rgn, end_rgn;
+	int i, ret;
+
+	if (!size)
+		return;
+
+	ret = memblock_isolate_range(&memblock.memory, base, size,
+				     &start_rgn, &end_rgn);
+	if (ret)
+		return;
+
+	/* remove all the MAP regions above and below the capped range */
+	for (i = memblock.memory.cnt - 1; i >= end_rgn; i--)
+		if (!memblock_is_nomap(&memblock.memory.regions[i]))
+			memblock_remove_region(&memblock.memory, i);
+
+	for (i = start_rgn - 1; i >= 0; i--)
+		if (!memblock_is_nomap(&memblock.memory.regions[i]))
+			memblock_remove_region(&memblock.memory, i);
+
+	/* truncate the reserved regions */
+	memblock_remove_range(&memblock.reserved, 0, base);
+	memblock_remove_range(&memblock.reserved,
+			      base + size, (phys_addr_t)ULLONG_MAX);
+}
+
 void __init memblock_mem_limit_remove_map(phys_addr_t limit)
 {
-	struct memblock_type *type = &memblock.memory;
 	phys_addr_t max_addr;
-	int i, ret, start_rgn, end_rgn;
 
 	if (!limit)
 		return;
@@ -1546,19 +1584,7 @@ void __init memblock_mem_limit_remove_map(phys_addr_t limit)
 	if (max_addr == (phys_addr_t)ULLONG_MAX)
 		return;
 
-	ret = memblock_isolate_range(type, max_addr, (phys_addr_t)ULLONG_MAX,
-				     &start_rgn, &end_rgn);
-	if (ret)
-		return;
-
-	/* remove all the MAP regions above the limit */
-	for (i = end_rgn - 1; i >= start_rgn; i--) {
-		if (!memblock_is_nomap(&type->regions[i]))
-			memblock_remove_region(type, i);
-	}
-	/* truncate the reserved regions */
-	memblock_remove_range(&memblock.reserved, max_addr,
-			      (phys_addr_t)ULLONG_MAX);
+	memblock_cap_memory_range(0, max_addr);
 }
 
 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 654dfd40e449..7713d96e85b7 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -29,7 +29,9 @@
 #define DEBUG_SPINLOCK_BUG_ON(p)
 #endif
 
-struct vgic_global __section(.hyp.text) kvm_vgic_global_state = {.gicv3_cpuif = STATIC_KEY_FALSE_INIT,};
+struct vgic_global kvm_vgic_global_state __ro_after_init = {
+	.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
+};
 
 /*
  * Locking order is always:
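Finally, a userspace sketch of what memblock_cap_memory_range() achieves
(hypothetical region type; the real code first splits regions straddling
the boundary via memblock_isolate_range(), which this model skips by
assuming pre-aligned regions): everything mapped outside [base, base + size)
is dropped, while NOMAP regions survive:

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	struct region { uint64_t base, size; bool nomap; };

	static void cap_memory_range(struct region *r, size_t *cnt,
				     uint64_t base, uint64_t size)
	{
		size_t i, j = 0;

		for (i = 0; i < *cnt; i++) {
			bool in = r[i].base < base + size &&
				  r[i].base + r[i].size > base;

			if (in || r[i].nomap)	/* keep in-range and NOMAP */
				r[j++] = r[i];
		}
		*cnt = j;
	}

	int main(void)
	{
		struct region mem[] = {
			{ 0x40000000, 0x10000000, false }, /* dropped */
			{ 0x50000000, 0x08000000, false }, /* kept */
			{ 0x58000000, 0x01000000, true  }, /* kept: NOMAP */
		};
		size_t cnt = 3;

		cap_memory_range(mem, &cnt, 0x50000000, 0x08000000);
		printf("%zu region(s) remain\n", cnt);	/* prints 2 */
		return 0;
	}

This capping is also why memblock_mem_limit_remove_map() can now be
expressed simply as a cap over [0, limit).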