summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/arm64/kasan-offsets.sh3
-rw-r--r--Documentation/arm64/memory.rst45
-rw-r--r--arch/arm64/Kconfig46
-rw-r--r--arch/arm64/include/asm/alternative-macros.h217
-rw-r--r--arch/arm64/include/asm/alternative.h267
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h61
-rw-r--r--arch/arm64/include/asm/cpucaps.h3
-rw-r--r--arch/arm64/include/asm/cpufeature.h8
-rw-r--r--arch/arm64/include/asm/insn.h3
-rw-r--r--arch/arm64/include/asm/memory.h22
-rw-r--r--arch/arm64/include/asm/pgtable.h2
-rw-r--r--arch/arm64/include/asm/rwonce.h73
-rw-r--r--arch/arm64/include/asm/topology.h4
-rw-r--r--arch/arm64/kernel/alternative.c7
-rw-r--r--arch/arm64/kernel/cpufeature.c23
-rw-r--r--arch/arm64/kernel/efi-header.S84
-rw-r--r--arch/arm64/kernel/head.S19
-rw-r--r--arch/arm64/kernel/kaslr.c26
-rw-r--r--arch/arm64/kernel/proton-pack.c1
-rw-r--r--arch/arm64/kernel/setup.c4
-rw-r--r--arch/arm64/kernel/topology.c136
-rw-r--r--arch/arm64/kernel/vdso/Makefile2
-rw-r--r--arch/arm64/kernel/vdso32/Makefile2
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S4
-rw-r--r--arch/arm64/kvm/mmu.c2
-rw-r--r--arch/arm64/lib/mte.S2
-rw-r--r--arch/arm64/mm/init.c56
-rw-r--r--arch/arm64/mm/mmu.c142
-rw-r--r--drivers/acpi/arm64/iort.c55
-rw-r--r--drivers/of/address.c42
-rw-r--r--drivers/of/unittest.c21
-rw-r--r--include/linux/acpi_iort.h4
-rw-r--r--include/linux/mmzone.h20
-rw-r--r--include/linux/of.h7
34 files changed, 923 insertions, 490 deletions
diff --git a/Documentation/arm64/kasan-offsets.sh b/Documentation/arm64/kasan-offsets.sh
index 2b7a021db363..2dc5f9e18039 100644
--- a/Documentation/arm64/kasan-offsets.sh
+++ b/Documentation/arm64/kasan-offsets.sh
@@ -1,12 +1,11 @@
#!/bin/sh
# Print out the KASAN_SHADOW_OFFSETS required to place the KASAN SHADOW
-# start address at the mid-point of the kernel VA space
+# start address at the top of the linear region
print_kasan_offset () {
printf "%02d\t" $1
printf "0x%08x00000000\n" $(( (0xffffffff & (-1 << ($1 - 1 - 32))) \
- + (1 << ($1 - 32 - $2)) \
- (1 << (64 - 32 - $2)) ))
}
diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index cf03b3290800..e7522e5c8322 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arm64/memory.rst
@@ -32,17 +32,16 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
-----------------------------------------------------------------------
0000000000000000 0000ffffffffffff 256TB user
ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map
- ffff800000000000 ffff9fffffffffff 32TB kasan shadow region
- ffffa00000000000 ffffa00007ffffff 128MB bpf jit region
- ffffa00008000000 ffffa0000fffffff 128MB modules
- ffffa00010000000 fffffdffbffeffff ~93TB vmalloc
- fffffdffbfff0000 fffffdfffe5f8fff ~998MB [guard region]
- fffffdfffe5f9000 fffffdfffe9fffff 4124KB fixed mappings
- fffffdfffea00000 fffffdfffebfffff 2MB [guard region]
- fffffdfffec00000 fffffdffffbfffff 16MB PCI I/O space
- fffffdffffc00000 fffffdffffdfffff 2MB [guard region]
- fffffdffffe00000 ffffffffffdfffff 2TB vmemmap
- ffffffffffe00000 ffffffffffffffff 2MB [guard region]
+ [ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region]
+ ffff800000000000 ffff800007ffffff 128MB bpf jit region
+ ffff800008000000 ffff80000fffffff 128MB modules
+ ffff800010000000 fffffbffefffffff 124TB vmalloc
+ fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
+ fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
+ fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
+ fffffbffff800000 fffffbffffffffff 8MB [guard region]
+ fffffc0000000000 fffffdffffffffff 2TB vmemmap
+ fffffe0000000000 ffffffffffffffff 2TB [guard region]
AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support)::
@@ -50,19 +49,17 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
Start End Size Use
-----------------------------------------------------------------------
0000000000000000 000fffffffffffff 4PB user
- fff0000000000000 fff7ffffffffffff 2PB kernel logical memory map
- fff8000000000000 fffd9fffffffffff 1440TB [gap]
- fffda00000000000 ffff9fffffffffff 512TB kasan shadow region
- ffffa00000000000 ffffa00007ffffff 128MB bpf jit region
- ffffa00008000000 ffffa0000fffffff 128MB modules
- ffffa00010000000 fffff81ffffeffff ~88TB vmalloc
- fffff81fffff0000 fffffc1ffe58ffff ~3TB [guard region]
- fffffc1ffe590000 fffffc1ffe9fffff 4544KB fixed mappings
- fffffc1ffea00000 fffffc1ffebfffff 2MB [guard region]
- fffffc1ffec00000 fffffc1fffbfffff 16MB PCI I/O space
- fffffc1fffc00000 fffffc1fffdfffff 2MB [guard region]
- fffffc1fffe00000 ffffffffffdfffff 3968GB vmemmap
- ffffffffffe00000 ffffffffffffffff 2MB [guard region]
+ fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map
+ [fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region]
+ ffff800000000000 ffff800007ffffff 128MB bpf jit region
+ ffff800008000000 ffff80000fffffff 128MB modules
+ ffff800010000000 fffffbffefffffff 124TB vmalloc
+ fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
+ fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
+ fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
+ fffffbffff800000 fffffbffffffffff 8MB [guard region]
+ fffffc0000000000 ffffffdfffffffff ~4TB vmemmap
+ ffffffe000000000 ffffffffffffffff 128GB [guard region]
Translation table lookup with 4KB pages::
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1515f6f153a0..2272a9505727 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -331,16 +331,16 @@ config BROKEN_GAS_INST
config KASAN_SHADOW_OFFSET
hex
depends on KASAN
- default 0xdfffa00000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
- default 0xdfffd00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
- default 0xdffffe8000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
- default 0xdfffffd000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
- default 0xdffffffa00000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
- default 0xefff900000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
- default 0xefffc80000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
- default 0xeffffe4000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
- default 0xefffffc800000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
- default 0xeffffff900000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
+ default 0xdfff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
+ default 0xdfffc00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
+ default 0xdffffe0000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
+ default 0xdfffffc000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
+ default 0xdffffff800000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
+ default 0xefff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
+ default 0xefffc00000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
+ default 0xeffffe0000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
+ default 0xefffffc000000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
+ default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
default 0xffffffffffffffff
source "arch/arm64/Kconfig.platforms"
@@ -1388,6 +1388,9 @@ config ARM64_PAN
The feature is detected at runtime, and will remain as a 'nop'
instruction if the cpu does not implement the feature.
+config AS_HAS_LDAPR
+ def_bool $(as-instr,.arch_extension rcpc)
+
config ARM64_LSE_ATOMICS
bool
default ARM64_USE_LSE_ATOMICS
@@ -1846,15 +1849,36 @@ config CMDLINE
entering them here. As a minimum, you should specify the the
root device (e.g. root=/dev/nfs).
+choice
+ prompt "Kernel command line type" if CMDLINE != ""
+ default CMDLINE_FROM_BOOTLOADER
+ help
+ Choose how the kernel will handle the provided default kernel
+ command line string.
+
+config CMDLINE_FROM_BOOTLOADER
+ bool "Use bootloader kernel arguments if available"
+ help
+ Uses the command-line options passed by the boot loader. If
+ the boot loader doesn't provide any, the default kernel command
+ string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+ bool "Extend bootloader kernel arguments"
+ help
+ The command-line arguments provided by the boot loader will be
+ appended to the default kernel command string.
+
config CMDLINE_FORCE
bool "Always use the default kernel command string"
- depends on CMDLINE != ""
help
Always use the default kernel command string, even if the boot
loader passes other arguments to the kernel.
This is useful if you cannot or don't want to change the
command-line options your boot loader passes to the kernel.
+endchoice
+
config EFI_STUB
bool
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
new file mode 100644
index 000000000000..5df500dcc627
--- /dev/null
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ALTERNATIVE_MACROS_H
+#define __ASM_ALTERNATIVE_MACROS_H
+
+#include <asm/cpucaps.h>
+
+#define ARM64_CB_PATCH ARM64_NCAPS
+
+/* A64 instructions are always 32 bits. */
+#define AARCH64_INSN_SIZE 4
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+
+#define ALTINSTR_ENTRY(feature) \
+ " .word 661b - .\n" /* label */ \
+ " .word 663f - .\n" /* new instruction */ \
+ " .hword " __stringify(feature) "\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* source len */ \
+ " .byte 664f-663f\n" /* replacement len */
+
+#define ALTINSTR_ENTRY_CB(feature, cb) \
+ " .word 661b - .\n" /* label */ \
+ " .word " __stringify(cb) "- .\n" /* callback */ \
+ " .hword " __stringify(feature) "\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* source len */ \
+ " .byte 664f-663f\n" /* replacement len */
+
+/*
+ * alternative assembly primitive:
+ *
+ * If any of these .org directive fail, it means that insn1 and insn2
+ * don't have the same length. This used to be written as
+ *
+ * .if ((664b-663b) != (662b-661b))
+ * .error "Alternatives instruction length mismatch"
+ * .endif
+ *
+ * but most assemblers die if insn1 or insn2 have a .inst. This should
+ * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
+ * containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
+ */
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
+ ".if "__stringify(cfg_enabled)" == 1\n" \
+ "661:\n\t" \
+ oldinstr "\n" \
+ "662:\n" \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(feature) \
+ ".popsection\n" \
+ ".subsection 1\n" \
+ "663:\n\t" \
+ newinstr "\n" \
+ "664:\n\t" \
+ ".org . - (664b-663b) + (662b-661b)\n\t" \
+ ".org . - (662b-661b) + (664b-663b)\n\t" \
+ ".previous\n" \
+ ".endif\n"
+
+#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
+ ".if "__stringify(cfg_enabled)" == 1\n" \
+ "661:\n\t" \
+ oldinstr "\n" \
+ "662:\n" \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY_CB(feature, cb) \
+ ".popsection\n" \
+ "663:\n\t" \
+ "664:\n\t" \
+ ".endif\n"
+
+#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
+ __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+
+#define ALTERNATIVE_CB(oldinstr, cb) \
+ __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
+#else
+
+#include <asm/assembler.h>
+
+.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+ .word \orig_offset - .
+ .word \alt_offset - .
+ .hword \feature
+ .byte \orig_len
+ .byte \alt_len
+.endm
+
+.macro alternative_insn insn1, insn2, cap, enable = 1
+ .if \enable
+661: \insn1
+662: .pushsection .altinstructions, "a"
+ altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
+ .popsection
+ .subsection 1
+663: \insn2
+664: .previous
+ .org . - (664b-663b) + (662b-661b)
+ .org . - (662b-661b) + (664b-663b)
+ .endif
+.endm
+
+/*
+ * Alternative sequences
+ *
+ * The code for the case where the capability is not present will be
+ * assembled and linked as normal. There are no restrictions on this
+ * code.
+ *
+ * The code for the case where the capability is present will be
+ * assembled into a special section to be used for dynamic patching.
+ * Code for that case must:
+ *
+ * 1. Be exactly the same length (in bytes) as the default code
+ * sequence.
+ *
+ * 2. Not contain a branch target that is used outside of the
+ * alternative sequence it is defined in (branches into an
+ * alternative sequence are not fixed up).
+ */
+
+/*
+ * Begin an alternative code sequence.
+ */
+.macro alternative_if_not cap
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
+ .popsection
+661:
+.endm
+
+.macro alternative_if cap
+ .set .Lasm_alt_mode, 1
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
+ .popsection
+ .subsection 1
+ .align 2 /* So GAS knows label 661 is suitably aligned */
+661:
+.endm
+
+.macro alternative_cb cb
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+ .popsection
+661:
+.endm
+
+/*
+ * Provide the other half of the alternative code sequence.
+ */
+.macro alternative_else
+662:
+ .if .Lasm_alt_mode==0
+ .subsection 1
+ .else
+ .previous
+ .endif
+663:
+.endm
+
+/*
+ * Complete an alternative code sequence.
+ */
+.macro alternative_endif
+664:
+ .if .Lasm_alt_mode==0
+ .previous
+ .endif
+ .org . - (664b-663b) + (662b-661b)
+ .org . - (662b-661b) + (664b-663b)
+.endm
+
+/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
+/*
+ * Provides a trivial alternative or default sequence consisting solely
+ * of NOPs. The number of NOPs is chosen automatically to match the
+ * previous case.
+ */
+.macro alternative_else_nop_endif
+alternative_else
+ nops (662b-661b) / AARCH64_INSN_SIZE
+alternative_endif
+.endm
+
+#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
+ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
+
+.macro user_alt, label, oldinstr, newinstr, cond
+9999: alternative_insn "\oldinstr", "\newinstr", \cond
+ _asm_extable 9999b, \label
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
+ *
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
+ * N.B. If CONFIG_FOO is specified, but not selected, the whole block
+ * will be omitted, including oldinstr.
+ */
+#define ALTERNATIVE(oldinstr, newinstr, ...) \
+ _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
+
+#endif /* __ASM_ALTERNATIVE_MACROS_H */
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 619db9b4c9d5..a38b92e11811 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -2,17 +2,13 @@
#ifndef __ASM_ALTERNATIVE_H
#define __ASM_ALTERNATIVE_H
-#include <asm/cpucaps.h>
-#include <asm/insn.h>
-
-#define ARM64_CB_PATCH ARM64_NCAPS
+#include <asm/alternative-macros.h>
#ifndef __ASSEMBLY__
#include <linux/init.h>
#include <linux/types.h>
#include <linux/stddef.h>
-#include <linux/stringify.h>
struct alt_instr {
s32 orig_offset; /* offset to original instruction */
@@ -35,264 +31,5 @@ void apply_alternatives_module(void *start, size_t length);
static inline void apply_alternatives_module(void *start, size_t length) { }
#endif
-#define ALTINSTR_ENTRY(feature) \
- " .word 661b - .\n" /* label */ \
- " .word 663f - .\n" /* new instruction */ \
- " .hword " __stringify(feature) "\n" /* feature bit */ \
- " .byte 662b-661b\n" /* source len */ \
- " .byte 664f-663f\n" /* replacement len */
-
-#define ALTINSTR_ENTRY_CB(feature, cb) \
- " .word 661b - .\n" /* label */ \
- " .word " __stringify(cb) "- .\n" /* callback */ \
- " .hword " __stringify(feature) "\n" /* feature bit */ \
- " .byte 662b-661b\n" /* source len */ \
- " .byte 664f-663f\n" /* replacement len */
-
-/*
- * alternative assembly primitive:
- *
- * If any of these .org directive fail, it means that insn1 and insn2
- * don't have the same length. This used to be written as
- *
- * .if ((664b-663b) != (662b-661b))
- * .error "Alternatives instruction length mismatch"
- * .endif
- *
- * but most assemblers die if insn1 or insn2 have a .inst. This should
- * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
- * containing commit 4e4d08cf7399b606 or c1baaddf8861).
- *
- * Alternatives with callbacks do not generate replacement instructions.
- */
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
- ".if "__stringify(cfg_enabled)" == 1\n" \
- "661:\n\t" \
- oldinstr "\n" \
- "662:\n" \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(feature) \
- ".popsection\n" \
- ".subsection 1\n" \
- "663:\n\t" \
- newinstr "\n" \
- "664:\n\t" \
- ".org . - (664b-663b) + (662b-661b)\n\t" \
- ".org . - (662b-661b) + (664b-663b)\n\t" \
- ".previous\n" \
- ".endif\n"
-
-#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
- ".if "__stringify(cfg_enabled)" == 1\n" \
- "661:\n\t" \
- oldinstr "\n" \
- "662:\n" \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY_CB(feature, cb) \
- ".popsection\n" \
- "663:\n\t" \
- "664:\n\t" \
- ".endif\n"
-
-#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
- __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
-
-#define ALTERNATIVE_CB(oldinstr, cb) \
- __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
-#else
-
-#include <asm/assembler.h>
-
-.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
- .word \orig_offset - .
- .word \alt_offset - .
- .hword \feature
- .byte \orig_len
- .byte \alt_len
-.endm
-
-.macro alternative_insn insn1, insn2, cap, enable = 1
- .if \enable
-661: \insn1
-662: .pushsection .altinstructions, "a"
- altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
- .popsection
- .subsection 1
-663: \insn2
-664: .previous
- .org . - (664b-663b) + (662b-661b)
- .org . - (662b-661b) + (664b-663b)
- .endif
-.endm
-
-/*
- * Alternative sequences
- *
- * The code for the case where the capability is not present will be
- * assembled and linked as normal. There are no restrictions on this
- * code.
- *
- * The code for the case where the capability is present will be
- * assembled into a special section to be used for dynamic patching.
- * Code for that case must:
- *
- * 1. Be exactly the same length (in bytes) as the default code
- * sequence.
- *
- * 2. Not contain a branch target that is used outside of the
- * alternative sequence it is defined in (branches into an
- * alternative sequence are not fixed up).
- */
-
-/*
- * Begin an alternative code sequence.
- */
-.macro alternative_if_not cap
- .set .Lasm_alt_mode, 0
- .pushsection .altinstructions, "a"
- altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
- .popsection
-661:
-.endm
-
-.macro alternative_if cap
- .set .Lasm_alt_mode, 1
- .pushsection .altinstructions, "a"
- altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
- .popsection
- .subsection 1
- .align 2 /* So GAS knows label 661 is suitably aligned */
-661:
-.endm
-
-.macro alternative_cb cb
- .set .Lasm_alt_mode, 0
- .pushsection .altinstructions, "a"
- altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
- .popsection
-661:
-.endm
-
-/*
- * Provide the other half of the alternative code sequence.
- */
-.macro alternative_else
-662:
- .if .Lasm_alt_mode==0
- .subsection 1
- .else
- .previous
- .endif
-663:
-.endm
-
-/*
- * Complete an alternative code sequence.
- */
-.macro alternative_endif
-664:
- .if .Lasm_alt_mode==0
- .previous
- .endif
- .org . - (664b-663b) + (662b-661b)
- .org . - (662b-661b) + (664b-663b)
-.endm
-
-/*
- * Callback-based alternative epilogue
- */
-.macro alternative_cb_end
-662:
-.endm
-
-/*
- * Provides a trivial alternative or default sequence consisting solely
- * of NOPs. The number of NOPs is chosen automatically to match the
- * previous case.
- */
-.macro alternative_else_nop_endif
-alternative_else
- nops (662b-661b) / AARCH64_INSN_SIZE
-alternative_endif
-.endm
-
-#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
- alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
-
-.macro user_alt, label, oldinstr, newinstr, cond
-9999: alternative_insn "\oldinstr", "\newinstr", \cond
- _asm_extable 9999b, \label
-.endm
-
-/*
- * Generate the assembly for UAO alternatives with exception table entries.
- * This is complicated as there is no post-increment or pair versions of the
- * unprivileged instructions, and USER() only works for single instructions.
- */
-#ifdef CONFIG_ARM64_UAO
- .macro uao_ldp l, reg1, reg2, addr, post_inc
- alternative_if_not ARM64_HAS_UAO
-8888: ldp \reg1, \reg2, [\addr], \post_inc;
-8889: nop;
- nop;
- alternative_else
- ldtr \reg1, [\addr];
- ldtr \reg2, [\addr, #8];
- add \addr, \addr, \post_inc;
- alternative_endif
-
- _asm_extable 8888b,\l;
- _asm_extable 8889b,\l;
- .endm
-
- .macro uao_stp l, reg1, reg2, addr, post_inc
- alternative_if_not ARM64_HAS_UAO
-8888: stp \reg1, \reg2, [\addr], \post_inc;
-8889: nop;
- nop;
- alternative_else
- sttr \reg1, [\addr];
- sttr \reg2, [\addr, #8];
- add \addr, \addr, \post_inc;
- alternative_endif
-
- _asm_extable 8888b,\l;
- _asm_extable 8889b,\l;
- .endm
-
- .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
- alternative_if_not ARM64_HAS_UAO
-8888: \inst \reg, [\addr], \post_inc;
- nop;
- alternative_else
- \alt_inst \reg, [\addr];
- add \addr, \addr, \post_inc;
- alternative_endif
-
- _asm_extable 8888b,\l;
- .endm
-#else
- .macro uao_ldp l, reg1, reg2, addr, post_inc
- USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
- .endm
- .macro uao_stp l, reg1, reg2, addr, post_inc
- USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
- .endm
- .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
- USER(\l, \inst \reg, [\addr], \post_inc)
- .endm
-#endif
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
- *
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
- * N.B. If CONFIG_FOO is specified, but not selected, the whole block
- * will be omitted, including oldinstr.
- */
-#define ALTERNATIVE(oldinstr, newinstr, ...) \
- _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
-
+#endif /* __ASSEMBLY__ */
#endif /* __ASM_ALTERNATIVE_H */
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index f68a0e64482a..2c26ca5b7bb0 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -2,7 +2,7 @@
#ifndef __ASM_ASM_UACCESS_H
#define __ASM_ASM_UACCESS_H
-#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
#include <asm/kernel-pgtable.h>
#include <asm/mmu.h>
#include <asm/sysreg.h>
@@ -58,4 +58,63 @@ alternative_else_nop_endif
.endm
#endif
+/*
+ * Generate the assembly for UAO alternatives with exception table entries.
+ * This is complicated as there is no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+#ifdef CONFIG_ARM64_UAO
+ .macro uao_ldp l, reg1, reg2, addr, post_inc
+ alternative_if_not ARM64_HAS_UAO
+8888: ldp \reg1, \reg2, [\addr], \post_inc;
+8889: nop;
+ nop;
+ alternative_else
+ ldtr \reg1, [\addr];
+ ldtr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+ alternative_endif
+
+ _asm_extable 8888b,\l;
+ _asm_extable 8889b,\l;
+ .endm
+
+ .macro uao_stp l, reg1, reg2, addr, post_inc
+ alternative_if_not ARM64_HAS_UAO
+8888: stp \reg1, \reg2, [\addr], \post_inc;
+8889: nop;
+ nop;
+ alternative_else
+ sttr \reg1, [\addr];
+ sttr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+ alternative_endif
+
+ _asm_extable 8888b,\l;
+ _asm_extable 8889b,\l;
+ .endm
+
+ .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+ alternative_if_not ARM64_HAS_UAO
+8888: \inst \reg, [\addr], \post_inc;
+ nop;
+ alternative_else
+ \alt_inst \reg, [\addr];
+ add \addr, \addr, \post_inc;
+ alternative_endif
+
+ _asm_extable 8888b,\l;
+ .endm
+#else
+ .macro uao_ldp l, reg1, reg2, addr, post_inc
+ USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
+ .endm
+ .macro uao_stp l, reg1, reg2, addr, post_inc
+ USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
+ .endm
+ .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+ USER(\l, \inst \reg, [\addr], \post_inc)
+ .endm
+#endif
+
#endif
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index e7d98997c09c..64ea0bb9f420 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -66,7 +66,8 @@
#define ARM64_HAS_TLB_RANGE 56
#define ARM64_MTE 57
#define ARM64_WORKAROUND_1508412 58
+#define ARM64_HAS_LDAPR 59
-#define ARM64_NCAPS 59
+#define ARM64_NCAPS 60
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 97244d4feca9..f5b44ac354dc 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -765,8 +765,16 @@ static inline bool cpu_has_hw_af(void)
#ifdef CONFIG_ARM64_AMU_EXTN
/* Check whether the cpu supports the Activity Monitors Unit (AMU) */
extern bool cpu_has_amu_feat(int cpu);
+#else
+static inline bool cpu_has_amu_feat(int cpu)
+{
+ return false;
+}
#endif
+/* Get a cpu that supports the Activity Monitors Unit (AMU) */
+extern int get_cpu_with_amu_feat(void);
+
static inline unsigned int get_vmid_bits(u64 mmfr1)
{
int vmid_bits;
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 4b39293d0f72..4ebb9c054ccc 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -10,8 +10,7 @@
#include <linux/build_bug.h>
#include <linux/types.h>
-/* A64 instructions are always 32 bits. */
-#define AARCH64_INSN_SIZE 4
+#include <asm/alternative.h>
#ifndef __ASSEMBLY__
/*
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index cd61239bae8c..556cb2d62b5b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -30,8 +30,8 @@
* keep a constant PAGE_OFFSET and "fallback" to using the higher end
* of the VMEMMAP where 52-bit support is not available in hardware.
*/
-#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) \
- >> (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT))
+#define VMEMMAP_SHIFT (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)
+#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT)
/*
* PAGE_OFFSET - the virtual address of the start of the linear map, at the
@@ -44,17 +44,17 @@
#define _PAGE_OFFSET(va) (-(UL(1) << (va)))
#define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS))
#define KIMAGE_VADDR (MODULES_END)
-#define BPF_JIT_REGION_START (KASAN_SHADOW_END)
+#define BPF_JIT_REGION_START (_PAGE_END(VA_BITS_MIN))
#define BPF_JIT_REGION_SIZE (SZ_128M)
#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define MODULES_VADDR (BPF_JIT_REGION_END)
#define MODULES_VSIZE (SZ_128M)
-#define VMEMMAP_START (-VMEMMAP_SIZE - SZ_2M)
+#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
-#define PCI_IO_END (VMEMMAP_START - SZ_2M)
+#define PCI_IO_END (VMEMMAP_START - SZ_8M)
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
-#define FIXADDR_TOP (PCI_IO_START - SZ_2M)
+#define FIXADDR_TOP (VMEMMAP_START - SZ_32M)
#if VA_BITS > 48
#define VA_BITS_MIN (48)
@@ -76,10 +76,11 @@
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \
+ KASAN_SHADOW_OFFSET)
+#define PAGE_END (KASAN_SHADOW_END - (1UL << (vabits_actual - KASAN_SHADOW_SCALE_SHIFT)))
#define KASAN_THREAD_SHIFT 1
#else
#define KASAN_THREAD_SHIFT 0
-#define KASAN_SHADOW_END (_PAGE_END(VA_BITS_MIN))
+#define PAGE_END (_PAGE_END(VA_BITS_MIN))
#endif /* CONFIG_KASAN */
#define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT)
@@ -167,7 +168,6 @@
#include <asm/bug.h>
extern u64 vabits_actual;
-#define PAGE_END (_PAGE_END(vabits_actual))
extern s64 memstart_addr;
/* PHYS_OFFSET - the physical address of the start of memory. */
@@ -238,11 +238,9 @@ static inline const void *__tag_set(const void *addr, u8 tag)
/*
- * The linear kernel range starts at the bottom of the virtual address
- * space. Testing the top bit for the start of the region is a
- * sufficient check and avoids having to worry about the tag.
+ * The linear kernel range starts at the bottom of the virtual address space.
*/
-#define __is_lm_address(addr) (!(((u64)addr) & BIT(vabits_actual - 1)))
+#define __is_lm_address(addr) (((u64)(addr) & ~PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET))
#define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
#define __kimg_to_phys(addr) ((addr) - kimage_voffset)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 4ff12a7adcfd..ec307b8bcb15 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -22,7 +22,7 @@
* and fixed mappings
*/
#define VMALLOC_START (MODULES_END)
-#define VMALLOC_END (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
+#define VMALLOC_END (VMEMMAP_START - SZ_256M)
#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
new file mode 100644
index 000000000000..1bce62fa908a
--- /dev/null
+++ b/arch/arm64/include/asm/rwonce.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#ifdef CONFIG_LTO
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+
+#ifndef BUILD_VDSO
+
+#ifdef CONFIG_AS_HAS_LDAPR
+#define __LOAD_RCPC(sfx, regs...) \
+ ALTERNATIVE( \
+ "ldar" #sfx "\t" #regs, \
+ ".arch_extension rcpc\n" \
+ "ldapr" #sfx "\t" #regs, \
+ ARM64_HAS_LDAPR)
+#else
+#define __LOAD_RCPC(sfx, regs...) "ldar" #sfx "\t" #regs
+#endif /* CONFIG_AS_HAS_LDAPR */
+
+/*
+ * When building with LTO, there is an increased risk of the compiler
+ * converting an address dependency headed by a READ_ONCE() invocation
+ * into a control dependency and consequently allowing for harmful
+ * reordering by the CPU.
+ *
+ * Ensure that such transformations are harmless by overriding the generic
+ * READ_ONCE() definition with one that provides RCpc acquire semantics
+ * when building with LTO.
+ */
+#define __READ_ONCE(x) \
+({ \
+ typeof(&(x)) __x = &(x); \
+ int atomic = 1; \
+ union { __unqual_scalar_typeof(*__x) __val; char __c[1]; } __u; \
+ switch (sizeof(x)) { \
+ case 1: \
+ asm volatile(__LOAD_RCPC(b, %w0, %1) \
+ : "=r" (*(__u8 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 2: \
+ asm volatile(__LOAD_RCPC(h, %w0, %1) \
+ : "=r" (*(__u16 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 4: \
+ asm volatile(__LOAD_RCPC(, %w0, %1) \
+ : "=r" (*(__u32 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 8: \
+ asm volatile(__LOAD_RCPC(, %0, %1) \
+ : "=r" (*(__u64 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ default: \
+ atomic = 0; \
+ } \
+ atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(__x))__x);\
+})
+
+#endif /* !BUILD_VDSO */
+#endif /* CONFIG_LTO */
+
+#include <asm-generic/rwonce.h>
+
+#endif /* __ASM_RWONCE_H */
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 11a465243f66..3b8dca4eb08d 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -16,12 +16,14 @@ int pcibus_to_node(struct pci_bus *bus);
#include <linux/arch_topology.h>
+void update_freq_counters_refs(void);
+void topology_scale_freq_tick(void);
+
#ifdef CONFIG_ARM64_AMU_EXTN
/*
* Replace task scheduler's default counter-based
* frequency-invariance scale factor setting.
*/
-void topology_scale_freq_tick(void);
#define arch_scale_freq_tick topology_scale_freq_tick
#endif /* CONFIG_ARM64_AMU_EXTN */
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 73039949b5ce..a57cffb752e8 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -21,7 +21,8 @@
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
-static int all_alternatives_applied;
+/* Volatile, as we may be patching the guts of READ_ONCE() */
+static volatile int all_alternatives_applied;
static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS);
@@ -205,7 +206,7 @@ static int __apply_alternatives_multi_stop(void *unused)
/* We always have a CPU 0 at this point (__init) */
if (smp_processor_id()) {
- while (!READ_ONCE(all_alternatives_applied))
+ while (!all_alternatives_applied)
cpu_relax();
isb();
} else {
@@ -217,7 +218,7 @@ static int __apply_alternatives_multi_stop(void *unused)
BUG_ON(all_alternatives_applied);
__apply_alternatives(&region, false, remaining_capabilities);
/* Barriers provided by the cache flushing */
- WRITE_ONCE(all_alternatives_applied, 1);
+ all_alternatives_applied = 1;
}
return 0;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index dcc165b3fc04..bffcd55668c7 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1526,8 +1526,10 @@ bool cpu_has_amu_feat(int cpu)
return cpumask_test_cpu(cpu, &amu_cpus);
}
-/* Initialize the use of AMU counters for frequency invariance */
-extern void init_cpu_freq_invariance_counters(void);
+int get_cpu_with_amu_feat(void)
+{
+ return cpumask_any(&amu_cpus);
+}
static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
{
@@ -1535,7 +1537,7 @@ static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
smp_processor_id());
cpumask_set_cpu(smp_processor_id(), &amu_cpus);
- init_cpu_freq_invariance_counters();
+ update_freq_counters_refs();
}
}
@@ -1557,6 +1559,11 @@ static bool has_amu(const struct arm64_cpu_capabilities *cap,
return true;
}
+#else
+int get_cpu_with_amu_feat(void)
+{
+ return nr_cpu_ids;
+}
#endif
#ifdef CONFIG_ARM64_VHE
@@ -2136,6 +2143,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_mte,
},
#endif /* CONFIG_ARM64_MTE */
+ {
+ .desc = "RCpc load-acquire (LDAPR)",
+ .capability = ARM64_HAS_LDAPR,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .sys_reg = SYS_ID_AA64ISAR1_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+ .matches = has_cpuid_feature,
+ .min_field_value = 1,
+ },
{},
};
diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S
index a71844fb923e..28d8a5dca5f1 100644
--- a/arch/arm64/kernel/efi-header.S
+++ b/arch/arm64/kernel/efi-header.S
@@ -7,30 +7,48 @@
#include <linux/pe.h>
#include <linux/sizes.h>
+ .macro efi_signature_nop
+#ifdef CONFIG_EFI
+.L_head:
+ /*
+ * This ccmp instruction has no meaningful effect except that
+ * its opcode forms the magic "MZ" signature required by UEFI.
+ */
+ ccmp x18, #0, #0xd, pl
+#else
+ /*
+ * Bootloaders may inspect the opcode at the start of the kernel
+ * image to decide if the kernel is capable of booting via UEFI.
+ * So put an ordinary NOP here, not the "MZ.." pseudo-nop above.
+ */
+ nop
+#endif
+ .endm
+
.macro __EFI_PE_HEADER
+#ifdef CONFIG_EFI
+ .set .Lpe_header_offset, . - .L_head
.long PE_MAGIC
-coff_header:
.short IMAGE_FILE_MACHINE_ARM64 // Machine
- .short section_count // NumberOfSections
+ .short .Lsection_count // NumberOfSections
.long 0 // TimeDateStamp
.long 0 // PointerToSymbolTable
.long 0 // NumberOfSymbols
- .short section_table - optional_header // SizeOfOptionalHeader
+ .short .Lsection_table - .Loptional_header // SizeOfOptionalHeader
.short IMAGE_FILE_DEBUG_STRIPPED | \
IMAGE_FILE_EXECUTABLE_IMAGE | \
IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics
-optional_header:
+.Loptional_header:
.short PE_OPT_MAGIC_PE32PLUS // PE32+ format
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
- .long __initdata_begin - efi_header_end // SizeOfCode
+ .long __initdata_begin - .Lefi_header_end // SizeOfCode
.long __pecoff_data_size // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
- .long __efistub_efi_pe_entry - _head // AddressOfEntryPoint
- .long efi_header_end - _head // BaseOfCode
+ .long __efistub_efi_pe_entry - .L_head // AddressOfEntryPoint
+ .long .Lefi_header_end - .L_head // BaseOfCode
-extra_header_fields:
.quad 0 // ImageBase
.long SEGMENT_ALIGN // SectionAlignment
.long PECOFF_FILE_ALIGNMENT // FileAlignment
@@ -42,10 +60,10 @@ extra_header_fields:
.short 0 // MinorSubsystemVersion
.long 0 // Win32VersionValue
- .long _end - _head // SizeOfImage
+ .long _end - .L_head // SizeOfImage
// Everything before the kernel image is considered part of the header
- .long efi_header_end - _head // SizeOfHeaders
+ .long .Lefi_header_end - .L_head // SizeOfHeaders
.long 0 // CheckSum
.short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
.short 0 // DllCharacteristics
@@ -54,7 +72,7 @@ extra_header_fields:
.quad 0 // SizeOfHeapReserve
.quad 0 // SizeOfHeapCommit
.long 0 // LoaderFlags
- .long (section_table - .) / 8 // NumberOfRvaAndSizes
+ .long (.Lsection_table - .) / 8 // NumberOfRvaAndSizes
.quad 0 // ExportTable
.quad 0 // ImportTable
@@ -64,17 +82,17 @@ extra_header_fields:
.quad 0 // BaseRelocationTable
#ifdef CONFIG_DEBUG_EFI
- .long efi_debug_table - _head // DebugTable
- .long efi_debug_table_size
+ .long .Lefi_debug_table - .L_head // DebugTable
+ .long .Lefi_debug_table_size
#endif
// Section table
-section_table:
+.Lsection_table:
.ascii ".text\0\0\0"
- .long __initdata_begin - efi_header_end // VirtualSize
- .long efi_header_end - _head // VirtualAddress
- .long __initdata_begin - efi_header_end // SizeOfRawData
- .long efi_header_end - _head // PointerToRawData
+ .long __initdata_begin - .Lefi_header_end // VirtualSize
+ .long .Lefi_header_end - .L_head // VirtualAddress
+ .long __initdata_begin - .Lefi_header_end // SizeOfRawData
+ .long .Lefi_header_end - .L_head // PointerToRawData
.long 0 // PointerToRelocations
.long 0 // PointerToLineNumbers
@@ -86,9 +104,9 @@ section_table:
.ascii ".data\0\0\0"
.long __pecoff_data_size // VirtualSize
- .long __initdata_begin - _head // VirtualAddress
+ .long __initdata_begin - .L_head // VirtualAddress
.long __pecoff_data_rawsize // SizeOfRawData
- .long __initdata_begin - _head // PointerToRawData
+ .long __initdata_begin - .L_head // PointerToRawData
.long 0 // PointerToRelocations
.long 0 // PointerToLineNumbers
@@ -98,7 +116,7 @@ section_table:
IMAGE_SCN_MEM_READ | \
IMAGE_SCN_MEM_WRITE // Characteristics
- .set section_count, (. - section_table) / 40
+ .set .Lsection_count, (. - .Lsection_table) / 40
#ifdef CONFIG_DEBUG_EFI
/*
@@ -114,21 +132,21 @@ section_table:
__INITRODATA
.align 2
-efi_debug_table:
+.Lefi_debug_table:
// EFI_IMAGE_DEBUG_DIRECTORY_ENTRY
.long 0 // Characteristics
.long 0 // TimeDateStamp
.short 0 // MajorVersion
.short 0 // MinorVersion
.long IMAGE_DEBUG_TYPE_CODEVIEW // Type
- .long efi_debug_entry_size // SizeOfData
+ .long .Lefi_debug_entry_size // SizeOfData
.long 0 // RVA
- .long efi_debug_entry - _head // FileOffset
+ .long .Lefi_debug_entry - .L_head // FileOffset
- .set efi_debug_table_size, . - efi_debug_table
+ .set .Lefi_debug_table_size, . - .Lefi_debug_table
.previous
-efi_debug_entry:
+.Lefi_debug_entry:
// EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY
.ascii "NB10" // Signature
.long 0 // Unknown
@@ -137,16 +155,12 @@ efi_debug_entry:
.asciz VMLINUX_PATH
- .set efi_debug_entry_size, . - efi_debug_entry
+ .set .Lefi_debug_entry_size, . - .Lefi_debug_entry
#endif
- /*
- * EFI will load .text onwards at the 4k section alignment
- * described in the PE/COFF header. To ensure that instruction
- * sequences using an adrp and a :lo12: immediate will function
- * correctly at this alignment, we must ensure that .text is
- * placed at a 4k boundary in the Image to begin with.
- */
.balign SEGMENT_ALIGN
-efi_header_end:
+.Lefi_header_end:
+#else
+ .set .Lpe_header_offset, 0x0
+#endif
.endm
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index d8d9caf02834..c1f8f2c5be47 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -58,21 +58,11 @@
* in the entry routines.
*/
__HEAD
-_head:
/*
* DO NOT MODIFY. Image header expected by Linux boot-loaders.
*/
-#ifdef CONFIG_EFI
- /*
- * This add instruction has no meaningful effect except that
- * its opcode forms the magic "MZ" signature required by UEFI.
- */
- add x13, x18, #0x16
- b primary_entry
-#else
+ efi_signature_nop // special NOP to identity as PE/COFF executable
b primary_entry // branch to kernel start, magic
- .long 0 // reserved
-#endif
.quad 0 // Image load offset from start of RAM, little-endian
le64sym _kernel_size_le // Effective size of kernel image, little-endian
le64sym _kernel_flags_le // Informative flags, little-endian
@@ -80,14 +70,9 @@ _head:
.quad 0 // reserved
.quad 0 // reserved
.ascii ARM64_IMAGE_MAGIC // Magic number
-#ifdef CONFIG_EFI
- .long pe_header - _head // Offset to the PE header.
+ .long .Lpe_header_offset // Offset to the PE header.
-pe_header:
__EFI_PE_HEADER
-#else
- .long 0 // reserved
-#endif
__INIT
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index b181e0544b79..0921aa1520b0 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -50,10 +50,16 @@ static __init u64 get_kaslr_seed(void *fdt)
return ret;
}
-static __init const u8 *kaslr_get_cmdline(void *fdt)
+static __init bool cmdline_contains_nokaslr(const u8 *cmdline)
{
- static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
+ const u8 *str;
+ str = strstr(cmdline, "nokaslr");
+ return str == cmdline || (str > cmdline && *(str - 1) == ' ');
+}
+
+static __init bool is_kaslr_disabled_cmdline(void *fdt)
+{
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
int node;
const u8 *prop;
@@ -65,10 +71,17 @@ static __init const u8 *kaslr_get_cmdline(void *fdt)
prop = fdt_getprop(fdt, node, "bootargs", NULL);
if (!prop)
goto out;
- return prop;
+
+ if (cmdline_contains_nokaslr(prop))
+ return true;
+
+ if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+ goto out;
+
+ return false;
}
out:
- return default_cmdline;
+ return cmdline_contains_nokaslr(CONFIG_CMDLINE);
}
/*
@@ -83,7 +96,6 @@ u64 __init kaslr_early_init(u64 dt_phys)
{
void *fdt;
u64 seed, offset, mask, module_range;
- const u8 *cmdline, *str;
unsigned long raw;
int size;
@@ -115,9 +127,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
* Check if 'nokaslr' appears on the command line, and
* return 0 if that is the case.
*/
- cmdline = kaslr_get_cmdline(fdt);
- str = strstr(cmdline, "nokaslr");
- if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) {
+ if (is_kaslr_disabled_cmdline(fdt)) {
kaslr_status = KASLR_DISABLED_CMDLINE;
return 0;
}
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index c18eb7d41274..4b202e460e6d 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -24,6 +24,7 @@
#include <linux/prctl.h>
#include <linux/sched/task_stack.h>
+#include <asm/insn.h>
#include <asm/spectre.h>
#include <asm/traps.h>
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 133257ffd859..fe1cf52f5f80 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -206,7 +206,7 @@ static void __init request_standard_resources(void)
unsigned long i = 0;
size_t res_size;
- kernel_code.start = __pa_symbol(_text);
+ kernel_code.start = __pa_symbol(_stext);
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
@@ -283,7 +283,7 @@ u64 cpu_logical_map(int cpu)
void __init __no_sanitize_address setup_arch(char **cmdline_p)
{
- init_mm.start_code = (unsigned long) _text;
+ init_mm.start_code = (unsigned long) _stext;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = (unsigned long) _end;
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 543c67cae02f..b8026ec684ba 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -124,6 +124,12 @@ int __init parse_acpi_topology(void)
#endif
#ifdef CONFIG_ARM64_AMU_EXTN
+#define read_corecnt() read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0)
+#define read_constcnt() read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)
+#else
+#define read_corecnt() (0UL)
+#define read_constcnt() (0UL)
+#endif
#undef pr_fmt
#define pr_fmt(fmt) "AMU: " fmt
@@ -133,54 +139,58 @@ static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
static cpumask_var_t amu_fie_cpus;
-/* Initialize counter reference per-cpu variables for the current CPU */
-void init_cpu_freq_invariance_counters(void)
+void update_freq_counters_refs(void)
{
- this_cpu_write(arch_core_cycles_prev,
- read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0));
- this_cpu_write(arch_const_cycles_prev,
- read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0));
+ this_cpu_write(arch_core_cycles_prev, read_corecnt());
+ this_cpu_write(arch_const_cycles_prev, read_constcnt());
}
-static int validate_cpu_freq_invariance_counters(int cpu)
+static inline bool freq_counters_valid(int cpu)
{
- u64 max_freq_hz, ratio;
+ if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
+ return false;
if (!cpu_has_amu_feat(cpu)) {
pr_debug("CPU%d: counters are not supported.\n", cpu);
- return -EINVAL;
+ return false;
}
if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
!per_cpu(arch_core_cycles_prev, cpu))) {
pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
- return -EINVAL;
+ return false;
}
- /* Convert maximum frequency from KHz to Hz and validate */
- max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000;
- if (unlikely(!max_freq_hz)) {
- pr_debug("CPU%d: invalid maximum frequency.\n", cpu);
+ return true;
+}
+
+static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
+{
+ u64 ratio;
+
+ if (unlikely(!max_rate || !ref_rate)) {
+ pr_debug("CPU%d: invalid maximum or reference frequency.\n",
+ cpu);
return -EINVAL;
}
/*
* Pre-compute the fixed ratio between the frequency of the constant
- * counter and the maximum frequency of the CPU.
+ * reference counter and the maximum frequency of the CPU.
*
- * const_freq
- * arch_max_freq_scale = ---------------- * SCHED_CAPACITY_SCALE²
- * cpuinfo_max_freq
+ * ref_rate
+ * arch_max_freq_scale = ---------- * SCHED_CAPACITY_SCALE²
+ * max_rate
*
* We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
* in order to ensure a good resolution for arch_max_freq_scale for
- * very low arch timer frequencies (down to the KHz range which should
+ * very low reference frequencies (down to the KHz range which should
* be unlikely).
*/
- ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT);
- ratio = div64_u64(ratio, max_freq_hz);
+ ratio = ref_rate << (2 * SCHED_CAPACITY_SHIFT);
+ ratio = div64_u64(ratio, max_rate);
if (!ratio) {
- WARN_ONCE(1, "System timer frequency too low.\n");
+ WARN_ONCE(1, "Reference frequency too low.\n");
return -EINVAL;
}
@@ -227,8 +237,12 @@ static int __init init_amu_fie(void)
}
for_each_present_cpu(cpu) {
- if (validate_cpu_freq_invariance_counters(cpu))
+ if (!freq_counters_valid(cpu) ||
+ freq_inv_set_max_ratio(cpu,
+ cpufreq_get_hw_max_freq(cpu) * 1000,
+ arch_timer_get_rate()))
continue;
+
cpumask_set_cpu(cpu, valid_cpus);
have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
}
@@ -280,11 +294,14 @@ void topology_scale_freq_tick(void)
if (!cpumask_test_cpu(cpu, amu_fie_cpus))
return;
- const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0);
- core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+ update_freq_counters_refs();
+
+ const_cnt = this_cpu_read(arch_const_cycles_prev);
+ core_cnt = this_cpu_read(arch_core_cycles_prev);
+
if (unlikely(core_cnt <= prev_core_cnt ||
const_cnt <= prev_const_cnt))
goto store_and_exit;
@@ -309,4 +326,71 @@ store_and_exit:
this_cpu_write(arch_core_cycles_prev, core_cnt);
this_cpu_write(arch_const_cycles_prev, const_cnt);
}
-#endif /* CONFIG_ARM64_AMU_EXTN */
+
+#ifdef CONFIG_ACPI_CPPC_LIB
+#include <acpi/cppc_acpi.h>
+
+static void cpu_read_corecnt(void *val)
+{
+ *(u64 *)val = read_corecnt();
+}
+
+static void cpu_read_constcnt(void *val)
+{
+ *(u64 *)val = read_constcnt();
+}
+
+static inline
+int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val)
+{
+ /*
+ * Abort call on counterless CPU or when interrupts are
+ * disabled - can lead to deadlock in smp sync call.
+ */
+ if (!cpu_has_amu_feat(cpu))
+ return -EOPNOTSUPP;
+
+ if (WARN_ON_ONCE(irqs_disabled()))
+ return -EPERM;
+
+ smp_call_function_single(cpu, func, val, 1);
+
+ return 0;
+}
+
+/*
+ * Refer to drivers/acpi/cppc_acpi.c for the description of the functions
+ * below.
+ */
+bool cpc_ffh_supported(void)
+{
+ return freq_counters_valid(get_cpu_with_amu_feat());
+}
+
+int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val)
+{
+ int ret = -EOPNOTSUPP;
+
+ switch ((u64)reg->address) {
+ case 0x0:
+ ret = counters_read_on_cpu(cpu, cpu_read_corecnt, val);
+ break;
+ case 0x1:
+ ret = counters_read_on_cpu(cpu, cpu_read_constcnt, val);
+ break;
+ }
+
+ if (!ret) {
+ *val &= GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+ reg->bit_offset);
+ *val >>= reg->bit_offset;
+ }
+
+ return ret;
+}
+
+int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_ACPI_CPPC_LIB */
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index d65f52264aba..a8f8e409e2bf 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -28,7 +28,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
$(btildflags-y) -T
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
-ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
KASAN_SANITIZE := n
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 79280c53b9a6..a1e0f91e6cea 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -48,7 +48,7 @@ cc32-as-instr = $(call try-run,\
# As a result we set our own flags here.
# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
-VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
+VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
VDSO_CPPFLAGS += $(LINUXINCLUDE)
# Common C and assembly flags
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 1bda604f4c70..94a08e3e32b1 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -121,7 +121,7 @@ SECTIONS
_text = .;
HEAD_TEXT
}
- .text : { /* Real text segment */
+ .text : ALIGN(SEGMENT_ALIGN) { /* Real text segment */
_stext = .; /* Text and read-only data */
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
@@ -201,7 +201,7 @@ SECTIONS
INIT_CALLS
CON_INITCALL
INIT_RAM_FS
- *(.init.rodata.* .init.bss) /* from the EFI stub */
+ *(.init.altinstructions .init.rodata.* .init.bss) /* from the EFI stub */
}
.exit.data : {
EXIT_DATA
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 57972bdb213a..1a01da9fdc99 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -788,10 +788,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
switch (vma_shift) {
+#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SHIFT:
if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
break;
fallthrough;
+#endif
case CONT_PMD_SHIFT:
vma_shift = PMD_SHIFT;
fallthrough;
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index 03ca6d8b8670..cceed41bba15 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -4,7 +4,7 @@
*/
#include <linux/linkage.h>
-#include <asm/alternative.h>
+#include <asm/asm-uaccess.h>
#include <asm/assembler.h>
#include <asm/mte.h>
#include <asm/page.h>
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 095540667f0f..fbd452e12397 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -29,6 +29,7 @@
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>
+#include <linux/acpi_iort.h>
#include <asm/boot.h>
#include <asm/fixmap.h>
@@ -42,8 +43,6 @@
#include <asm/tlb.h>
#include <asm/alternative.h>
-#define ARM64_ZONE_DMA_BITS 30
-
/*
* We need to be able to catch inadvertent references to memstart_addr
* that occur (potentially in generic code) before arm64_memblock_init()
@@ -175,21 +174,34 @@ static void __init reserve_elfcorehdr(void)
#endif /* CONFIG_CRASH_DUMP */
/*
- * Return the maximum physical address for a zone with a given address size
- * limit. It currently assumes that for memory starting above 4G, 32-bit
- * devices will use a DMA offset.
+ * Return the maximum physical address for a zone accessible by the given bits
+ * limit. If DRAM starts above 32-bit, expand the zone to the maximum
+ * available memory, otherwise cap it at 32-bit.
*/
static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
{
- phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits);
- return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
+ phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits);
+ phys_addr_t phys_start = memblock_start_of_DRAM();
+
+ if (phys_start > U32_MAX)
+ zone_mask = PHYS_ADDR_MAX;
+ else if (phys_start > zone_mask)
+ zone_mask = U32_MAX;
+
+ return min(zone_mask, memblock_end_of_DRAM() - 1) + 1;
}
static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
+ unsigned int __maybe_unused acpi_zone_dma_bits;
+ unsigned int __maybe_unused dt_zone_dma_bits;
#ifdef CONFIG_ZONE_DMA
+ acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
+ dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL));
+ zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits);
+ arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
#endif
#ifdef CONFIG_ZONE_DMA32
@@ -269,7 +281,7 @@ static void __init fdt_enforce_memory_region(void)
void __init arm64_memblock_init(void)
{
- const s64 linear_region_size = BIT(vabits_actual - 1);
+ const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);
/* Handle linux,usable-memory-range property */
fdt_enforce_memory_region();
@@ -348,15 +360,18 @@ void __init arm64_memblock_init(void)
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
extern u16 memstart_offset_seed;
- u64 range = linear_region_size -
- (memblock_end_of_DRAM() - memblock_start_of_DRAM());
+ u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+ int parange = cpuid_feature_extract_unsigned_field(
+ mmfr0, ID_AA64MMFR0_PARANGE_SHIFT);
+ s64 range = linear_region_size -
+ BIT(id_aa64mmfr0_parange_to_phys_shift(parange));
/*
* If the size of the linear region exceeds, by a sufficient
- * margin, the size of the region that the available physical
- * memory spans, randomize the linear region as well.
+ * margin, the size of the region that the physical memory can
+ * span, randomize the linear region as well.
*/
- if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
+ if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) {
range /= ARM64_MEMSTART_ALIGN;
memstart_addr -= ARM64_MEMSTART_ALIGN *
((range * memstart_offset_seed) >> 16);
@@ -367,7 +382,7 @@ void __init arm64_memblock_init(void)
* Register the kernel text, kernel data, initrd, and initial
* pagetables with memblock.
*/
- memblock_reserve(__pa_symbol(_text), _end - _text);
+ memblock_reserve(__pa_symbol(_stext), _end - _stext);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
/* the generic initrd code expects virtual addresses */
initrd_start = __phys_to_virt(phys_initrd_start);
@@ -376,18 +391,11 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
- if (IS_ENABLED(CONFIG_ZONE_DMA)) {
- zone_dma_bits = ARM64_ZONE_DMA_BITS;
- arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS);
- }
-
if (IS_ENABLED(CONFIG_ZONE_DMA32))
arm64_dma32_phys_limit = max_zone_phys(32);
else
arm64_dma32_phys_limit = PHYS_MASK + 1;
- reserve_crashkernel();
-
reserve_elfcorehdr();
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
@@ -427,6 +435,12 @@ void __init bootmem_init(void)
sparse_init();
zone_sizes_init(min, max);
+ /*
+ * request_standard_resources() depends on crashkernel's memory being
+ * reserved, so do it here.
+ */
+ reserve_crashkernel();
+
memblock_dump_all();
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 1c0f3e02f731..fe0721a44376 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -464,20 +464,35 @@ void __init mark_linear_text_alias_ro(void)
/*
* Remove the write permissions from the linear alias of .text/.rodata
*/
- update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
- (unsigned long)__init_begin - (unsigned long)_text,
+ update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext),
+ (unsigned long)__init_begin - (unsigned long)_stext,
PAGE_KERNEL_RO);
}
+static bool crash_mem_map __initdata;
+
+static int __init enable_crash_mem_map(char *arg)
+{
+ /*
+ * Proper parameter parsing is done by reserve_crashkernel(). We only
+ * need to know if the linear map has to avoid block mappings so that
+ * the crashkernel reservations can be unmapped later.
+ */
+ crash_mem_map = true;
+
+ return 0;
+}
+early_param("crashkernel", enable_crash_mem_map);
+
static void __init map_mem(pgd_t *pgdp)
{
- phys_addr_t kernel_start = __pa_symbol(_text);
+ phys_addr_t kernel_start = __pa_symbol(_stext);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
phys_addr_t start, end;
int flags = 0;
u64 i;
- if (rodata_full || debug_pagealloc_enabled())
+ if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
@@ -487,11 +502,6 @@ static void __init map_mem(pgd_t *pgdp)
* the following for-loop
*/
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
-#ifdef CONFIG_KEXEC_CORE
- if (crashk_res.end)
- memblock_mark_nomap(crashk_res.start,
- resource_size(&crashk_res));
-#endif
/* map all the memory banks */
for_each_mem_range(i, &start, &end) {
@@ -506,7 +516,7 @@ static void __init map_mem(pgd_t *pgdp)
}
/*
- * Map the linear alias of the [_text, __init_begin) interval
+ * Map the linear alias of the [_stext, __init_begin) interval
* as non-executable now, and remove the write permission in
* mark_linear_text_alias_ro() below (which will be called after
* alternative patching has completed). This makes the contents
@@ -518,21 +528,6 @@ static void __init map_mem(pgd_t *pgdp)
__map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
-
-#ifdef CONFIG_KEXEC_CORE
- /*
- * Use page-level mappings here so that we can shrink the region
- * in page granularity and put back unused memory to buddy system
- * through /sys/kernel/kexec_crash_size interface.
- */
- if (crashk_res.end) {
- __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
- PAGE_KERNEL,
- NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
- memblock_clear_nomap(crashk_res.start,
- resource_size(&crashk_res));
- }
-#endif
}
void mark_rodata_ro(void)
@@ -665,7 +660,7 @@ static void __init map_kernel(pgd_t *pgdp)
* Only rodata will be remapped with different permissions later on,
* all other segments are allowed to use contiguous mappings.
*/
- map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
+ map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0,
VM_NO_GUARD);
map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
&vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
@@ -1493,13 +1488,43 @@ static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
unsigned long pfn = arg->start_pfn;
- if (action != MEM_GOING_OFFLINE)
+ if ((action != MEM_GOING_OFFLINE) && (action != MEM_OFFLINE))
return NOTIFY_OK;
for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+ unsigned long start = PFN_PHYS(pfn);
+ unsigned long end = start + (1UL << PA_SECTION_SHIFT);
+
ms = __pfn_to_section(pfn);
- if (early_section(ms))
+ if (!early_section(ms))
+ continue;
+
+ if (action == MEM_GOING_OFFLINE) {
+ /*
+ * Boot memory removal is not supported. Prevent
+ * it via blocking any attempted offline request
+ * for the boot memory and just report it.
+ */
+ pr_warn("Boot memory [%lx %lx] offlining attempted\n", start, end);
return NOTIFY_BAD;
+ } else if (action == MEM_OFFLINE) {
+ /*
+ * This should have never happened. Boot memory
+ * offlining should have been prevented by this
+ * very notifier. Probably some memory removal
+ * procedure might have changed which would then
+ * require further debug.
+ */
+ pr_err("Boot memory [%lx %lx] offlined\n", start, end);
+
+ /*
+ * Core memory hotplug does not process a return
+ * code from the notifier for MEM_OFFLINE events.
+ * The error condition has been reported. Return
+ * from here as if ignored.
+ */
+ return NOTIFY_DONE;
+ }
}
return NOTIFY_OK;
}
@@ -1508,9 +1533,66 @@ static struct notifier_block prevent_bootmem_remove_nb = {
.notifier_call = prevent_bootmem_remove_notifier,
};
+/*
+ * This ensures that boot memory sections on the platform are online
+ * from early boot. Memory sections could not be prevented from being
+ * offlined, unless for some reason they are not online to begin with.
+ * This helps validate the basic assumption on which the above memory
+ * event notifier works to prevent boot memory section offlining and
+ * its possible removal.
+ */
+static void validate_bootmem_online(void)
+{
+ phys_addr_t start, end, addr;
+ struct mem_section *ms;
+ u64 i;
+
+ /*
+ * Scanning across all memblock might be expensive
+ * on some big memory systems. Hence enable this
+ * validation only with DEBUG_VM.
+ */
+ if (!IS_ENABLED(CONFIG_DEBUG_VM))
+ return;
+
+ for_each_mem_range(i, &start, &end) {
+ for (addr = start; addr < end; addr += (1UL << PA_SECTION_SHIFT)) {
+ ms = __pfn_to_section(PHYS_PFN(addr));
+
+ /*
+ * All memory ranges in the system at this point
+ * should have been marked as early sections.
+ */
+ WARN_ON(!early_section(ms));
+
+ /*
+ * Memory notifier mechanism here to prevent boot
+ * memory offlining depends on the fact that each
+ * early section memory on the system is initially
+ * online. Otherwise a given memory section which
+ * is already offline will be overlooked and can
+ * be removed completely. Call out such sections.
+ */
+ if (!online_section(ms))
+ pr_err("Boot memory [%llx %llx] is offline, can be removed\n",
+ addr, addr + (1UL << PA_SECTION_SHIFT));
+ }
+ }
+}
+
static int __init prevent_bootmem_remove_init(void)
{
- return register_memory_notifier(&prevent_bootmem_remove_nb);
+ int ret = 0;
+
+ if (!IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
+ return ret;
+
+ validate_bootmem_online();
+ ret = register_memory_notifier(&prevent_bootmem_remove_nb);
+ if (ret)
+ pr_err("%s: Notifier registration failed %d\n", __func__, ret);
+
+ return ret;
}
-device_initcall(prevent_bootmem_remove_init);
+early_initcall(prevent_bootmem_remove_init);
#endif
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 9929ff50c0c0..1787406684aa 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1718,3 +1718,58 @@ void __init acpi_iort_init(void)
iort_init_platform_devices();
}
+
+#ifdef CONFIG_ZONE_DMA
+/*
+ * Extract the highest CPU physical address accessible to all DMA masters in
+ * the system. PHYS_ADDR_MAX is returned when no constrained device is found.
+ */
+phys_addr_t __init acpi_iort_dma_get_max_cpu_address(void)
+{
+ phys_addr_t limit = PHYS_ADDR_MAX;
+ struct acpi_iort_node *node, *end;
+ struct acpi_table_iort *iort;
+ acpi_status status;
+ int i;
+
+ if (acpi_disabled)
+ return limit;
+
+ status = acpi_get_table(ACPI_SIG_IORT, 0,
+ (struct acpi_table_header **)&iort);
+ if (ACPI_FAILURE(status))
+ return limit;
+
+ node = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->node_offset);
+ end = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->header.length);
+
+ for (i = 0; i < iort->node_count; i++) {
+ if (node >= end)
+ break;
+
+ switch (node->type) {
+ struct acpi_iort_named_component *ncomp;
+ struct acpi_iort_root_complex *rc;
+ phys_addr_t local_limit;
+
+ case ACPI_IORT_NODE_NAMED_COMPONENT:
+ ncomp = (struct acpi_iort_named_component *)node->node_data;
+ local_limit = DMA_BIT_MASK(ncomp->memory_address_limit);
+ limit = min_not_zero(limit, local_limit);
+ break;
+
+ case ACPI_IORT_NODE_PCI_ROOT_COMPLEX:
+ if (node->revision < 1)
+ break;
+
+ rc = (struct acpi_iort_root_complex *)node->node_data;
+ local_limit = DMA_BIT_MASK(rc->memory_address_limit);
+ limit = min_not_zero(limit, local_limit);
+ break;
+ }
+ node = ACPI_ADD_PTR(struct acpi_iort_node, node, node->length);
+ }
+ acpi_put_table(&iort->header);
+ return limit;
+}
+#endif
diff --git a/drivers/of/address.c b/drivers/of/address.c
index eb9ab4f1e80b..09c0af7fd1c4 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -1025,6 +1025,48 @@ out:
#endif /* CONFIG_HAS_DMA */
/**
+ * of_dma_get_max_cpu_address - Gets highest CPU address suitable for DMA
+ * @np: The node to start searching from or NULL to start from the root
+ *
+ * Gets the highest CPU physical address that is addressable by all DMA masters
+ * in the sub-tree pointed by np, or the whole tree if NULL is passed. If no
+ * DMA constrained device is found, it returns PHYS_ADDR_MAX.
+ */
+phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np)
+{
+ phys_addr_t max_cpu_addr = PHYS_ADDR_MAX;
+ struct of_range_parser parser;
+ phys_addr_t subtree_max_addr;
+ struct device_node *child;
+ struct of_range range;
+ const __be32 *ranges;
+ u64 cpu_end = 0;
+ int len;
+
+ if (!np)
+ np = of_root;
+
+ ranges = of_get_property(np, "dma-ranges", &len);
+ if (ranges && len) {
+ of_dma_range_parser_init(&parser, np);
+ for_each_of_range(&parser, &range)
+ if (range.cpu_addr + range.size > cpu_end)
+ cpu_end = range.cpu_addr + range.size - 1;
+
+ if (max_cpu_addr > cpu_end)
+ max_cpu_addr = cpu_end;
+ }
+
+ for_each_available_child_of_node(np, child) {
+ subtree_max_addr = of_dma_get_max_cpu_address(child);
+ if (max_cpu_addr > subtree_max_addr)
+ max_cpu_addr = subtree_max_addr;
+ }
+
+ return max_cpu_addr;
+}
+
+/**
* of_dma_is_coherent - Check if device is coherent
* @np: device node
*
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 06cc988faf78..eb51bc147440 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -869,6 +869,26 @@ static void __init of_unittest_changeset(void)
#endif
}
+static void __init of_unittest_dma_get_max_cpu_address(void)
+{
+ struct device_node *np;
+ phys_addr_t cpu_addr;
+
+ if (!IS_ENABLED(CONFIG_OF_ADDRESS))
+ return;
+
+ np = of_find_node_by_path("/testcase-data/address-tests");
+ if (!np) {
+ pr_err("missing testcase data\n");
+ return;
+ }
+
+ cpu_addr = of_dma_get_max_cpu_address(np);
+ unittest(cpu_addr == 0x4fffffff,
+ "of_dma_get_max_cpu_address: wrong CPU addr %pad (expecting %x)\n",
+ &cpu_addr, 0x4fffffff);
+}
+
static void __init of_unittest_dma_ranges_one(const char *path,
u64 expect_dma_addr, u64 expect_paddr)
{
@@ -3266,6 +3286,7 @@ static int __init of_unittest(void)
of_unittest_changeset();
of_unittest_parse_interrupts();
of_unittest_parse_interrupts_extended();
+ of_unittest_dma_get_max_cpu_address();
of_unittest_parse_dma_ranges();
of_unittest_pci_dma_ranges();
of_unittest_match_node();
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 20a32120bb88..1a12baa58e40 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -38,6 +38,7 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
const u32 *id_in);
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
+phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
#else
static inline void acpi_iort_init(void) { }
static inline u32 iort_msi_map_id(struct device *dev, u32 id)
@@ -55,6 +56,9 @@ static inline const struct iommu_ops *iort_iommu_configure_id(
static inline
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
{ return 0; }
+
+static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void)
+{ return PHYS_ADDR_MAX; }
#endif
#endif /* __ACPI_IORT_H__ */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fb3bf696c05e..9d0c454d23cd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -354,26 +354,6 @@ enum zone_type {
* DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
* platforms may need both zones as they support peripherals with
* different DMA addressing limitations.
- *
- * Some examples:
- *
- * - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the
- * rest of the lower 4G.
- *
- * - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on
- * the specific device.
- *
- * - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the
- * lower 4G.
- *
- * - powerpc only uses ZONE_DMA, the size, up to 2G, may vary
- * depending on the specific device.
- *
- * - s390 uses ZONE_DMA fixed to the lower 2G.
- *
- * - ia64 and riscv only use ZONE_DMA32.
- *
- * - parisc uses neither.
*/
#ifdef CONFIG_ZONE_DMA
ZONE_DMA,
diff --git a/include/linux/of.h b/include/linux/of.h
index 5d51891cbf1a..9ed5b8532c30 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -558,6 +558,8 @@ int of_map_id(struct device_node *np, u32 id,
const char *map_name, const char *map_mask_name,
struct device_node **target, u32 *id_out);
+phys_addr_t of_dma_get_max_cpu_address(struct device_node *np);
+
#else /* CONFIG_OF */
static inline void of_core_init(void)
@@ -995,6 +997,11 @@ static inline int of_map_id(struct device_node *np, u32 id,
return -EINVAL;
}
+static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np)
+{
+ return PHYS_ADDR_MAX;
+}
+
#define of_match_ptr(_ptr) NULL
#define of_match_node(_matches, _node) NULL
#endif /* CONFIG_OF */