summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2021-06-25 18:24:24 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2021-06-25 18:24:24 +0300
commitb8917b4ae44d1b945f6fba3d8ee6777edb44633b (patch)
tree0e3b6b1113b40d444f078b3bedcd98b93ac78f4a /arch/arm64
parent79b1e56509beb8d53b2b92f27555cd2175c67b8a (diff)
parent188982cda00ebfe28b50c2905d9bbaa2e9a001b9 (diff)
downloadlinux-b8917b4ae44d1b945f6fba3d8ee6777edb44633b.tar.xz
Merge tag 'kvmarm-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for v5.14. - Add MTE support in guests, complete with tag save/restore interface - Reduce the impact of CMOs by moving them in the page-table code - Allow device block mappings at stage-2 - Reduce the footprint of the vmemmap in protected mode - Support the vGIC on dumb systems such as the Apple M1 - Add selftest infrastructure to support multiple configuration and apply that to PMU/non-PMU setups - Add selftests for the debug architecture - The usual crop of PMU fixes
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/Kbuild3
-rw-r--r--arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi2
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774a1.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774b1.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts2
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774c0.dtsi4
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774e1.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77950.dtsi4
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77951.dtsi12
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77960.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77961.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77965.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77970.dtsi4
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77980.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts2
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77990.dtsi4
-rw-r--r--arch/arm64/boot/dts/renesas/salvator-common.dtsi3
-rw-r--r--arch/arm64/include/asm/alternative-macros.h5
-rw-r--r--arch/arm64/include/asm/arch_gicv3.h3
-rw-r--r--arch/arm64/include/asm/assembler.h80
-rw-r--r--arch/arm64/include/asm/barrier.h2
-rw-r--r--arch/arm64/include/asm/cacheflush.h71
-rw-r--r--arch/arm64/include/asm/efi.h2
-rw-r--r--arch/arm64/include/asm/kvm_arm.h3
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h3
-rw-r--r--arch/arm64/include/asm/kvm_host.h14
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h17
-rw-r--r--arch/arm64/include/asm/kvm_mte.h66
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h42
-rw-r--r--arch/arm64/include/asm/mte-def.h1
-rw-r--r--arch/arm64/include/asm/mte.h4
-rw-r--r--arch/arm64/include/asm/pgtable.h22
-rw-r--r--arch/arm64/include/asm/sysreg.h3
-rw-r--r--arch/arm64/include/asm/unistd32.h3
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h11
-rw-r--r--arch/arm64/kernel/alternative.c2
-rw-r--r--arch/arm64/kernel/asm-offsets.c2
-rw-r--r--arch/arm64/kernel/efi-entry.S9
-rw-r--r--arch/arm64/kernel/head.S13
-rw-r--r--arch/arm64/kernel/hibernate-asm.S7
-rw-r--r--arch/arm64/kernel/hibernate.c20
-rw-r--r--arch/arm64/kernel/idreg-override.c3
-rw-r--r--arch/arm64/kernel/image-vars.h2
-rw-r--r--arch/arm64/kernel/insn.c2
-rw-r--r--arch/arm64/kernel/kaslr.c12
-rw-r--r--arch/arm64/kernel/machine_kexec.c30
-rw-r--r--arch/arm64/kernel/mte.c18
-rw-r--r--arch/arm64/kernel/probes/uprobes.c2
-rw-r--r--arch/arm64/kernel/smp.c8
-rw-r--r--arch/arm64/kernel/smp_spin_table.c7
-rw-r--r--arch/arm64/kernel/sys_compat.c2
-rw-r--r--arch/arm64/kvm/arch_timer.c162
-rw-r--r--arch/arm64/kvm/arm.c22
-rw-r--r--arch/arm64/kvm/guest.c86
-rw-r--r--arch/arm64/kvm/hyp/entry.S7
-rw-r--r--arch/arm64/kvm/hyp/exception.c3
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S6
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h21
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/gfp.h45
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mem_protect.h2
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/memory.h7
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mm.h13
-rw-r--r--arch/arm64/kvm/hyp/nvhe/cache.S4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c60
-rw-r--r--arch/arm64/kvm/hyp/nvhe/page_alloc.c112
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c33
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c2
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c61
-rw-r--r--arch/arm64/kvm/hyp/reserved_mem.c3
-rw-r--r--arch/arm64/kvm/mmu.c196
-rw-r--r--arch/arm64/kvm/pmu-emul.c4
-rw-r--r--arch/arm64/kvm/reset.c4
-rw-r--r--arch/arm64/kvm/sys_regs.c32
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c36
-rw-r--r--arch/arm64/kvm/vgic/vgic-v2.c19
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c19
-rw-r--r--arch/arm64/kvm/vgic/vgic.c14
-rw-r--r--arch/arm64/lib/uaccess_flushcache.c4
-rw-r--r--arch/arm64/mm/cache.S158
-rw-r--r--arch/arm64/mm/flush.c29
-rw-r--r--arch/arm64/mm/mmu.c3
81 files changed, 1229 insertions, 525 deletions
diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild
index d6465823b281..7b393cfec071 100644
--- a/arch/arm64/Kbuild
+++ b/arch/arm64/Kbuild
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += kernel/ mm/
-obj-$(CONFIG_NET) += net/
+obj-y += kernel/ mm/ net/
obj-$(CONFIG_KVM) += kvm/
obj-$(CONFIG_XEN) += xen/
obj-$(CONFIG_CRYPTO) += crypto/
diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
index c62ddb9b2ba5..3771144a2ce4 100644
--- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
+++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
@@ -14,7 +14,6 @@
ports {
port@0 {
- reg = <0>;
csi20_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2>;
@@ -29,7 +28,6 @@
ports {
port@0 {
- reg = <0>;
csi40_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
index d64fb8b1b86c..46f8dbf68904 100644
--- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
@@ -2573,6 +2573,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2628,6 +2632,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
index 5b05474dc272..d16a4be5ef77 100644
--- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
@@ -2419,6 +2419,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2474,6 +2478,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
index e7b4a929bb17..2e3d1981cac4 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
+++ b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
@@ -33,7 +33,7 @@
status = "okay";
ports {
- port {
+ port@0 {
csi40_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
index 20fa3caa050e..1aef34447abd 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
@@ -1823,6 +1823,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
index 8eb006cbd9af..1f51237ab0a6 100644
--- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
@@ -2709,6 +2709,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2764,6 +2768,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77950.dtsi b/arch/arm64/boot/dts/renesas/r8a77950.dtsi
index 25b87da32eeb..b643d3079db1 100644
--- a/arch/arm64/boot/dts/renesas/r8a77950.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77950.dtsi
@@ -192,6 +192,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
index 5c39152e4570..85d66d15465a 100644
--- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
@@ -3097,6 +3097,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -3152,6 +3156,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -3191,6 +3199,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
index 25d947a81b29..12476e354d74 100644
--- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
@@ -2761,6 +2761,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2816,6 +2820,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
index ab081f14af9a..d9804768425a 100644
--- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
@@ -2499,6 +2499,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2554,6 +2558,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
index 657b20d3533b..dcb9df861d74 100644
--- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
@@ -2575,6 +2575,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2630,6 +2634,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
index 5a5d5649332a..e8f6352c3665 100644
--- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
@@ -1106,6 +1106,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
index 1ffa4a995a7a..7b51d464de0e 100644
--- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
@@ -1439,6 +1439,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -1478,6 +1482,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
index 295d34f1d216..4715e4a4abe0 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
+++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
@@ -298,8 +298,6 @@
ports {
port@0 {
- reg = <0>;
-
csi40_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
index 5010f23fafcc..0eaea58f4210 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
@@ -1970,6 +1970,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
index e18747df219f..453ffcef24fa 100644
--- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
@@ -349,7 +349,6 @@
ports {
port@0 {
- reg = <0>;
csi20_in: endpoint {
clock-lanes = <0>;
data-lanes = <1>;
@@ -364,8 +363,6 @@
ports {
port@0 {
- reg = <0>;
-
csi40_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2 3 4>;
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 8a078fc662ac..477703578caa 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -197,11 +197,6 @@ alternative_endif
#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
-.macro user_alt, label, oldinstr, newinstr, cond
-9999: alternative_insn "\oldinstr", "\newinstr", \cond
- _asm_extable 9999b, \label
-.endm
-
#endif /* __ASSEMBLY__ */
/*
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 934b9be582d2..4ad22c3135db 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -124,7 +124,8 @@ static inline u32 gic_read_rpr(void)
#define gic_read_lpir(c) readq_relaxed(c)
#define gic_write_lpir(v, c) writeq_relaxed(v, c)
-#define gic_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
+#define gic_flush_dcache_to_poc(a,l) \
+ dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
#define gits_read_baser(c) readq_relaxed(c)
#define gits_write_baser(v, c) writeq_relaxed(v, c)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8418c1bd8f04..c4cecf85dccf 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -130,15 +130,27 @@ alternative_endif
.endm
/*
- * Emit an entry into the exception table
+ * Create an exception table entry for `insn`, which will branch to `fixup`
+ * when an unhandled fault is taken.
*/
- .macro _asm_extable, from, to
+ .macro _asm_extable, insn, fixup
.pushsection __ex_table, "a"
.align 3
- .long (\from - .), (\to - .)
+ .long (\insn - .), (\fixup - .)
.popsection
.endm
+/*
+ * Create an exception table entry for `insn` if `fixup` is provided. Otherwise
+ * do nothing.
+ */
+ .macro _cond_extable, insn, fixup
+ .ifnc \fixup,
+ _asm_extable \insn, \fixup
+ .endif
+ .endm
+
+
#define USER(l, x...) \
9999: x; \
_asm_extable 9999b, l
@@ -375,51 +387,53 @@ alternative_cb_end
bfi \tcr, \tmp0, \pos, #3
.endm
+ .macro __dcache_op_workaround_clean_cache, op, addr
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+ dc \op, \addr
+alternative_else
+ dc civac, \addr
+alternative_endif
+ .endm
+
/*
* Macro to perform a data cache maintenance for the interval
- * [kaddr, kaddr + size)
+ * [start, end)
*
* op: operation passed to dc instruction
* domain: domain used in dsb instruciton
- * kaddr: starting virtual address of the region
- * size: size of the region
- * Corrupts: kaddr, size, tmp1, tmp2
+ * start: starting virtual address of the region
+ * end: end virtual address of the region
+ * fixup: optional label to branch to on user fault
+ * Corrupts: start, end, tmp1, tmp2
*/
- .macro __dcache_op_workaround_clean_cache, op, kaddr
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
- dc \op, \kaddr
-alternative_else
- dc civac, \kaddr
-alternative_endif
- .endm
-
- .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
+ .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
dcache_line_size \tmp1, \tmp2
- add \size, \kaddr, \size
sub \tmp2, \tmp1, #1
- bic \kaddr, \kaddr, \tmp2
-9998:
+ bic \start, \start, \tmp2
+.Ldcache_op\@:
.ifc \op, cvau
- __dcache_op_workaround_clean_cache \op, \kaddr
+ __dcache_op_workaround_clean_cache \op, \start
.else
.ifc \op, cvac
- __dcache_op_workaround_clean_cache \op, \kaddr
+ __dcache_op_workaround_clean_cache \op, \start
.else
.ifc \op, cvap
- sys 3, c7, c12, 1, \kaddr // dc cvap
+ sys 3, c7, c12, 1, \start // dc cvap
.else
.ifc \op, cvadp
- sys 3, c7, c13, 1, \kaddr // dc cvadp
+ sys 3, c7, c13, 1, \start // dc cvadp
.else
- dc \op, \kaddr
+ dc \op, \start
.endif
.endif
.endif
.endif
- add \kaddr, \kaddr, \tmp1
- cmp \kaddr, \size
- b.lo 9998b
+ add \start, \start, \tmp1
+ cmp \start, \end
+ b.lo .Ldcache_op\@
dsb \domain
+
+ _cond_extable .Ldcache_op\@, \fixup
.endm
/*
@@ -427,20 +441,22 @@ alternative_endif
* [start, end)
*
* start, end: virtual addresses describing the region
- * label: A label to branch to on user fault.
+ * fixup: optional label to branch to on user fault
* Corrupts: tmp1, tmp2
*/
- .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+ .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup
icache_line_size \tmp1, \tmp2
sub \tmp2, \tmp1, #1
bic \tmp2, \start, \tmp2
-9997:
-USER(\label, ic ivau, \tmp2) // invalidate I line PoU
+.Licache_op\@:
+ ic ivau, \tmp2 // invalidate I line PoU
add \tmp2, \tmp2, \tmp1
cmp \tmp2, \end
- b.lo 9997b
+ b.lo .Licache_op\@
dsb ish
isb
+
+ _cond_extable .Licache_op\@, \fixup
.endm
/*
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 2175ec0004ed..451e11e5fd23 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -74,7 +74,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx,
* This insanity brought to you by speculative system register reads,
* out-of-order memory accesses, sequence locks and Thomas Gleixner.
*
- * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html
+ * https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/
*/
#define arch_counter_enforce_ordering(val) do { \
u64 tmp, _val = (val); \
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 52e5c1623224..543c997eb3b7 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -30,45 +30,58 @@
* the implementation assumes non-aliasing VIPT D-cache and (aliasing)
* VIPT I-cache.
*
- * flush_icache_range(start, end)
+ * All functions below apply to the interval [start, end)
+ * - start - virtual start address (inclusive)
+ * - end - virtual end address (exclusive)
*
- * Ensure coherency between the I-cache and the D-cache in the
- * region described by start, end.
- * - start - virtual start address
- * - end - virtual end address
+ * caches_clean_inval_pou(start, end)
*
- * invalidate_icache_range(start, end)
+ * Ensure coherency between the I-cache and the D-cache region to
+ * the Point of Unification.
*
- * Invalidate the I-cache in the region described by start, end.
- * - start - virtual start address
- * - end - virtual end address
+ * caches_clean_inval_user_pou(start, end)
*
- * __flush_cache_user_range(start, end)
+ * Ensure coherency between the I-cache and the D-cache region to
+ * the Point of Unification.
+ * Use only if the region might access user memory.
*
- * Ensure coherency between the I-cache and the D-cache in the
- * region described by start, end.
- * - start - virtual start address
- * - end - virtual end address
+ * icache_inval_pou(start, end)
*
- * __flush_dcache_area(kaddr, size)
+ * Invalidate I-cache region to the Point of Unification.
*
- * Ensure that the data held in page is written back.
- * - kaddr - page address
- * - size - region size
+ * dcache_clean_inval_poc(start, end)
+ *
+ * Clean and invalidate D-cache region to the Point of Coherency.
+ *
+ * dcache_inval_poc(start, end)
+ *
+ * Invalidate D-cache region to the Point of Coherency.
+ *
+ * dcache_clean_poc(start, end)
+ *
+ * Clean D-cache region to the Point of Coherency.
+ *
+ * dcache_clean_pop(start, end)
+ *
+ * Clean D-cache region to the Point of Persistence.
+ *
+ * dcache_clean_pou(start, end)
+ *
+ * Clean D-cache region to the Point of Unification.
*/
-extern void __flush_icache_range(unsigned long start, unsigned long end);
-extern int invalidate_icache_range(unsigned long start, unsigned long end);
-extern void __flush_dcache_area(void *addr, size_t len);
-extern void __inval_dcache_area(void *addr, size_t len);
-extern void __clean_dcache_area_poc(void *addr, size_t len);
-extern void __clean_dcache_area_pop(void *addr, size_t len);
-extern void __clean_dcache_area_pou(void *addr, size_t len);
-extern long __flush_cache_user_range(unsigned long start, unsigned long end);
-extern void sync_icache_aliases(void *kaddr, unsigned long len);
+extern void caches_clean_inval_pou(unsigned long start, unsigned long end);
+extern void icache_inval_pou(unsigned long start, unsigned long end);
+extern void dcache_clean_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_pop(unsigned long start, unsigned long end);
+extern void dcache_clean_pou(unsigned long start, unsigned long end);
+extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);
+extern void sync_icache_aliases(unsigned long start, unsigned long end);
static inline void flush_icache_range(unsigned long start, unsigned long end)
{
- __flush_icache_range(start, end);
+ caches_clean_inval_pou(start, end);
/*
* IPI all online CPUs so that they undergo a context synchronization
@@ -122,7 +135,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
-static __always_inline void __flush_icache_all(void)
+static __always_inline void icache_inval_all_pou(void)
{
if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
return;
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 3578aba9c608..1bed37eb013a 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -137,7 +137,7 @@ void efi_virtmap_unload(void);
static inline void efi_capsule_flush_cache_range(void *addr, int size)
{
- __flush_dcache_area(addr, size);
+ dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size);
}
#endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 692c9049befa..d436831dd706 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -12,7 +12,8 @@
#include <asm/types.h>
/* Hyp Configuration Register (HCR) bits */
-#define HCR_ATA (UL(1) << 56)
+#define HCR_ATA_SHIFT 56
+#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
#define HCR_FWB (UL(1) << 46)
#define HCR_API (UL(1) << 41)
#define HCR_APK (UL(1) << 40)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 01b9857757f2..fd418955e31e 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
vcpu_el1_is_32bit(vcpu))
vcpu->arch.hcr_el2 |= HCR_TID2;
+
+ if (kvm_has_mte(vcpu->kvm))
+ vcpu->arch.hcr_el2 |= HCR_ATA;
}
static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5a2c82f63baa..41911585ae0c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -46,6 +46,7 @@
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
+#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
@@ -132,6 +133,9 @@ struct kvm_arch {
u8 pfr0_csv2;
u8 pfr0_csv3;
+
+ /* Memory Tagging Extension enabled for the guest */
+ bool mte_enabled;
};
struct kvm_vcpu_fault_info {
@@ -206,6 +210,12 @@ enum vcpu_sysreg {
CNTP_CVAL_EL0,
CNTP_CTL_EL0,
+ /* Memory Tagging Extension registers */
+ RGSR_EL1, /* Random Allocation Tag Seed Register */
+ GCR_EL1, /* Tag Control Register */
+ TFSR_EL1, /* Tag Fault Status Register (EL1) */
+ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
+
/* 32bit specific registers. Keep them at the end of the range */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -716,6 +726,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags);
+
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
@@ -764,6 +777,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_arm_vcpu_sve_finalized(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
+#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled)
#define kvm_vcpu_has_pmu(vcpu) \
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 25ed956f9af1..b52c5c4b9a3d 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -180,17 +180,16 @@ static inline void *__kvm_vector_slot2addr(void *base,
struct kvm;
-#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
+#define kvm_flush_dcache_to_poc(a,l) \
+ dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
}
-static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+static inline void __clean_dcache_guest_page(void *va, size_t size)
{
- void *va = page_address(pfn_to_page(pfn));
-
/*
* With FWB, we ensure that the guest always accesses memory using
* cacheable attributes, and we don't have to clean to PoC when
@@ -203,18 +202,14 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
kvm_flush_dcache_to_poc(va, size);
}
-static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
- unsigned long size)
+static inline void __invalidate_icache_guest_page(void *va, size_t size)
{
if (icache_is_aliasing()) {
/* any kind of VIPT cache */
- __flush_icache_all();
+ icache_inval_all_pou();
} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
- void *va = page_address(pfn_to_page(pfn));
-
- invalidate_icache_range((unsigned long)va,
- (unsigned long)va + size);
+ icache_inval_pou((unsigned long)va, (unsigned long)va + size);
}
}
diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
new file mode 100644
index 000000000000..de002636eb1f
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mte.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2021 ARM Ltd.
+ */
+#ifndef __ASM_KVM_MTE_H
+#define __ASM_KVM_MTE_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_MTE
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+ mrs_s \reg1, SYS_RGSR_EL1
+ str \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+ mrs_s \reg1, SYS_GCR_EL1
+ str \reg1, [\h_ctxt, #CPU_GCR_EL1]
+
+ ldr \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+ msr_s SYS_RGSR_EL1, \reg1
+ ldr \reg1, [\g_ctxt, #CPU_GCR_EL1]
+ msr_s SYS_GCR_EL1, \reg1
+
+.L__skip_switch\@:
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+ mrs_s \reg1, SYS_RGSR_EL1
+ str \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+ mrs_s \reg1, SYS_GCR_EL1
+ str \reg1, [\g_ctxt, #CPU_GCR_EL1]
+
+ ldr \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+ msr_s SYS_RGSR_EL1, \reg1
+ ldr \reg1, [\h_ctxt, #CPU_GCR_EL1]
+ msr_s SYS_GCR_EL1, \reg1
+
+ isb
+
+.L__skip_switch\@:
+.endm
+
+#else /* !CONFIG_ARM64_MTE */
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+.endm
+
+#endif /* CONFIG_ARM64_MTE */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_KVM_MTE_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index c3674c47d48c..f004c0115d89 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -27,23 +27,29 @@ typedef u64 kvm_pte_t;
/**
* struct kvm_pgtable_mm_ops - Memory management callbacks.
- * @zalloc_page: Allocate a single zeroed memory page. The @arg parameter
- * can be used by the walker to pass a memcache. The
- * initial refcount of the page is 1.
- * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. The
- * @size parameter is in bytes, and is rounded-up to the
- * next page boundary. The resulting allocation is
- * physically contiguous.
- * @free_pages_exact: Free an exact number of memory pages previously
- * allocated by zalloc_pages_exact.
- * @get_page: Increment the refcount on a page.
- * @put_page: Decrement the refcount on a page. When the refcount
- * reaches 0 the page is automatically freed.
- * @page_count: Return the refcount of a page.
- * @phys_to_virt: Convert a physical address into a virtual address mapped
- * in the current context.
- * @virt_to_phys: Convert a virtual address mapped in the current context
- * into a physical address.
+ * @zalloc_page: Allocate a single zeroed memory page.
+ * The @arg parameter can be used by the walker
+ * to pass a memcache. The initial refcount of
+ * the page is 1.
+ * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages.
+ * The @size parameter is in bytes, and is rounded
+ * up to the next page boundary. The resulting
+ * allocation is physically contiguous.
+ * @free_pages_exact: Free an exact number of memory pages previously
+ * allocated by zalloc_pages_exact.
+ * @get_page: Increment the refcount on a page.
+ * @put_page: Decrement the refcount on a page. When the
+ * refcount reaches 0 the page is automatically
+ * freed.
+ * @page_count: Return the refcount of a page.
+ * @phys_to_virt: Convert a physical address into a virtual
+ * address mapped in the current context.
+ * @virt_to_phys: Convert a virtual address mapped in the current
+ * context into a physical address.
+ * @dcache_clean_inval_poc: Clean and invalidate the data cache to the PoC
+ * for the specified memory address range.
+ * @icache_inval_pou: Invalidate the instruction cache to the PoU
+ * for the specified memory address range.
*/
struct kvm_pgtable_mm_ops {
void* (*zalloc_page)(void *arg);
@@ -54,6 +60,8 @@ struct kvm_pgtable_mm_ops {
int (*page_count)(void *addr);
void* (*phys_to_virt)(phys_addr_t phys);
phys_addr_t (*virt_to_phys)(void *addr);
+ void (*dcache_clean_inval_poc)(void *addr, size_t size);
+ void (*icache_inval_pou)(void *addr, size_t size);
};
/**
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
index cf241b0f0a42..626d359b396e 100644
--- a/arch/arm64/include/asm/mte-def.h
+++ b/arch/arm64/include/asm/mte-def.h
@@ -7,6 +7,7 @@
#define MTE_GRANULE_SIZE UL(16)
#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1))
+#define MTE_GRANULES_PER_PAGE (PAGE_SIZE / MTE_GRANULE_SIZE)
#define MTE_TAG_SHIFT 56
#define MTE_TAG_SIZE 4
#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index bc88a1ced0d7..347ef38a35f7 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -37,7 +37,7 @@ void mte_free_tag_storage(char *storage);
/* track which pages have valid allocation tags */
#define PG_mte_tagged PG_arch_2
-void mte_sync_tags(pte_t *ptep, pte_t pte);
+void mte_sync_tags(pte_t old_pte, pte_t pte);
void mte_copy_page_tags(void *kto, const void *kfrom);
void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
@@ -53,7 +53,7 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
/* unused if !CONFIG_ARM64_MTE, silence the compiler */
#define PG_mte_tagged 0
-static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
+static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
{
}
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0b10204e72fc..db5402168841 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -314,9 +314,25 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
- if (system_supports_mte() &&
- pte_present(pte) && pte_tagged(pte) && !pte_special(pte))
- mte_sync_tags(ptep, pte);
+ /*
+ * If the PTE would provide user space access to the tags associated
+ * with it then ensure that the MTE tags are synchronised. Although
+ * pte_access_permitted() returns false for exec only mappings, they
+ * don't expose tags (instruction fetches don't check tags).
+ */
+ if (system_supports_mte() && pte_access_permitted(pte, false) &&
+ !pte_special(pte)) {
+ pte_t old_pte = READ_ONCE(*ptep);
+ /*
+ * We only need to synchronise if the new PTE has tags enabled
+ * or if swapping in (in which case another mapping may have
+ * set tags in the past even if this PTE isn't tagged).
+ * (!pte_none() && !pte_present()) is an open coded version of
+ * is_swap_pte()
+ */
+ if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
+ mte_sync_tags(old_pte, pte);
+ }
__check_racy_pte_update(mm, ptep, pte);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 65d15700a168..347ccac2341e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -651,7 +651,8 @@
#define INIT_SCTLR_EL2_MMU_ON \
(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
- SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
+ SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \
+ SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
#define INIT_SCTLR_EL2_MMU_OFF \
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 7859749d6628..5dab69d2c22b 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -893,8 +893,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
__SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
#define __NR_mount_setattr 442
__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
#define __NR_landlock_create_ruleset 444
__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
#define __NR_landlock_add_rule 445
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 24223adae150..b3edde68bc3e 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -184,6 +184,17 @@ struct kvm_vcpu_events {
__u32 reserved[12];
};
+struct kvm_arm_copy_mte_tags {
+ __u64 guest_ipa;
+ __u64 length;
+ void __user *addr;
+ __u64 flags;
+ __u64 reserved[2];
+};
+
+#define KVM_ARM_TAGS_TO_GUEST 0
+#define KVM_ARM_TAGS_FROM_GUEST 1
+
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
#define KVM_REG_ARM_COPROC_SHIFT 16
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index c906d20c7b52..3fb79b76e9d9 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -181,7 +181,7 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu
*/
if (!is_module) {
dsb(ish);
- __flush_icache_all();
+ icache_inval_all_pou();
isb();
/* Ignore ARM64_CB bit from feature mask */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34ccb6e73..6f0044cb233e 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -111,6 +111,8 @@ int main(void)
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
+ DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
+ DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 0073b24b5d25..61a87fa1c305 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -28,7 +28,8 @@ SYM_CODE_START(efi_enter_kernel)
* stale icache entries from before relocation.
*/
ldr w1, =kernel_size
- bl __clean_dcache_area_poc
+ add x1, x0, x1
+ bl dcache_clean_poc
ic ialluis
/*
@@ -36,8 +37,8 @@ SYM_CODE_START(efi_enter_kernel)
* so that we can safely disable the MMU and caches.
*/
adr x0, 0f
- ldr w1, 3f
- bl __clean_dcache_area_poc
+ adr x1, 3f
+ bl dcache_clean_poc
0:
/* Turn off Dcache and MMU */
mrs x0, CurrentEL
@@ -64,5 +65,5 @@ SYM_CODE_START(efi_enter_kernel)
mov x2, xzr
mov x3, xzr
br x19
+3:
SYM_CODE_END(efi_enter_kernel)
-3: .long . - 0b
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 96873dfa67fd..6928cb67d3a0 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -117,8 +117,8 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
dmb sy // needed before dc ivac with
// MMU off
- mov x1, #0x20 // 4 x 8 bytes
- b __inval_dcache_area // tail call
+ add x1, x0, #0x20 // 4 x 8 bytes
+ b dcache_inval_poc // tail call
SYM_CODE_END(preserve_boot_args)
/*
@@ -268,8 +268,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
*/
adrp x0, init_pg_dir
adrp x1, init_pg_end
- sub x1, x1, x0
- bl __inval_dcache_area
+ bl dcache_inval_poc
/*
* Clear the init page tables.
@@ -382,13 +381,11 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
adrp x0, idmap_pg_dir
adrp x1, idmap_pg_end
- sub x1, x1, x0
- bl __inval_dcache_area
+ bl dcache_inval_poc
adrp x0, init_pg_dir
adrp x1, init_pg_end
- sub x1, x1, x0
- bl __inval_dcache_area
+ bl dcache_inval_poc
ret x28
SYM_FUNC_END(__create_page_tables)
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 8ccca660034e..81c0186a5e32 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -45,7 +45,7 @@
* Because this code has to be copied to a 'safe' page, it can't call out to
* other functions by PC-relative address. Also remember that it may be
* mid-way through over-writing other functions. For this reason it contains
- * code from flush_icache_range() and uses the copy_page() macro.
+ * code from caches_clean_inval_pou() and uses the copy_page() macro.
*
* This 'safe' page is mapped via ttbr0, and executed from there. This function
* switches to a copy of the linear map in ttbr1, performs the restore, then
@@ -87,11 +87,12 @@ SYM_CODE_START(swsusp_arch_suspend_exit)
copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
add x1, x10, #PAGE_SIZE
- /* Clean the copied page to PoU - based on flush_icache_range() */
+ /* Clean the copied page to PoU - based on caches_clean_inval_pou() */
raw_dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x10, x3
-2: dc cvau, x4 /* clean D line / unified line */
+2: /* clean D line / unified line */
+alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
add x4, x4, x2
cmp x4, x1
b.lo 2b
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index b1cef371df2b..46a0b4d6e251 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -210,7 +210,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
return -ENOMEM;
memcpy(page, src_start, length);
- __flush_icache_range((unsigned long)page, (unsigned long)page + length);
+ caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length);
rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
if (rc)
return rc;
@@ -240,8 +240,6 @@ static int create_safe_exec_page(void *src_start, size_t length,
return 0;
}
-#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start))
-
#ifdef CONFIG_ARM64_MTE
static DEFINE_XARRAY(mte_pages);
@@ -383,13 +381,18 @@ int swsusp_arch_suspend(void)
ret = swsusp_save();
} else {
/* Clean kernel core startup/idle code to PoC*/
- dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
- dcache_clean_range(__idmap_text_start, __idmap_text_end);
+ dcache_clean_inval_poc((unsigned long)__mmuoff_data_start,
+ (unsigned long)__mmuoff_data_end);
+ dcache_clean_inval_poc((unsigned long)__idmap_text_start,
+ (unsigned long)__idmap_text_end);
/* Clean kvm setup code to PoC? */
if (el2_reset_needed()) {
- dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
- dcache_clean_range(__hyp_text_start, __hyp_text_end);
+ dcache_clean_inval_poc(
+ (unsigned long)__hyp_idmap_text_start,
+ (unsigned long)__hyp_idmap_text_end);
+ dcache_clean_inval_poc((unsigned long)__hyp_text_start,
+ (unsigned long)__hyp_text_end);
}
swsusp_mte_restore_tags();
@@ -474,7 +477,8 @@ int swsusp_arch_resume(void)
* The hibernate exit text contains a set of el2 vectors, that will
* be executed at el2 with the mmu off in order to reload hyp-stub.
*/
- __flush_dcache_area(hibernate_exit, exit_size);
+ dcache_clean_inval_poc((unsigned long)hibernate_exit,
+ (unsigned long)hibernate_exit + exit_size);
/*
* KASLR will cause the el2 vectors to be in a different location in
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index e628c8ce1ffe..53a381a7f65d 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -237,7 +237,8 @@ asmlinkage void __init init_feature_override(void)
for (i = 0; i < ARRAY_SIZE(regs); i++) {
if (regs[i]->override)
- __flush_dcache_area(regs[i]->override,
+ dcache_clean_inval_poc((unsigned long)regs[i]->override,
+ (unsigned long)regs[i]->override +
sizeof(*regs[i]->override));
}
}
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index bcf3c2755370..c96a9a0043bf 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -35,7 +35,7 @@ __efistub_strnlen = __pi_strnlen;
__efistub_strcmp = __pi_strcmp;
__efistub_strncmp = __pi_strncmp;
__efistub_strrchr = __pi_strrchr;
-__efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc;
+__efistub_dcache_clean_poc = __pi_dcache_clean_poc;
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
__efistub___memcpy = __pi_memcpy;
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 6c0de2f60ea9..51cb8dc98d00 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -198,7 +198,7 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
ret = aarch64_insn_write(tp, insn);
if (ret == 0)
- __flush_icache_range((uintptr_t)tp,
+ caches_clean_inval_pou((uintptr_t)tp,
(uintptr_t)tp + AARCH64_INSN_SIZE);
return ret;
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 341342b207f6..cfa2cfde3019 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -72,7 +72,9 @@ u64 __init kaslr_early_init(void)
* we end up running with module randomization disabled.
*/
module_alloc_base = (u64)_etext - MODULES_VSIZE;
- __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
+ dcache_clean_inval_poc((unsigned long)&module_alloc_base,
+ (unsigned long)&module_alloc_base +
+ sizeof(module_alloc_base));
/*
* Try to map the FDT early. If this fails, we simply bail,
@@ -170,8 +172,12 @@ u64 __init kaslr_early_init(void)
module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
module_alloc_base &= PAGE_MASK;
- __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
- __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed));
+ dcache_clean_inval_poc((unsigned long)&module_alloc_base,
+ (unsigned long)&module_alloc_base +
+ sizeof(module_alloc_base));
+ dcache_clean_inval_poc((unsigned long)&memstart_offset_seed,
+ (unsigned long)&memstart_offset_seed +
+ sizeof(memstart_offset_seed));
return offset;
}
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 90a335c74442..03ceabe4d912 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -68,10 +68,16 @@ int machine_kexec_post_load(struct kimage *kimage)
kimage->arch.kern_reloc = __pa(reloc_code);
kexec_image_info(kimage);
- /* Flush the reloc_code in preparation for its execution. */
- __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size);
- flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code +
- arm64_relocate_new_kernel_size);
+ /*
+ * For execution with the MMU off, reloc_code needs to be cleaned to the
+ * PoC and invalidated from the I-cache.
+ */
+ dcache_clean_inval_poc((unsigned long)reloc_code,
+ (unsigned long)reloc_code +
+ arm64_relocate_new_kernel_size);
+ icache_inval_pou((uintptr_t)reloc_code,
+ (uintptr_t)reloc_code +
+ arm64_relocate_new_kernel_size);
return 0;
}
@@ -102,16 +108,18 @@ static void kexec_list_flush(struct kimage *kimage)
for (entry = &kimage->head; ; entry++) {
unsigned int flag;
- void *addr;
+ unsigned long addr;
/* flush the list entries. */
- __flush_dcache_area(entry, sizeof(kimage_entry_t));
+ dcache_clean_inval_poc((unsigned long)entry,
+ (unsigned long)entry +
+ sizeof(kimage_entry_t));
flag = *entry & IND_FLAGS;
if (flag == IND_DONE)
break;
- addr = phys_to_virt(*entry & PAGE_MASK);
+ addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK);
switch (flag) {
case IND_INDIRECTION:
@@ -120,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage)
break;
case IND_SOURCE:
/* flush the source pages. */
- __flush_dcache_area(addr, PAGE_SIZE);
+ dcache_clean_inval_poc(addr, addr + PAGE_SIZE);
break;
case IND_DESTINATION:
break;
@@ -147,8 +155,10 @@ static void kexec_segment_flush(const struct kimage *kimage)
kimage->segment[i].memsz,
kimage->segment[i].memsz / PAGE_SIZE);
- __flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
- kimage->segment[i].memsz);
+ dcache_clean_inval_poc(
+ (unsigned long)phys_to_virt(kimage->segment[i].mem),
+ (unsigned long)phys_to_virt(kimage->segment[i].mem) +
+ kimage->segment[i].memsz);
}
}
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 125a10e413e9..69b3fde8759e 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -32,10 +32,9 @@ DEFINE_STATIC_KEY_FALSE(mte_async_mode);
EXPORT_SYMBOL_GPL(mte_async_mode);
#endif
-static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
+static void mte_sync_page_tags(struct page *page, pte_t old_pte,
+ bool check_swap, bool pte_is_tagged)
{
- pte_t old_pte = READ_ONCE(*ptep);
-
if (check_swap && is_swap_pte(old_pte)) {
swp_entry_t entry = pte_to_swp_entry(old_pte);
@@ -43,6 +42,9 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
return;
}
+ if (!pte_is_tagged)
+ return;
+
page_kasan_tag_reset(page);
/*
* We need smp_wmb() in between setting the flags and clearing the
@@ -55,16 +57,22 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
mte_clear_page_tags(page_address(page));
}
-void mte_sync_tags(pte_t *ptep, pte_t pte)
+void mte_sync_tags(pte_t old_pte, pte_t pte)
{
struct page *page = pte_page(pte);
long i, nr_pages = compound_nr(page);
bool check_swap = nr_pages == 1;
+ bool pte_is_tagged = pte_tagged(pte);
+
+ /* Early out if there's nothing to do */
+ if (!check_swap && !pte_is_tagged)
+ return;
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {
if (!test_and_set_bit(PG_mte_tagged, &page->flags))
- mte_sync_page_tags(page, ptep, check_swap);
+ mte_sync_page_tags(page, old_pte, check_swap,
+ pte_is_tagged);
}
}
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index 2c247634552b..9be668f3f034 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -21,7 +21,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
memcpy(dst, src, len);
/* flush caches (dcache/icache) */
- sync_icache_aliases(dst, len);
+ sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len);
kunmap_atomic(xol_page_kaddr);
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dcd7041b2b07..9b4c1118194d 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -122,7 +122,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
secondary_data.task = idle;
secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
update_cpu_boot_status(CPU_MMU_OFF);
- __flush_dcache_area(&secondary_data, sizeof(secondary_data));
+ dcache_clean_inval_poc((unsigned long)&secondary_data,
+ (unsigned long)&secondary_data +
+ sizeof(secondary_data));
/* Now bring the CPU into our world */
ret = boot_secondary(cpu, idle);
@@ -143,7 +145,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
pr_crit("CPU%u: failed to come online\n", cpu);
secondary_data.task = NULL;
secondary_data.stack = NULL;
- __flush_dcache_area(&secondary_data, sizeof(secondary_data));
+ dcache_clean_inval_poc((unsigned long)&secondary_data,
+ (unsigned long)&secondary_data +
+ sizeof(secondary_data));
status = READ_ONCE(secondary_data.status);
if (status == CPU_MMU_OFF)
status = READ_ONCE(__early_cpu_boot_status);
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index c45a83512805..7e1624ecab3c 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -36,7 +36,7 @@ static void write_pen_release(u64 val)
unsigned long size = sizeof(secondary_holding_pen_release);
secondary_holding_pen_release = val;
- __flush_dcache_area(start, size);
+ dcache_clean_inval_poc((unsigned long)start, (unsigned long)start + size);
}
@@ -90,8 +90,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
* the boot protocol.
*/
writeq_relaxed(pa_holding_pen, release_addr);
- __flush_dcache_area((__force void *)release_addr,
- sizeof(*release_addr));
+ dcache_clean_inval_poc((__force unsigned long)release_addr,
+ (__force unsigned long)release_addr +
+ sizeof(*release_addr));
/*
* Send an event to wake up the secondary CPU.
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 265fe3eb1069..db5159a3055f 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -41,7 +41,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
dsb(ish);
}
- ret = __flush_cache_user_range(start, start + chunk);
+ ret = caches_clean_inval_user_pou(start, start + chunk);
if (ret)
return ret;
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 74e0699661e9..3df67c127489 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -9,6 +9,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/uaccess.h>
#include <clocksource/arm_arch_timer.h>
@@ -973,36 +974,154 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
return 0;
}
-int kvm_timer_hyp_init(bool has_gic)
+static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
- struct arch_timer_kvm_info *info;
- int err;
+ if (vcpu)
+ irqd_set_forwarded_to_vcpu(d);
+ else
+ irqd_clr_forwarded_to_vcpu(d);
- info = arch_timer_get_kvm_info();
- timecounter = &info->timecounter;
+ return 0;
+}
- if (!timecounter->cc) {
- kvm_err("kvm_arch_timer: uninitialized timecounter\n");
- return -ENODEV;
+static int timer_irq_set_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which, bool val)
+{
+ if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
+ return irq_chip_set_parent_state(d, which, val);
+
+ if (val)
+ irq_chip_mask_parent(d);
+ else
+ irq_chip_unmask_parent(d);
+
+ return 0;
+}
+
+static void timer_irq_eoi(struct irq_data *d)
+{
+ if (!irqd_is_forwarded_to_vcpu(d))
+ irq_chip_eoi_parent(d);
+}
+
+static void timer_irq_ack(struct irq_data *d)
+{
+ d = d->parent_data;
+ if (d->chip->irq_ack)
+ d->chip->irq_ack(d);
+}
+
+static struct irq_chip timer_chip = {
+ .name = "KVM",
+ .irq_ack = timer_irq_ack,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = timer_irq_eoi,
+ .irq_set_type = irq_chip_set_type_parent,
+ .irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity,
+ .irq_set_irqchip_state = timer_irq_set_irqchip_state,
+};
+
+static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ irq_hw_number_t hwirq = (uintptr_t)arg;
+
+ return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+ &timer_chip, NULL);
+}
+
+static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+}
+
+static const struct irq_domain_ops timer_domain_ops = {
+ .alloc = timer_irq_domain_alloc,
+ .free = timer_irq_domain_free,
+};
+
+static struct irq_ops arch_timer_irq_ops = {
+ .get_input_level = kvm_arch_timer_get_input_level,
+};
+
+static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
+{
+ *flags = irq_get_trigger_type(virq);
+ if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
+ kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
+ virq);
+ *flags = IRQF_TRIGGER_LOW;
}
+}
- /* First, do the virtual EL1 timer irq */
+static int kvm_irq_init(struct arch_timer_kvm_info *info)
+{
+ struct irq_domain *domain = NULL;
if (info->virtual_irq <= 0) {
kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
info->virtual_irq);
return -ENODEV;
}
+
host_vtimer_irq = info->virtual_irq;
+ kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
+
+ if (kvm_vgic_global_state.no_hw_deactivation) {
+ struct fwnode_handle *fwnode;
+ struct irq_data *data;
+
+ fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
+ if (!fwnode)
+ return -ENOMEM;
+
+ /* Assume both vtimer and ptimer in the same parent */
+ data = irq_get_irq_data(host_vtimer_irq);
+ domain = irq_domain_create_hierarchy(data->domain, 0,
+ NR_KVM_TIMERS, fwnode,
+ &timer_domain_ops, NULL);
+ if (!domain) {
+ irq_domain_free_fwnode(fwnode);
+ return -ENOMEM;
+ }
+
+ arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
+ WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
+ (void *)TIMER_VTIMER));
+ }
- host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
- if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
- host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
- kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
- host_vtimer_irq);
- host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
+ if (info->physical_irq > 0) {
+ host_ptimer_irq = info->physical_irq;
+ kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
+
+ if (domain)
+ WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
+ (void *)TIMER_PTIMER));
}
+ return 0;
+}
+
+int kvm_timer_hyp_init(bool has_gic)
+{
+ struct arch_timer_kvm_info *info;
+ int err;
+
+ info = arch_timer_get_kvm_info();
+ timecounter = &info->timecounter;
+
+ if (!timecounter->cc) {
+ kvm_err("kvm_arch_timer: uninitialized timecounter\n");
+ return -ENODEV;
+ }
+
+ err = kvm_irq_init(info);
+ if (err)
+ return err;
+
+ /* First, do the virtual EL1 timer irq */
+
err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
"kvm guest vtimer", kvm_get_running_vcpus());
if (err) {
@@ -1027,15 +1146,6 @@ int kvm_timer_hyp_init(bool has_gic)
/* Now let's do the physical EL1 timer irq */
if (info->physical_irq > 0) {
- host_ptimer_irq = info->physical_irq;
- host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
- if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
- host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
- kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
- host_ptimer_irq);
- host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
- }
-
err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
"kvm guest ptimer", kvm_get_running_vcpus());
if (err) {
@@ -1143,7 +1253,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_vtimer->host_timer_irq,
map.direct_vtimer->irq.irq,
- kvm_arch_timer_get_input_level);
+ &arch_timer_irq_ops);
if (ret)
return ret;
@@ -1151,7 +1261,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_ptimer->host_timer_irq,
map.direct_ptimer->irq.irq,
- kvm_arch_timer_get_input_level);
+ &arch_timer_irq_ops);
}
if (ret)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e720148232a0..e0b81870ff2a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -93,6 +93,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = 0;
kvm->arch.return_nisv_io_abort_to_user = true;
break;
+ case KVM_CAP_ARM_MTE:
+ if (!system_supports_mte() || kvm->created_vcpus)
+ return -EINVAL;
+ r = 0;
+ kvm->arch.mte_enabled = true;
+ break;
default:
r = -EINVAL;
break;
@@ -237,6 +243,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
*/
r = 1;
break;
+ case KVM_CAP_ARM_MTE:
+ r = system_supports_mte();
+ break;
case KVM_CAP_STEAL_TIME:
r = kvm_arm_pvtime_supported();
break;
@@ -689,6 +698,10 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
vgic_v4_load(vcpu);
preempt_enable();
}
+
+ if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
+ kvm_pmu_handle_pmcr(vcpu,
+ __vcpu_sys_reg(vcpu, PMCR_EL0));
}
}
@@ -1078,7 +1091,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
stage2_unmap_vm(vcpu->kvm);
else
- __flush_icache_all();
+ icache_inval_all_pou();
}
vcpu_reset_hcr(vcpu);
@@ -1350,6 +1363,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
return 0;
}
+ case KVM_ARM_MTE_COPY_TAGS: {
+ struct kvm_arm_copy_mte_tags copy_tags;
+
+ if (copy_from_user(&copy_tags, argp, sizeof(copy_tags)))
+ return -EFAULT;
+ return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
+ }
default:
return -EINVAL;
}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 1512a8007a78..1dfb83578277 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -1015,3 +1015,89 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
return ret;
}
+
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags)
+{
+ gpa_t guest_ipa = copy_tags->guest_ipa;
+ size_t length = copy_tags->length;
+ void __user *tags = copy_tags->addr;
+ gpa_t gfn;
+ bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
+ int ret = 0;
+
+ if (!kvm_has_mte(kvm))
+ return -EINVAL;
+
+ if (copy_tags->reserved[0] || copy_tags->reserved[1])
+ return -EINVAL;
+
+ if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
+ return -EINVAL;
+
+ if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
+ return -EINVAL;
+
+ gfn = gpa_to_gfn(guest_ipa);
+
+ mutex_lock(&kvm->slots_lock);
+
+ while (length > 0) {
+ kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
+ void *maddr;
+ unsigned long num_tags;
+ struct page *page;
+
+ if (is_error_noslot_pfn(pfn)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ page = pfn_to_online_page(pfn);
+ if (!page) {
+ /* Reject ZONE_DEVICE memory */
+ ret = -EFAULT;
+ goto out;
+ }
+ maddr = page_address(page);
+
+ if (!write) {
+ if (test_bit(PG_mte_tagged, &page->flags))
+ num_tags = mte_copy_tags_to_user(tags, maddr,
+ MTE_GRANULES_PER_PAGE);
+ else
+ /* No tags in memory, so write zeros */
+ num_tags = MTE_GRANULES_PER_PAGE -
+ clear_user(tags, MTE_GRANULES_PER_PAGE);
+ kvm_release_pfn_clean(pfn);
+ } else {
+ num_tags = mte_copy_tags_from_user(maddr, tags,
+ MTE_GRANULES_PER_PAGE);
+
+ /*
+ * Set the flag after checking the write
+ * completed fully
+ */
+ if (num_tags == MTE_GRANULES_PER_PAGE)
+ set_bit(PG_mte_tagged, &page->flags);
+
+ kvm_release_pfn_dirty(pfn);
+ }
+
+ if (num_tags != MTE_GRANULES_PER_PAGE) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ gfn++;
+ tags += num_tags;
+ length -= PAGE_SIZE;
+ }
+
+out:
+ mutex_unlock(&kvm->slots_lock);
+ /* If some data has been copied report the number of bytes copied */
+ if (length != copy_tags->length)
+ return copy_tags->length - length;
+ return ret;
+}
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e831d3dfd50d..435346ea1504 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -13,6 +13,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_mte.h>
#include <asm/kvm_ptrauth.h>
.text
@@ -51,6 +52,9 @@ alternative_else_nop_endif
add x29, x0, #VCPU_CONTEXT
+ // mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
+ mte_switch_to_guest x29, x1, x2
+
// Macro ptrauth_switch_to_guest format:
// ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
// The below macro to restore guest keys is not implemented in C code
@@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// when this feature is enabled for kernel code.
ptrauth_switch_to_hyp x1, x2, x3, x4, x5
+ // mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
+ mte_switch_to_hyp x1, x2, x3
+
// Restore hyp's sp_el0
restore_sp_el0 x2, x3
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 11541b94b328..0418399e0a20 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
new |= (old & PSR_C_BIT);
new |= (old & PSR_V_BIT);
- // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+ if (kvm_has_mte(vcpu->kvm))
+ new |= PSR_TCO_BIT;
new |= (old & PSR_DIT_BIT);
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 5f49df4ffdd8..9aa9b73475c9 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -76,6 +76,7 @@ el1_trap:
b __guest_exit
el1_irq:
+el1_fiq:
get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_IRQ
b __guest_exit
@@ -131,7 +132,6 @@ SYM_CODE_END(\label)
invalid_vector el2t_error_invalid
invalid_vector el2h_irq_invalid
invalid_vector el2h_fiq_invalid
- invalid_vector el1_fiq_invalid
.ltorg
@@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector)
valid_vect el1_sync // Synchronous 64-bit EL1
valid_vect el1_irq // IRQ 64-bit EL1
- invalid_vect el1_fiq_invalid // FIQ 64-bit EL1
+ valid_vect el1_fiq // FIQ 64-bit EL1
valid_vect el1_error // Error 64-bit EL1
valid_vect el1_sync // Synchronous 32-bit EL1
valid_vect el1_irq // IRQ 32-bit EL1
- invalid_vect el1_fiq_invalid // FIQ 32-bit EL1
+ valid_vect el1_fiq // FIQ 32-bit EL1
valid_vect el1_error // Error 32-bit EL1
SYM_CODE_END(__kvm_hyp_vector)
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index cce43bfe158f..de7e14c862e6 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -14,6 +14,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
@@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
}
+static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+ if (!vcpu)
+ vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+ return kvm_has_mte(kern_hyp_va(vcpu->kvm));
+}
+
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
@@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par();
ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
+ if (ctxt_has_mte(ctxt)) {
+ ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
+ ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
+ }
+
ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
@@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
+ if (ctxt_has_mte(ctxt)) {
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
+ write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
+ }
+
if (!has_vhe() &&
cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
ctxt->__hyp_running_vcpu) {
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index 18a4494337bd..fb0f523d1492 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -7,7 +7,7 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
-#define HYP_NO_ORDER UINT_MAX
+#define HYP_NO_ORDER USHRT_MAX
struct hyp_pool {
/*
@@ -19,48 +19,13 @@ struct hyp_pool {
struct list_head free_area[MAX_ORDER];
phys_addr_t range_start;
phys_addr_t range_end;
- unsigned int max_order;
+ unsigned short max_order;
};
-static inline void hyp_page_ref_inc(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- p->refcount++;
- hyp_spin_unlock(&pool->lock);
-}
-
-static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
- int ret;
-
- hyp_spin_lock(&pool->lock);
- p->refcount--;
- ret = (p->refcount == 0);
- hyp_spin_unlock(&pool->lock);
-
- return ret;
-}
-
-static inline void hyp_set_page_refcounted(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- if (p->refcount) {
- hyp_spin_unlock(&pool->lock);
- BUG();
- }
- p->refcount = 1;
- hyp_spin_unlock(&pool->lock);
-}
-
/* Allocation */
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
-void hyp_get_page(void *addr);
-void hyp_put_page(void *addr);
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void hyp_get_page(struct hyp_pool *pool, void *addr);
+void hyp_put_page(struct hyp_pool *pool, void *addr);
/* Used pages cannot be freed */
int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 42d81ec739fa..9c227d87c36d 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -23,7 +23,7 @@ extern struct host_kvm host_kvm;
int __pkvm_prot_finalize(void);
int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool);
+int kvm_host_prepare_stage2(void *pgt_pool_base);
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
static __always_inline void __load_host_stage2(void)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index fd78bde939ee..592b7edb3edb 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -7,12 +7,9 @@
#include <linux/types.h>
-struct hyp_pool;
struct hyp_page {
- unsigned int refcount;
- unsigned int order;
- struct hyp_pool *pool;
- struct list_head node;
+ unsigned short refcount;
+ unsigned short order;
};
extern u64 __hyp_vmemmap;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 0095f6289742..8ec3a5a7744b 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void)
return res;
}
-static inline unsigned long host_s2_mem_pgtable_pages(void)
+static inline unsigned long host_s2_pgtable_pages(void)
{
+ unsigned long res;
+
/*
* Include an extra 16 pages to safely upper-bound the worst case of
* concatenated pgds.
*/
- return __hyp_pgtable_total_pages() + 16;
-}
+ res = __hyp_pgtable_total_pages() + 16;
-static inline unsigned long host_s2_dev_pgtable_pages(void)
-{
/* Allow 1 GiB for MMIO mappings */
- return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+ return res;
}
#endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
index 36cef6915428..958734f4d6b0 100644
--- a/arch/arm64/kvm/hyp/nvhe/cache.S
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -7,7 +7,7 @@
#include <asm/assembler.h>
#include <asm/alternative.h>
-SYM_FUNC_START_PI(__flush_dcache_area)
+SYM_FUNC_START_PI(dcache_clean_inval_poc)
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(__flush_dcache_area)
+SYM_FUNC_END_PI(dcache_clean_inval_poc)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 4b60c0056c04..d938ce95d3bd 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -23,8 +23,7 @@
extern unsigned long hyp_nr_cpus;
struct host_kvm host_kvm;
-static struct hyp_pool host_s2_mem;
-static struct hyp_pool host_s2_dev;
+static struct hyp_pool host_s2_pool;
/*
* Copies of the host's CPU features registers holding sanitized values.
@@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1;
static void *host_s2_zalloc_pages_exact(size_t size)
{
- return hyp_alloc_pages(&host_s2_mem, get_order(size));
+ return hyp_alloc_pages(&host_s2_pool, get_order(size));
}
static void *host_s2_zalloc_page(void *pool)
@@ -44,20 +43,24 @@ static void *host_s2_zalloc_page(void *pool)
return hyp_alloc_pages(pool, 0);
}
-static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
+static void host_s2_get_page(void *addr)
+{
+ hyp_get_page(&host_s2_pool, addr);
+}
+
+static void host_s2_put_page(void *addr)
+{
+ hyp_put_page(&host_s2_pool, addr);
+}
+
+static int prepare_s2_pool(void *pgt_pool_base)
{
unsigned long nr_pages, pfn;
int ret;
- pfn = hyp_virt_to_pfn(mem_pgt_pool);
- nr_pages = host_s2_mem_pgtable_pages();
- ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0);
- if (ret)
- return ret;
-
- pfn = hyp_virt_to_pfn(dev_pgt_pool);
- nr_pages = host_s2_dev_pgtable_pages();
- ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0);
+ pfn = hyp_virt_to_pfn(pgt_pool_base);
+ nr_pages = host_s2_pgtable_pages();
+ ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
if (ret)
return ret;
@@ -67,8 +70,8 @@ static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
.page_count = hyp_page_count,
- .get_page = hyp_get_page,
- .put_page = hyp_put_page,
+ .get_page = host_s2_get_page,
+ .put_page = host_s2_put_page,
};
return 0;
@@ -86,7 +89,7 @@ static void prepare_host_vtcr(void)
id_aa64mmfr1_el1_sys_val, phys_shift);
}
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
+int kvm_host_prepare_stage2(void *pgt_pool_base)
{
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
int ret;
@@ -94,7 +97,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
prepare_host_vtcr();
hyp_spin_lock_init(&host_kvm.lock);
- ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool);
+ ret = prepare_s2_pool(pgt_pool_base);
if (ret)
return ret;
@@ -199,11 +202,10 @@ static bool range_is_memory(u64 start, u64 end)
}
static inline int __host_stage2_idmap(u64 start, u64 end,
- enum kvm_pgtable_prot prot,
- struct hyp_pool *pool)
+ enum kvm_pgtable_prot prot)
{
return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
- prot, pool);
+ prot, &host_s2_pool);
}
static int host_stage2_idmap(u64 addr)
@@ -211,7 +213,6 @@ static int host_stage2_idmap(u64 addr)
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
struct kvm_mem_range range;
bool is_memory = find_mem_range(addr, &range);
- struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev;
int ret;
if (is_memory)
@@ -222,22 +223,21 @@ static int host_stage2_idmap(u64 addr)
if (ret)
goto unlock;
- ret = __host_stage2_idmap(range.start, range.end, prot, pool);
- if (is_memory || ret != -ENOMEM)
+ ret = __host_stage2_idmap(range.start, range.end, prot);
+ if (ret != -ENOMEM)
goto unlock;
/*
- * host_s2_mem has been provided with enough pages to cover all of
- * memory with page granularity, so we should never hit the ENOMEM case.
- * However, it is difficult to know how much of the MMIO range we will
- * need to cover upfront, so we may need to 'recycle' the pages if we
- * run out.
+ * The pool has been provided with enough pages to cover all of memory
+ * with page granularity, but it is difficult to know how much of the
+ * MMIO range we will need to cover upfront, so we may need to 'recycle'
+ * the pages if we run out.
*/
ret = host_stage2_unmap_dev_all();
if (ret)
goto unlock;
- ret = __host_stage2_idmap(range.start, range.end, prot, pool);
+ ret = __host_stage2_idmap(range.start, range.end, prot);
unlock:
hyp_spin_unlock(&host_kvm.lock);
@@ -258,7 +258,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
hyp_spin_lock(&host_kvm.lock);
ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
- &host_s2_mem, pkvm_hyp_id);
+ &host_s2_pool, pkvm_hyp_id);
hyp_spin_unlock(&host_kvm.lock);
return ret != -EAGAIN ? ret : 0;
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 237e03bf0cb1..41fc25bdfb34 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -32,7 +32,7 @@ u64 __hyp_vmemmap;
*/
static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned int order)
+ unsigned short order)
{
phys_addr_t addr = hyp_page_to_phys(p);
@@ -51,21 +51,49 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
/* Find a buddy page currently available for allocation */
static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned int order)
+ unsigned short order)
{
struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order);
- if (!buddy || buddy->order != order || list_empty(&buddy->node))
+ if (!buddy || buddy->order != order || buddy->refcount)
return NULL;
return buddy;
}
+/*
+ * Pages that are available for allocation are tracked in free-lists, so we use
+ * the pages themselves to store the list nodes to avoid wasting space. As the
+ * allocator always returns zeroed pages (which are zeroed on the hyp_put_page()
+ * path to optimize allocation speed), we also need to clean-up the list node in
+ * each page when we take it out of the list.
+ */
+static inline void page_remove_from_list(struct hyp_page *p)
+{
+ struct list_head *node = hyp_page_to_virt(p);
+
+ __list_del_entry(node);
+ memset(node, 0, sizeof(*node));
+}
+
+static inline void page_add_to_list(struct hyp_page *p, struct list_head *head)
+{
+ struct list_head *node = hyp_page_to_virt(p);
+
+ INIT_LIST_HEAD(node);
+ list_add_tail(node, head);
+}
+
+static inline struct hyp_page *node_to_page(struct list_head *node)
+{
+ return hyp_virt_to_page(node);
+}
+
static void __hyp_attach_page(struct hyp_pool *pool,
struct hyp_page *p)
{
- unsigned int order = p->order;
+ unsigned short order = p->order;
struct hyp_page *buddy;
memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
@@ -83,32 +111,23 @@ static void __hyp_attach_page(struct hyp_pool *pool,
break;
/* Take the buddy out of its list, and coallesce with @p */
- list_del_init(&buddy->node);
+ page_remove_from_list(buddy);
buddy->order = HYP_NO_ORDER;
p = min(p, buddy);
}
/* Mark the new head, and insert it */
p->order = order;
- list_add_tail(&p->node, &pool->free_area[order]);
-}
-
-static void hyp_attach_page(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- __hyp_attach_page(pool, p);
- hyp_spin_unlock(&pool->lock);
+ page_add_to_list(p, &pool->free_area[order]);
}
static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned int order)
+ unsigned short order)
{
struct hyp_page *buddy;
- list_del_init(&p->node);
+ page_remove_from_list(p);
while (p->order > order) {
/*
* The buddy of order n - 1 currently has HYP_NO_ORDER as it
@@ -119,30 +138,64 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
p->order--;
buddy = __find_buddy_nocheck(pool, p, p->order);
buddy->order = p->order;
- list_add_tail(&buddy->node, &pool->free_area[buddy->order]);
+ page_add_to_list(buddy, &pool->free_area[buddy->order]);
}
return p;
}
-void hyp_put_page(void *addr)
+static inline void hyp_page_ref_inc(struct hyp_page *p)
{
- struct hyp_page *p = hyp_virt_to_page(addr);
+ BUG_ON(p->refcount == USHRT_MAX);
+ p->refcount++;
+}
+static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
+{
+ p->refcount--;
+ return (p->refcount == 0);
+}
+
+static inline void hyp_set_page_refcounted(struct hyp_page *p)
+{
+ BUG_ON(p->refcount);
+ p->refcount = 1;
+}
+
+static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p)
+{
if (hyp_page_ref_dec_and_test(p))
- hyp_attach_page(p);
+ __hyp_attach_page(pool, p);
+}
+
+/*
+ * Changes to the buddy tree and page refcounts must be done with the hyp_pool
+ * lock held. If a refcount change requires an update to the buddy tree (e.g.
+ * hyp_put_page()), both operations must be done within the same critical
+ * section to guarantee transient states (e.g. a page with null refcount but
+ * not yet attached to a free list) can't be observed by well-behaved readers.
+ */
+void hyp_put_page(struct hyp_pool *pool, void *addr)
+{
+ struct hyp_page *p = hyp_virt_to_page(addr);
+
+ hyp_spin_lock(&pool->lock);
+ __hyp_put_page(pool, p);
+ hyp_spin_unlock(&pool->lock);
}
-void hyp_get_page(void *addr)
+void hyp_get_page(struct hyp_pool *pool, void *addr)
{
struct hyp_page *p = hyp_virt_to_page(addr);
+ hyp_spin_lock(&pool->lock);
hyp_page_ref_inc(p);
+ hyp_spin_unlock(&pool->lock);
}
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
{
- unsigned int i = order;
+ unsigned short i = order;
struct hyp_page *p;
hyp_spin_lock(&pool->lock);
@@ -156,11 +209,11 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
}
/* Extract it from the tree at the right order */
- p = list_first_entry(&pool->free_area[i], struct hyp_page, node);
+ p = node_to_page(pool->free_area[i].next);
p = __hyp_extract_page(pool, p, order);
- hyp_spin_unlock(&pool->lock);
hyp_set_page_refcounted(p);
+ hyp_spin_unlock(&pool->lock);
return hyp_page_to_virt(p);
}
@@ -181,15 +234,14 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
/* Init the vmemmap portion */
p = hyp_phys_to_page(phys);
- memset(p, 0, sizeof(*p) * nr_pages);
for (i = 0; i < nr_pages; i++) {
- p[i].pool = pool;
- INIT_LIST_HEAD(&p[i].node);
+ p[i].order = 0;
+ hyp_set_page_refcounted(&p[i]);
}
/* Attach the unused pages to the buddy tree */
for (i = reserved_pages; i < nr_pages; i++)
- __hyp_attach_page(pool, &p[i]);
+ __hyp_put_page(pool, &p[i]);
return 0;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index a3d3a275344e..0b574d106519 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus;
static void *vmemmap_base;
static void *hyp_pgt_base;
-static void *host_s2_mem_pgt_base;
-static void *host_s2_dev_pgt_base;
+static void *host_s2_pgt_base;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
static int divide_memory_pool(void *virt, unsigned long size)
@@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size)
if (!hyp_pgt_base)
return -ENOMEM;
- nr_pages = host_s2_mem_pgtable_pages();
- host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages);
- if (!host_s2_mem_pgt_base)
- return -ENOMEM;
-
- nr_pages = host_s2_dev_pgtable_pages();
- host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages);
- if (!host_s2_dev_pgt_base)
+ nr_pages = host_s2_pgtable_pages();
+ host_s2_pgt_base = hyp_early_alloc_contig(nr_pages);
+ if (!host_s2_pgt_base)
return -ENOMEM;
return 0;
@@ -134,7 +128,8 @@ static void update_nvhe_init_params(void)
for (i = 0; i < hyp_nr_cpus; i++) {
params = per_cpu_ptr(&kvm_init_params, i);
params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd);
- __flush_dcache_area(params, sizeof(*params));
+ dcache_clean_inval_poc((unsigned long)params,
+ (unsigned long)params + sizeof(*params));
}
}
@@ -143,6 +138,16 @@ static void *hyp_zalloc_hyp_page(void *arg)
return hyp_alloc_pages(&hpool, 0);
}
+static void hpool_get_page(void *addr)
+{
+ hyp_get_page(&hpool, addr);
+}
+
+static void hpool_put_page(void *addr)
+{
+ hyp_put_page(&hpool, addr);
+}
+
void __noreturn __pkvm_init_finalise(void)
{
struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -158,7 +163,7 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
- ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base);
+ ret = kvm_host_prepare_stage2(host_s2_pgt_base);
if (ret)
goto out;
@@ -166,8 +171,8 @@ void __noreturn __pkvm_init_finalise(void)
.zalloc_page = hyp_zalloc_hyp_page,
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
- .get_page = hyp_get_page,
- .put_page = hyp_put_page,
+ .get_page = hpool_get_page,
+ .put_page = hpool_put_page,
};
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 83dc3b271bc5..38ed0f6f2703 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -104,7 +104,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
* you should be running with VHE enabled.
*/
if (icache_is_vpipt())
- __flush_icache_all();
+ icache_inval_all_pou();
__tlb_switch_to_host(&cxt);
}
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index c37c1dc4feaf..05321f4165e3 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -577,12 +577,24 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
mm_ops->put_page(ptep);
}
+static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
+{
+ u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
+ return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
+}
+
+static bool stage2_pte_executable(kvm_pte_t pte)
+{
+ return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+}
+
static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep,
struct stage2_map_data *data)
{
kvm_pte_t new, old = *ptep;
u64 granule = kvm_granule_size(level), phys = data->phys;
+ struct kvm_pgtable *pgt = data->mmu->pgt;
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
if (!kvm_block_mapping_supported(addr, end, phys, level))
@@ -606,6 +618,14 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
}
+ /* Perform CMOs before installation of the guest stage-2 PTE */
+ if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
+ granule);
+
+ if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
+ mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
+
smp_store_release(ptep, new);
if (stage2_pte_is_counted(new))
mm_ops->get_page(ptep);
@@ -798,12 +818,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
return ret;
}
-static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
-{
- u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
- return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
-}
-
static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg)
@@ -839,8 +853,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
stage2_put_pte(ptep, mmu, addr, level, mm_ops);
if (need_flush) {
- __flush_dcache_area(kvm_pte_follow(pte, mm_ops),
- kvm_granule_size(level));
+ kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
+
+ dcache_clean_inval_poc((unsigned long)pte_follow,
+ (unsigned long)pte_follow +
+ kvm_granule_size(level));
}
if (childp)
@@ -861,10 +878,11 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
}
struct stage2_attr_data {
- kvm_pte_t attr_set;
- kvm_pte_t attr_clr;
- kvm_pte_t pte;
- u32 level;
+ kvm_pte_t attr_set;
+ kvm_pte_t attr_clr;
+ kvm_pte_t pte;
+ u32 level;
+ struct kvm_pgtable_mm_ops *mm_ops;
};
static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
@@ -873,6 +891,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
{
kvm_pte_t pte = *ptep;
struct stage2_attr_data *data = arg;
+ struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
if (!kvm_pte_valid(pte))
return 0;
@@ -887,8 +906,17 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
* but worst-case the access flag update gets lost and will be
* set on the next access instead.
*/
- if (data->pte != pte)
+ if (data->pte != pte) {
+ /*
+ * Invalidate instruction cache before updating the guest
+ * stage-2 PTE if we are going to add executable permission.
+ */
+ if (mm_ops->icache_inval_pou &&
+ stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
+ mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
WRITE_ONCE(*ptep, pte);
+ }
return 0;
}
@@ -903,6 +931,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
struct stage2_attr_data data = {
.attr_set = attr_set & attr_mask,
.attr_clr = attr_clr & attr_mask,
+ .mm_ops = pgt->mm_ops,
};
struct kvm_pgtable_walker walker = {
.cb = stage2_attr_walker,
@@ -988,11 +1017,15 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct kvm_pgtable *pgt = arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
kvm_pte_t pte = *ptep;
+ kvm_pte_t *pte_follow;
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
return 0;
- __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
+ pte_follow = kvm_pte_follow(pte, mm_ops);
+ dcache_clean_inval_poc((unsigned long)pte_follow,
+ (unsigned long)pte_follow +
+ kvm_granule_size(level));
return 0;
}
diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c
index 83ca23ac259b..d654921dd09b 100644
--- a/arch/arm64/kvm/hyp/reserved_mem.c
+++ b/arch/arm64/kvm/hyp/reserved_mem.c
@@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void)
}
hyp_mem_pages += hyp_s1_pgtable_pages();
- hyp_mem_pages += host_s2_mem_pgtable_pages();
- hyp_mem_pages += host_s2_dev_pgtable_pages();
+ hyp_mem_pages += host_s2_pgtable_pages();
/*
* The hyp_vmemmap needs to be backed by pages, but these pages
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c10207fed2f3..57292dc5ce35 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -126,6 +126,16 @@ static void *kvm_host_va(phys_addr_t phys)
return __va(phys);
}
+static void clean_dcache_guest_page(void *va, size_t size)
+{
+ __clean_dcache_guest_page(va, size);
+}
+
+static void invalidate_icache_guest_page(void *va, size_t size)
+{
+ __invalidate_icache_guest_page(va, size);
+}
+
/*
* Unmapping vs dcache management:
*
@@ -432,6 +442,8 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.page_count = kvm_host_page_count,
.phys_to_virt = kvm_host_va,
.virt_to_phys = kvm_host_pa,
+ .dcache_clean_inval_poc = clean_dcache_guest_page,
+ .icache_inval_pou = invalidate_icache_guest_page,
};
/**
@@ -693,16 +705,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
-static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
- __clean_dcache_guest_page(pfn, size);
-}
-
-static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
- __invalidate_icache_guest_page(pfn, size);
-}
-
static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
{
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
@@ -822,6 +824,74 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
return PAGE_SIZE;
}
+static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
+{
+ unsigned long pa;
+
+ if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
+ return huge_page_shift(hstate_vma(vma));
+
+ if (!(vma->vm_flags & VM_PFNMAP))
+ return PAGE_SHIFT;
+
+ VM_BUG_ON(is_vm_hugetlb_page(vma));
+
+ pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+ if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
+ ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
+ ALIGN(hva, PUD_SIZE) <= vma->vm_end)
+ return PUD_SHIFT;
+#endif
+
+ if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
+ ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
+ ALIGN(hva, PMD_SIZE) <= vma->vm_end)
+ return PMD_SHIFT;
+
+ return PAGE_SHIFT;
+}
+
+/*
+ * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
+ * able to see the page's tags and therefore they must be initialised first. If
+ * PG_mte_tagged is set, tags have already been initialised.
+ *
+ * The race in the test/set of the PG_mte_tagged flag is handled by:
+ * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs
+ * racing to santise the same page
+ * - mmap_lock protects between a VM faulting a page in and the VMM performing
+ * an mprotect() to add VM_MTE
+ */
+static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+ unsigned long size)
+{
+ unsigned long i, nr_pages = size >> PAGE_SHIFT;
+ struct page *page;
+
+ if (!kvm_has_mte(kvm))
+ return 0;
+
+ /*
+ * pfn_to_online_page() is used to reject ZONE_DEVICE pages
+ * that may not support tags.
+ */
+ page = pfn_to_online_page(pfn);
+
+ if (!page)
+ return -EFAULT;
+
+ for (i = 0; i < nr_pages; i++, page++) {
+ if (!test_bit(PG_mte_tagged, &page->flags)) {
+ mte_clear_page_tags(page_address(page));
+ set_bit(PG_mte_tagged, &page->flags);
+ }
+ }
+
+ return 0;
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@@ -830,6 +900,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
bool write_fault, writable, force_pte = false;
bool exec_fault;
bool device = false;
+ bool shared;
unsigned long mmu_seq;
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -853,7 +924,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- /* Let's check if we will get back a huge page backed by hugetlbfs */
+ /*
+ * Let's check if we will get back a huge page backed by hugetlbfs, or
+ * get block mapping for device MMIO region.
+ */
mmap_read_lock(current->mm);
vma = find_vma_intersection(current->mm, hva, hva + 1);
if (unlikely(!vma)) {
@@ -862,17 +936,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- if (is_vm_hugetlb_page(vma))
- vma_shift = huge_page_shift(hstate_vma(vma));
- else
- vma_shift = PAGE_SHIFT;
-
- if (logging_active ||
- (vma->vm_flags & VM_PFNMAP)) {
+ /*
+ * logging_active is guaranteed to never be true for VM_PFNMAP
+ * memslots.
+ */
+ if (logging_active) {
force_pte = true;
vma_shift = PAGE_SHIFT;
+ } else {
+ vma_shift = get_vma_page_shift(vma, hva);
}
+ shared = (vma->vm_flags & VM_PFNMAP);
+
switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SHIFT:
@@ -943,8 +1019,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
if (kvm_is_device_pfn(pfn)) {
+ /*
+ * If the page was identified as device early by looking at
+ * the VMA flags, vma_pagesize is already representing the
+ * largest quantity we can map. If instead it was mapped
+ * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
+ * and must not be upgraded.
+ *
+ * In both cases, we don't let transparent_hugepage_adjust()
+ * change things at the last minute.
+ */
device = true;
- force_pte = true;
} else if (logging_active && !write_fault) {
/*
* Only actually map the page as writable if this was a write
@@ -965,19 +1050,25 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
- if (vma_pagesize == PAGE_SIZE && !force_pte)
+ if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
&pfn, &fault_ipa);
+
+ if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+ /* Check the VMM hasn't introduced a new VM_SHARED VMA */
+ if (!shared)
+ ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
+ else
+ ret = -EFAULT;
+ if (ret)
+ goto out_unlock;
+ }
+
if (writable)
prot |= KVM_PGTABLE_PROT_W;
- if (fault_status != FSC_PERM && !device)
- clean_dcache_guest_page(pfn, vma_pagesize);
-
- if (exec_fault) {
+ if (exec_fault)
prot |= KVM_PGTABLE_PROT_X;
- invalidate_icache_guest_page(pfn, vma_pagesize);
- }
if (device)
prot |= KVM_PGTABLE_PROT_DEVICE;
@@ -1168,19 +1259,22 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_pfn_t pfn = pte_pfn(range->pte);
+ int ret;
if (!kvm->arch.mmu.pgt)
return false;
WARN_ON(range->end - range->start != 1);
- /*
- * We've moved a page around, probably through CoW, so let's treat it
- * just like a translation fault and clean the cache to the PoC.
- */
- clean_dcache_guest_page(pfn, PAGE_SIZE);
+ ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
+ if (ret)
+ return false;
/*
+ * We've moved a page around, probably through CoW, so let's treat
+ * it just like a translation fault and the map handler will clean
+ * the cache to the PoC.
+ *
* The MMU notifiers will have unmapped a huge PMD before calling
* ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
* therefore we never need to clear out a huge PMD through this
@@ -1346,7 +1440,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
hva_t hva = mem->userspace_addr;
hva_t reg_end = hva + mem->memory_size;
- bool writable = !(mem->flags & KVM_MEM_READONLY);
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1363,8 +1456,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
mmap_read_lock(current->mm);
/*
* A memory region could potentially cover multiple VMAs, and any holes
- * between them, so iterate over all of them to find out if we can map
- * any of them right now.
+ * between them, so iterate over all of them.
*
* +--------------------------------------------+
* +---------------+----------------+ +----------------+
@@ -1375,51 +1467,29 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
*/
do {
struct vm_area_struct *vma;
- hva_t vm_start, vm_end;
vma = find_vma_intersection(current->mm, hva, reg_end);
if (!vma)
break;
/*
- * Take the intersection of this VMA with the memory region
+ * VM_SHARED mappings are not allowed with MTE to avoid races
+ * when updating the PG_mte_tagged page flag, see
+ * sanitise_mte_tags for more details.
*/
- vm_start = max(hva, vma->vm_start);
- vm_end = min(reg_end, vma->vm_end);
+ if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
+ return -EINVAL;
if (vma->vm_flags & VM_PFNMAP) {
- gpa_t gpa = mem->guest_phys_addr +
- (vm_start - mem->userspace_addr);
- phys_addr_t pa;
-
- pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
- pa += vm_start - vma->vm_start;
-
/* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
- goto out;
- }
-
- ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
- vm_end - vm_start,
- writable);
- if (ret)
break;
+ }
}
- hva = vm_end;
+ hva = min(reg_end, vma->vm_end);
} while (hva < reg_end);
- if (change == KVM_MR_FLAGS_ONLY)
- goto out;
-
- spin_lock(&kvm->mmu_lock);
- if (ret)
- unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
- else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
- stage2_flush_memslot(kvm, memslot);
- spin_unlock(&kvm->mmu_lock);
-out:
mmap_read_unlock(current->mm);
return ret;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index fd167d4f4215..f33825c995cb 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -578,6 +578,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
if (val & ARMV8_PMU_PMCR_P) {
+ mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
for_each_set_bit(i, &mask, 32)
kvm_pmu_set_counter_value(vcpu, i, 0);
}
@@ -850,6 +851,9 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
return -EINVAL;
}
+ /* One-off reload of the PMU on first run */
+ kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
+
return 0;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index d37ebee085cf..cba7872d69a8 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -176,6 +176,10 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
return false;
+ /* MTE is incompatible with AArch32 */
+ if (kvm_has_mte(vcpu->kvm) && is32bit)
+ return false;
+
/* Check that the vcpus are either all 32bit or all 64bit */
kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1a7968ad078c..f6f126eb6ac1 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
break;
case SYS_ID_AA64PFR1_EL1:
val &= ~FEATURE(ID_AA64PFR1_MTE);
+ if (kvm_has_mte(vcpu->kvm)) {
+ u64 pfr, mte;
+
+ pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+ mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
+ val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte);
+ }
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
@@ -1302,6 +1309,23 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return true;
}
+static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_mte(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+#define MTE_REG(name) { \
+ SYS_DESC(SYS_##name), \
+ .access = undef_access, \
+ .reset = reset_unknown, \
+ .reg = name, \
+ .visibility = mte_visibility, \
+}
+
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define ID_SANITISED(name) { \
SYS_DESC(SYS_##name), \
@@ -1470,8 +1494,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
- { SYS_DESC(SYS_RGSR_EL1), undef_access },
- { SYS_DESC(SYS_GCR_EL1), undef_access },
+ MTE_REG(RGSR_EL1),
+ MTE_REG(GCR_EL1),
{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
@@ -1498,8 +1522,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
- { SYS_DESC(SYS_TFSR_EL1), undef_access },
- { SYS_DESC(SYS_TFSRE0_EL1), undef_access },
+ MTE_REG(TFSR_EL1),
+ MTE_REG(TFSRE0_EL1),
{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 58cbda00e56d..340c51d87677 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -482,6 +482,16 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
return IRQ_HANDLED;
}
+static struct gic_kvm_info *gic_kvm_info;
+
+void __init vgic_set_kvm_info(const struct gic_kvm_info *info)
+{
+ BUG_ON(gic_kvm_info != NULL);
+ gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (gic_kvm_info)
+ *gic_kvm_info = *info;
+}
+
/**
* kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
*
@@ -509,18 +519,29 @@ void kvm_vgic_init_cpu_hardware(void)
*/
int kvm_vgic_hyp_init(void)
{
- const struct gic_kvm_info *gic_kvm_info;
+ bool has_mask;
int ret;
- gic_kvm_info = gic_get_kvm_info();
if (!gic_kvm_info)
return -ENODEV;
- if (!gic_kvm_info->maint_irq) {
+ has_mask = !gic_kvm_info->no_maint_irq_mask;
+
+ if (has_mask && !gic_kvm_info->maint_irq) {
kvm_err("No vgic maintenance irq\n");
return -ENXIO;
}
+ /*
+ * If we get one of these oddball non-GICs, taint the kernel,
+ * as we have no idea of how they *really* behave.
+ */
+ if (gic_kvm_info->no_hw_deactivation) {
+ kvm_info("Non-architectural vgic, tainting kernel\n");
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ kvm_vgic_global_state.no_hw_deactivation = true;
+ }
+
switch (gic_kvm_info->type) {
case GIC_V2:
ret = vgic_v2_probe(gic_kvm_info);
@@ -536,10 +557,17 @@ int kvm_vgic_hyp_init(void)
ret = -ENODEV;
}
+ kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+
+ kfree(gic_kvm_info);
+ gic_kvm_info = NULL;
+
if (ret)
return ret;
- kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+ if (!has_mask)
+ return 0;
+
ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
vgic_maintenance_handler,
"vgic", kvm_get_running_vcpus());
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 11934c2af2f4..2c580204f1dc 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -108,11 +108,22 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
* If this causes us to lower the level, we have to also clear
* the physical active state, since we will otherwise never be
* told when the interrupt becomes asserted again.
+ *
+ * Another case is when the interrupt requires a helping hand
+ * on deactivation (no HW deactivation, for example).
*/
- if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) {
- irq->line_level = vgic_get_phys_line_level(irq);
+ if (vgic_irq_is_mapped_level(irq)) {
+ bool resample = false;
+
+ if (val & GICH_LR_PENDING_BIT) {
+ irq->line_level = vgic_get_phys_line_level(irq);
+ resample = !irq->line_level;
+ } else if (vgic_irq_needs_resampling(irq) &&
+ !(irq->active || irq->pending_latch)) {
+ resample = true;
+ }
- if (!irq->line_level)
+ if (resample)
vgic_irq_set_phys_active(irq, false);
}
@@ -152,7 +163,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (irq->group)
val |= GICH_LR_GROUP1;
- if (irq->hw) {
+ if (irq->hw && !vgic_irq_needs_resampling(irq)) {
val |= GICH_LR_HW;
val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
/*
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 41ecf219c333..66004f61cd83 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -101,11 +101,22 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
* If this causes us to lower the level, we have to also clear
* the physical active state, since we will otherwise never be
* told when the interrupt becomes asserted again.
+ *
+ * Another case is when the interrupt requires a helping hand
+ * on deactivation (no HW deactivation, for example).
*/
- if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) {
- irq->line_level = vgic_get_phys_line_level(irq);
+ if (vgic_irq_is_mapped_level(irq)) {
+ bool resample = false;
+
+ if (val & ICH_LR_PENDING_BIT) {
+ irq->line_level = vgic_get_phys_line_level(irq);
+ resample = !irq->line_level;
+ } else if (vgic_irq_needs_resampling(irq) &&
+ !(irq->active || irq->pending_latch)) {
+ resample = true;
+ }
- if (!irq->line_level)
+ if (resample)
vgic_irq_set_phys_active(irq, false);
}
@@ -136,7 +147,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
}
}
- if (irq->hw) {
+ if (irq->hw && !vgic_irq_needs_resampling(irq)) {
val |= ICH_LR_HW;
val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
/*
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 15b666200f0b..111bff47e471 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -182,8 +182,8 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq)
BUG_ON(!irq->hw);
- if (irq->get_input_level)
- return irq->get_input_level(irq->intid);
+ if (irq->ops && irq->ops->get_input_level)
+ return irq->ops->get_input_level(irq->intid);
WARN_ON(irq_get_irqchip_state(irq->host_irq,
IRQCHIP_STATE_PENDING,
@@ -480,7 +480,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
unsigned int host_irq,
- bool (*get_input_level)(int vindid))
+ struct irq_ops *ops)
{
struct irq_desc *desc;
struct irq_data *data;
@@ -500,7 +500,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
irq->hw = true;
irq->host_irq = host_irq;
irq->hwintid = data->hwirq;
- irq->get_input_level = get_input_level;
+ irq->ops = ops;
return 0;
}
@@ -509,11 +509,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
irq->hw = false;
irq->hwintid = 0;
- irq->get_input_level = NULL;
+ irq->ops = NULL;
}
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
- u32 vintid, bool (*get_input_level)(int vindid))
+ u32 vintid, struct irq_ops *ops)
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
unsigned long flags;
@@ -522,7 +522,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
BUG_ON(!irq);
raw_spin_lock_irqsave(&irq->irq_lock, flags);
- ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
+ ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c
index c83bb5a4aad2..baee22961bdb 100644
--- a/arch/arm64/lib/uaccess_flushcache.c
+++ b/arch/arm64/lib/uaccess_flushcache.c
@@ -15,7 +15,7 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt)
* barrier to order the cache maintenance against the memcpy.
*/
memcpy(dst, src, cnt);
- __clean_dcache_area_pop(dst, cnt);
+ dcache_clean_pop((unsigned long)dst, (unsigned long)dst + cnt);
}
EXPORT_SYMBOL_GPL(memcpy_flushcache);
@@ -33,6 +33,6 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from,
rc = raw_copy_from_user(to, from, n);
/* See above */
- __clean_dcache_area_pop(to, n - rc);
+ dcache_clean_pop((unsigned long)to, (unsigned long)to + n - rc);
return rc;
}
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 2d881f34dd9d..5051b3c1a4f1 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -15,7 +15,7 @@
#include <asm/asm-uaccess.h>
/*
- * flush_icache_range(start,end)
+ * caches_clean_inval_pou_macro(start,end) [fixup]
*
* Ensure that the I and D caches are coherent within specified region.
* This is typically used when code has been written to a memory region,
@@ -23,12 +23,27 @@
*
* - start - virtual start address of region
* - end - virtual end address of region
+ * - fixup - optional label to branch to on user fault
*/
-SYM_FUNC_START(__flush_icache_range)
- /* FALLTHROUGH */
+.macro caches_clean_inval_pou_macro, fixup
+alternative_if ARM64_HAS_CACHE_IDC
+ dsb ishst
+ b .Ldc_skip_\@
+alternative_else_nop_endif
+ mov x2, x0
+ mov x3, x1
+ dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup
+.Ldc_skip_\@:
+alternative_if ARM64_HAS_CACHE_DIC
+ isb
+ b .Lic_skip_\@
+alternative_else_nop_endif
+ invalidate_icache_by_line x0, x1, x2, x3, \fixup
+.Lic_skip_\@:
+.endm
/*
- * __flush_cache_user_range(start,end)
+ * caches_clean_inval_pou(start,end)
*
* Ensure that the I and D caches are coherent within specified region.
* This is typically used when code has been written to a memory region,
@@ -37,117 +52,103 @@ SYM_FUNC_START(__flush_icache_range)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-SYM_FUNC_START(__flush_cache_user_range)
+SYM_FUNC_START(caches_clean_inval_pou)
+ caches_clean_inval_pou_macro
+ ret
+SYM_FUNC_END(caches_clean_inval_pou)
+
+/*
+ * caches_clean_inval_user_pou(start,end)
+ *
+ * Ensure that the I and D caches are coherent within specified region.
+ * This is typically used when code has been written to a memory region,
+ * and will be executed.
+ *
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+SYM_FUNC_START(caches_clean_inval_user_pou)
uaccess_ttbr0_enable x2, x3, x4
-alternative_if ARM64_HAS_CACHE_IDC
- dsb ishst
- b 7f
-alternative_else_nop_endif
- dcache_line_size x2, x3
- sub x3, x2, #1
- bic x4, x0, x3
-1:
-user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
- add x4, x4, x2
- cmp x4, x1
- b.lo 1b
- dsb ish
-7:
-alternative_if ARM64_HAS_CACHE_DIC
- isb
- b 8f
-alternative_else_nop_endif
- invalidate_icache_by_line x0, x1, x2, x3, 9f
-8: mov x0, #0
+ caches_clean_inval_pou_macro 2f
+ mov x0, xzr
1:
uaccess_ttbr0_disable x1, x2
ret
-9:
+2:
mov x0, #-EFAULT
b 1b
-SYM_FUNC_END(__flush_icache_range)
-SYM_FUNC_END(__flush_cache_user_range)
+SYM_FUNC_END(caches_clean_inval_user_pou)
/*
- * invalidate_icache_range(start,end)
+ * icache_inval_pou(start,end)
*
* Ensure that the I cache is invalid within specified region.
*
* - start - virtual start address of region
* - end - virtual end address of region
*/
-SYM_FUNC_START(invalidate_icache_range)
+SYM_FUNC_START(icache_inval_pou)
alternative_if ARM64_HAS_CACHE_DIC
- mov x0, xzr
isb
ret
alternative_else_nop_endif
- uaccess_ttbr0_enable x2, x3, x4
-
- invalidate_icache_by_line x0, x1, x2, x3, 2f
- mov x0, xzr
-1:
- uaccess_ttbr0_disable x1, x2
+ invalidate_icache_by_line x0, x1, x2, x3
ret
-2:
- mov x0, #-EFAULT
- b 1b
-SYM_FUNC_END(invalidate_icache_range)
+SYM_FUNC_END(icache_inval_pou)
/*
- * __flush_dcache_area(kaddr, size)
+ * dcache_clean_inval_poc(start, end)
*
- * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * Ensure that any D-cache lines for the interval [start, end)
* are cleaned and invalidated to the PoC.
*
- * - kaddr - kernel address
- * - size - size in question
+ * - start - virtual start address of region
+ * - end - virtual end address of region
*/
-SYM_FUNC_START_PI(__flush_dcache_area)
+SYM_FUNC_START_PI(dcache_clean_inval_poc)
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(__flush_dcache_area)
+SYM_FUNC_END_PI(dcache_clean_inval_poc)
/*
- * __clean_dcache_area_pou(kaddr, size)
+ * dcache_clean_pou(start, end)
*
- * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * Ensure that any D-cache lines for the interval [start, end)
* are cleaned to the PoU.
*
- * - kaddr - kernel address
- * - size - size in question
+ * - start - virtual start address of region
+ * - end - virtual end address of region
*/
-SYM_FUNC_START(__clean_dcache_area_pou)
+SYM_FUNC_START(dcache_clean_pou)
alternative_if ARM64_HAS_CACHE_IDC
dsb ishst
ret
alternative_else_nop_endif
dcache_by_line_op cvau, ish, x0, x1, x2, x3
ret
-SYM_FUNC_END(__clean_dcache_area_pou)
+SYM_FUNC_END(dcache_clean_pou)
/*
- * __inval_dcache_area(kaddr, size)
+ * dcache_inval_poc(start, end)
*
- * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * Ensure that any D-cache lines for the interval [start, end)
* are invalidated. Any partial lines at the ends of the interval are
* also cleaned to PoC to prevent data loss.
*
- * - kaddr - kernel address
- * - size - size in question
+ * - start - kernel start address of region
+ * - end - kernel end address of region
*/
SYM_FUNC_START_LOCAL(__dma_inv_area)
-SYM_FUNC_START_PI(__inval_dcache_area)
+SYM_FUNC_START_PI(dcache_inval_poc)
/* FALLTHROUGH */
/*
- * __dma_inv_area(start, size)
+ * __dma_inv_area(start, end)
* - start - virtual start address of region
- * - size - size in question
+ * - end - virtual end address of region
*/
- add x1, x1, x0
dcache_line_size x2, x3
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
@@ -165,48 +166,48 @@ SYM_FUNC_START_PI(__inval_dcache_area)
b.lo 2b
dsb sy
ret
-SYM_FUNC_END_PI(__inval_dcache_area)
+SYM_FUNC_END_PI(dcache_inval_poc)
SYM_FUNC_END(__dma_inv_area)
/*
- * __clean_dcache_area_poc(kaddr, size)
+ * dcache_clean_poc(start, end)
*
- * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * Ensure that any D-cache lines for the interval [start, end)
* are cleaned to the PoC.
*
- * - kaddr - kernel address
- * - size - size in question
+ * - start - virtual start address of region
+ * - end - virtual end address of region
*/
SYM_FUNC_START_LOCAL(__dma_clean_area)
-SYM_FUNC_START_PI(__clean_dcache_area_poc)
+SYM_FUNC_START_PI(dcache_clean_poc)
/* FALLTHROUGH */
/*
- * __dma_clean_area(start, size)
+ * __dma_clean_area(start, end)
* - start - virtual start address of region
- * - size - size in question
+ * - end - virtual end address of region
*/
dcache_by_line_op cvac, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(__clean_dcache_area_poc)
+SYM_FUNC_END_PI(dcache_clean_poc)
SYM_FUNC_END(__dma_clean_area)
/*
- * __clean_dcache_area_pop(kaddr, size)
+ * dcache_clean_pop(start, end)
*
- * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * Ensure that any D-cache lines for the interval [start, end)
* are cleaned to the PoP.
*
- * - kaddr - kernel address
- * - size - size in question
+ * - start - virtual start address of region
+ * - end - virtual end address of region
*/
-SYM_FUNC_START_PI(__clean_dcache_area_pop)
+SYM_FUNC_START_PI(dcache_clean_pop)
alternative_if_not ARM64_HAS_DCPOP
- b __clean_dcache_area_poc
+ b dcache_clean_poc
alternative_else_nop_endif
dcache_by_line_op cvap, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(__clean_dcache_area_pop)
+SYM_FUNC_END_PI(dcache_clean_pop)
/*
* __dma_flush_area(start, size)
@@ -217,6 +218,7 @@ SYM_FUNC_END_PI(__clean_dcache_area_pop)
* - size - size in question
*/
SYM_FUNC_START_PI(__dma_flush_area)
+ add x1, x0, x1
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
SYM_FUNC_END_PI(__dma_flush_area)
@@ -228,6 +230,7 @@ SYM_FUNC_END_PI(__dma_flush_area)
* - dir - DMA direction
*/
SYM_FUNC_START_PI(__dma_map_area)
+ add x1, x0, x1
cmp w2, #DMA_FROM_DEVICE
b.eq __dma_inv_area
b __dma_clean_area
@@ -240,6 +243,7 @@ SYM_FUNC_END_PI(__dma_map_area)
* - dir - DMA direction
*/
SYM_FUNC_START_PI(__dma_unmap_area)
+ add x1, x0, x1
cmp w2, #DMA_TO_DEVICE
b.ne __dma_inv_area
ret
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 6d44c028d1c9..2aaf950b906c 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -14,28 +14,25 @@
#include <asm/cache.h>
#include <asm/tlbflush.h>
-void sync_icache_aliases(void *kaddr, unsigned long len)
+void sync_icache_aliases(unsigned long start, unsigned long end)
{
- unsigned long addr = (unsigned long)kaddr;
-
if (icache_is_aliasing()) {
- __clean_dcache_area_pou(kaddr, len);
- __flush_icache_all();
+ dcache_clean_pou(start, end);
+ icache_inval_all_pou();
} else {
/*
* Don't issue kick_all_cpus_sync() after I-cache invalidation
* for user mappings.
*/
- __flush_icache_range(addr, addr + len);
+ caches_clean_inval_pou(start, end);
}
}
-static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
- unsigned long uaddr, void *kaddr,
- unsigned long len)
+static void flush_ptrace_access(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
{
if (vma->vm_flags & VM_EXEC)
- sync_icache_aliases(kaddr, len);
+ sync_icache_aliases(start, end);
}
/*
@@ -48,7 +45,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long len)
{
memcpy(dst, src, len);
- flush_ptrace_access(vma, page, uaddr, dst, len);
+ flush_ptrace_access(vma, (unsigned long)dst, (unsigned long)dst + len);
}
void __sync_icache_dcache(pte_t pte)
@@ -56,7 +53,9 @@ void __sync_icache_dcache(pte_t pte)
struct page *page = pte_page(pte);
if (!test_bit(PG_dcache_clean, &page->flags)) {
- sync_icache_aliases(page_address(page), page_size(page));
+ sync_icache_aliases((unsigned long)page_address(page),
+ (unsigned long)page_address(page) +
+ page_size(page));
set_bit(PG_dcache_clean, &page->flags);
}
}
@@ -77,20 +76,20 @@ EXPORT_SYMBOL(flush_dcache_page);
/*
* Additional functions defined in assembly.
*/
-EXPORT_SYMBOL(__flush_icache_range);
+EXPORT_SYMBOL(caches_clean_inval_pou);
#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size)
{
/* Ensure order against any prior non-cacheable writes */
dmb(osh);
- __clean_dcache_area_pop(addr, size);
+ dcache_clean_pop((unsigned long)addr, (unsigned long)addr + size);
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
void arch_invalidate_pmem(void *addr, size_t size)
{
- __inval_dcache_area(addr, size);
+ dcache_inval_poc((unsigned long)addr, (unsigned long)addr + size);
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
#endif
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6dd9369e3ea0..89b66ef43a0f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -515,7 +515,8 @@ static void __init map_mem(pgd_t *pgdp)
*/
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
- if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
+ if (rodata_full || crash_mem_map || debug_pagealloc_enabled() ||
+ IS_ENABLED(CONFIG_KFENCE))
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*