| author | Paolo Bonzini <pbonzini@redhat.com> | 2021-06-25 18:24:24 +0300 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2021-06-25 18:24:24 +0300 |
| commit | b8917b4ae44d1b945f6fba3d8ee6777edb44633b (patch) | |
| tree | 0e3b6b1113b40d444f078b3bedcd98b93ac78f4a /arch/arm64 | |
| parent | 79b1e56509beb8d53b2b92f27555cd2175c67b8a (diff) | |
| parent | 188982cda00ebfe28b50c2905d9bbaa2e9a001b9 (diff) | |
| download | linux-b8917b4ae44d1b945f6fba3d8ee6777edb44633b.tar.xz | |
Merge tag 'kvmarm-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for v5.14.
- Add MTE support in guests, complete with tag save/restore interface
- Reduce the impact of CMOs by moving them in the page-table code
- Allow device block mappings at stage-2
- Reduce the footprint of the vmemmap in protected mode
- Support the vGIC on dumb systems such as the Apple M1
- Add selftest infrastructure to support multiple configurations
  and apply that to PMU/non-PMU setups
- Add selftests for the debug architecture
- The usual crop of PMU fixes
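The MTE guest support listed above is driven from userspace through two new VM-level interfaces that appear in this diff: the KVM_CAP_ARM_MTE capability (which the enable-cap handler rejects once any vCPU has been created) and the KVM_ARM_MTE_COPY_TAGS ioctl built around struct kvm_arm_copy_mte_tags. Below is a minimal userspace sketch of how a VMM might drive them; it assumes uapi headers that already carry this merge, and vm_fd, guest_ipa and tag_buf are hypothetical caller-provided values, so treat it as an illustration rather than the canonical interface.

```c
/*
 * Hedged sketch only: enable MTE for a guest and read the allocation tags
 * of one page of guest IPA space back into userspace. Assumes <linux/kvm.h>
 * from a kernel containing this merge (KVM_CAP_ARM_MTE,
 * KVM_ARM_MTE_COPY_TAGS); vm_fd, guest_ipa and tag_buf are hypothetical.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define GUEST_PAGE_SIZE	4096UL

static long vm_mte_enable_and_read_tags(int vm_fd, uint64_t guest_ipa,
					uint8_t *tag_buf /* one tag byte per
							  * 16-byte granule */)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_MTE };
	struct kvm_arm_copy_mte_tags copy = {
		.guest_ipa = guest_ipa,		/* must be page-aligned */
		.length    = GUEST_PAGE_SIZE,	/* must be page-aligned */
		.addr      = tag_buf,
		.flags     = KVM_ARM_TAGS_FROM_GUEST,
	};

	/* Per the diff, this fails once any vCPU has been created. */
	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
		return -1;

	/*
	 * ioctl() returns the number of guest bytes whose tags were
	 * transferred, or -1 with errno set if nothing could be copied.
	 */
	return ioctl(vm_fd, KVM_ARM_MTE_COPY_TAGS, &copy);
}
```

Writing tags into the guest is presumably the mirror image with KVM_ARM_TAGS_TO_GUEST, which is what the tag restore side of migration would use.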
Diffstat (limited to 'arch/arm64')
81 files changed, 1229 insertions(+), 525 deletions(-)
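Much of the arch/arm64 churn in the diff below is mechanical fallout from the cache maintenance (CMO) rework: helpers such as __flush_dcache_area() and __flush_icache_range() are renamed after the operation and completion point they perform (dcache_clean_inval_poc(), caches_clean_inval_pou(), ...) and now take an exclusive [start, end) virtual address interval instead of a base address and size. A minimal sketch of the new calling convention follows, shown with hypothetical buf/code/len arguments and meant as a kernel-context illustration rather than standalone code.

```c
#include <linux/types.h>
#include <asm/cacheflush.h>

/*
 * Clean+invalidate a buffer to the Point of Coherency, e.g. before handing
 * it to an agent that bypasses the caches. The pre-merge spelling was
 * __flush_dcache_area(buf, len).
 */
static inline void example_clean_inval_to_poc(void *buf, size_t len)
{
	dcache_clean_inval_poc((unsigned long)buf, (unsigned long)buf + len);
}

/*
 * Make freshly written instructions coherent with the I-cache to the Point
 * of Unification. The pre-merge spelling was __flush_icache_range(start, end).
 */
static inline void example_sync_new_insns(void *code, size_t len)
{
	caches_clean_inval_pou((unsigned long)code, (unsigned long)code + len);
}
```

This is the same conversion pattern visible throughout the converted call sites below, for example the gic_flush_dcache_to_poc() and kvm_flush_dcache_to_poc() macros and the kexec/hibernate paths.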
diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild index d6465823b281..7b393cfec071 100644 --- a/arch/arm64/Kbuild +++ b/arch/arm64/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += kernel/ mm/ -obj-$(CONFIG_NET) += net/ +obj-y += kernel/ mm/ net/ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_XEN) += xen/ obj-$(CONFIG_CRYPTO) += crypto/ diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi index c62ddb9b2ba5..3771144a2ce4 100644 --- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi +++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi @@ -14,7 +14,6 @@ ports { port@0 { - reg = <0>; csi20_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; @@ -29,7 +28,6 @@ ports { port@0 { - reg = <0>; csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi index d64fb8b1b86c..46f8dbf68904 100644 --- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi @@ -2573,6 +2573,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2628,6 +2632,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi index 5b05474dc272..d16a4be5ef77 100644 --- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi @@ -2419,6 +2419,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2474,6 +2478,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts index e7b4a929bb17..2e3d1981cac4 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts +++ b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts @@ -33,7 +33,7 @@ status = "okay"; ports { - port { + port@0 { csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi index 20fa3caa050e..1aef34447abd 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi @@ -1823,6 +1823,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi index 8eb006cbd9af..1f51237ab0a6 100644 --- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi @@ -2709,6 +2709,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2764,6 +2768,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77950.dtsi b/arch/arm64/boot/dts/renesas/r8a77950.dtsi index 25b87da32eeb..b643d3079db1 100644 --- a/arch/arm64/boot/dts/renesas/r8a77950.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77950.dtsi 
@@ -192,6 +192,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi index 5c39152e4570..85d66d15465a 100644 --- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi @@ -3097,6 +3097,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -3152,6 +3156,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -3191,6 +3199,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi index 25d947a81b29..12476e354d74 100644 --- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi @@ -2761,6 +2761,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2816,6 +2820,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi index ab081f14af9a..d9804768425a 100644 --- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi @@ -2499,6 +2499,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2554,6 +2558,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi index 657b20d3533b..dcb9df861d74 100644 --- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi @@ -2575,6 +2575,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2630,6 +2634,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi index 5a5d5649332a..e8f6352c3665 100644 --- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi @@ -1106,6 +1106,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi index 1ffa4a995a7a..7b51d464de0e 100644 --- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi @@ -1439,6 +1439,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -1478,6 +1482,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts index 295d34f1d216..4715e4a4abe0 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts +++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts @@ 
-298,8 +298,6 @@ ports { port@0 { - reg = <0>; - csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi index 5010f23fafcc..0eaea58f4210 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi @@ -1970,6 +1970,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index e18747df219f..453ffcef24fa 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -349,7 +349,6 @@ ports { port@0 { - reg = <0>; csi20_in: endpoint { clock-lanes = <0>; data-lanes = <1>; @@ -364,8 +363,6 @@ ports { port@0 { - reg = <0>; - csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2 3 4>; diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 8a078fc662ac..477703578caa 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -197,11 +197,6 @@ alternative_endif #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) -.macro user_alt, label, oldinstr, newinstr, cond -9999: alternative_insn "\oldinstr", "\newinstr", \cond - _asm_extable 9999b, \label -.endm - #endif /* __ASSEMBLY__ */ /* diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 934b9be582d2..4ad22c3135db 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -124,7 +124,8 @@ static inline u32 gic_read_rpr(void) #define gic_read_lpir(c) readq_relaxed(c) #define gic_write_lpir(v, c) writeq_relaxed(v, c) -#define gic_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) +#define gic_flush_dcache_to_poc(a,l) \ + dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l)) #define gits_read_baser(c) readq_relaxed(c) #define gits_write_baser(v, c) writeq_relaxed(v, c) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 8418c1bd8f04..c4cecf85dccf 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -130,15 +130,27 @@ alternative_endif .endm /* - * Emit an entry into the exception table + * Create an exception table entry for `insn`, which will branch to `fixup` + * when an unhandled fault is taken. */ - .macro _asm_extable, from, to + .macro _asm_extable, insn, fixup .pushsection __ex_table, "a" .align 3 - .long (\from - .), (\to - .) + .long (\insn - .), (\fixup - .) .popsection .endm +/* + * Create an exception table entry for `insn` if `fixup` is provided. Otherwise + * do nothing. + */ + .macro _cond_extable, insn, fixup + .ifnc \fixup, + _asm_extable \insn, \fixup + .endif + .endm + + #define USER(l, x...) 
\ 9999: x; \ _asm_extable 9999b, l @@ -375,51 +387,53 @@ alternative_cb_end bfi \tcr, \tmp0, \pos, #3 .endm + .macro __dcache_op_workaround_clean_cache, op, addr +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \addr +alternative_else + dc civac, \addr +alternative_endif + .endm + /* * Macro to perform a data cache maintenance for the interval - * [kaddr, kaddr + size) + * [start, end) * * op: operation passed to dc instruction * domain: domain used in dsb instruciton - * kaddr: starting virtual address of the region - * size: size of the region - * Corrupts: kaddr, size, tmp1, tmp2 + * start: starting virtual address of the region + * end: end virtual address of the region + * fixup: optional label to branch to on user fault + * Corrupts: start, end, tmp1, tmp2 */ - .macro __dcache_op_workaround_clean_cache, op, kaddr -alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc \op, \kaddr -alternative_else - dc civac, \kaddr -alternative_endif - .endm - - .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup dcache_line_size \tmp1, \tmp2 - add \size, \kaddr, \size sub \tmp2, \tmp1, #1 - bic \kaddr, \kaddr, \tmp2 -9998: + bic \start, \start, \tmp2 +.Ldcache_op\@: .ifc \op, cvau - __dcache_op_workaround_clean_cache \op, \kaddr + __dcache_op_workaround_clean_cache \op, \start .else .ifc \op, cvac - __dcache_op_workaround_clean_cache \op, \kaddr + __dcache_op_workaround_clean_cache \op, \start .else .ifc \op, cvap - sys 3, c7, c12, 1, \kaddr // dc cvap + sys 3, c7, c12, 1, \start // dc cvap .else .ifc \op, cvadp - sys 3, c7, c13, 1, \kaddr // dc cvadp + sys 3, c7, c13, 1, \start // dc cvadp .else - dc \op, \kaddr + dc \op, \start .endif .endif .endif .endif - add \kaddr, \kaddr, \tmp1 - cmp \kaddr, \size - b.lo 9998b + add \start, \start, \tmp1 + cmp \start, \end + b.lo .Ldcache_op\@ dsb \domain + + _cond_extable .Ldcache_op\@, \fixup .endm /* @@ -427,20 +441,22 @@ alternative_endif * [start, end) * * start, end: virtual addresses describing the region - * label: A label to branch to on user fault. + * fixup: optional label to branch to on user fault * Corrupts: tmp1, tmp2 */ - .macro invalidate_icache_by_line start, end, tmp1, tmp2, label + .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup icache_line_size \tmp1, \tmp2 sub \tmp2, \tmp1, #1 bic \tmp2, \start, \tmp2 -9997: -USER(\label, ic ivau, \tmp2) // invalidate I line PoU +.Licache_op\@: + ic ivau, \tmp2 // invalidate I line PoU add \tmp2, \tmp2, \tmp1 cmp \tmp2, \end - b.lo 9997b + b.lo .Licache_op\@ dsb ish isb + + _cond_extable .Licache_op\@, \fixup .endm /* diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 2175ec0004ed..451e11e5fd23 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -74,7 +74,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, * This insanity brought to you by speculative system register reads, * out-of-order memory accesses, sequence locks and Thomas Gleixner. 
* - * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + * https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/ */ #define arch_counter_enforce_ordering(val) do { \ u64 tmp, _val = (val); \ diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 52e5c1623224..543c997eb3b7 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -30,45 +30,58 @@ * the implementation assumes non-aliasing VIPT D-cache and (aliasing) * VIPT I-cache. * - * flush_icache_range(start, end) + * All functions below apply to the interval [start, end) + * - start - virtual start address (inclusive) + * - end - virtual end address (exclusive) * - * Ensure coherency between the I-cache and the D-cache in the - * region described by start, end. - * - start - virtual start address - * - end - virtual end address + * caches_clean_inval_pou(start, end) * - * invalidate_icache_range(start, end) + * Ensure coherency between the I-cache and the D-cache region to + * the Point of Unification. * - * Invalidate the I-cache in the region described by start, end. - * - start - virtual start address - * - end - virtual end address + * caches_clean_inval_user_pou(start, end) * - * __flush_cache_user_range(start, end) + * Ensure coherency between the I-cache and the D-cache region to + * the Point of Unification. + * Use only if the region might access user memory. * - * Ensure coherency between the I-cache and the D-cache in the - * region described by start, end. - * - start - virtual start address - * - end - virtual end address + * icache_inval_pou(start, end) * - * __flush_dcache_area(kaddr, size) + * Invalidate I-cache region to the Point of Unification. * - * Ensure that the data held in page is written back. - * - kaddr - page address - * - size - region size + * dcache_clean_inval_poc(start, end) + * + * Clean and invalidate D-cache region to the Point of Coherency. + * + * dcache_inval_poc(start, end) + * + * Invalidate D-cache region to the Point of Coherency. + * + * dcache_clean_poc(start, end) + * + * Clean D-cache region to the Point of Coherency. + * + * dcache_clean_pop(start, end) + * + * Clean D-cache region to the Point of Persistence. + * + * dcache_clean_pou(start, end) + * + * Clean D-cache region to the Point of Unification. 
*/ -extern void __flush_icache_range(unsigned long start, unsigned long end); -extern int invalidate_icache_range(unsigned long start, unsigned long end); -extern void __flush_dcache_area(void *addr, size_t len); -extern void __inval_dcache_area(void *addr, size_t len); -extern void __clean_dcache_area_poc(void *addr, size_t len); -extern void __clean_dcache_area_pop(void *addr, size_t len); -extern void __clean_dcache_area_pou(void *addr, size_t len); -extern long __flush_cache_user_range(unsigned long start, unsigned long end); -extern void sync_icache_aliases(void *kaddr, unsigned long len); +extern void caches_clean_inval_pou(unsigned long start, unsigned long end); +extern void icache_inval_pou(unsigned long start, unsigned long end); +extern void dcache_clean_inval_poc(unsigned long start, unsigned long end); +extern void dcache_inval_poc(unsigned long start, unsigned long end); +extern void dcache_clean_poc(unsigned long start, unsigned long end); +extern void dcache_clean_pop(unsigned long start, unsigned long end); +extern void dcache_clean_pou(unsigned long start, unsigned long end); +extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end); +extern void sync_icache_aliases(unsigned long start, unsigned long end); static inline void flush_icache_range(unsigned long start, unsigned long end) { - __flush_icache_range(start, end); + caches_clean_inval_pou(start, end); /* * IPI all online CPUs so that they undergo a context synchronization @@ -122,7 +135,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -static __always_inline void __flush_icache_all(void) +static __always_inline void icache_inval_all_pou(void) { if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) return; diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 3578aba9c608..1bed37eb013a 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -137,7 +137,7 @@ void efi_virtmap_unload(void); static inline void efi_capsule_flush_cache_range(void *addr, int size) { - __flush_dcache_area(addr, size); + dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size); } #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 692c9049befa..d436831dd706 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -12,7 +12,8 @@ #include <asm/types.h> /* Hyp Configuration Register (HCR) bits */ -#define HCR_ATA (UL(1) << 56) +#define HCR_ATA_SHIFT 56 +#define HCR_ATA (UL(1) << HCR_ATA_SHIFT) #define HCR_FWB (UL(1) << 46) #define HCR_API (UL(1) << 41) #define HCR_APK (UL(1) << 40) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 01b9857757f2..fd418955e31e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 |= HCR_TID2; + + if (kvm_has_mte(vcpu->kvm)) + vcpu->arch.hcr_el2 |= HCR_ATA; } static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5a2c82f63baa..41911585ae0c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,7 @@ #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) 
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) +#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) @@ -132,6 +133,9 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; + + /* Memory Tagging Extension enabled for the guest */ + bool mte_enabled; }; struct kvm_vcpu_fault_info { @@ -206,6 +210,12 @@ enum vcpu_sysreg { CNTP_CVAL_EL0, CNTP_CTL_EL0, + /* Memory Tagging Extension registers */ + RGSR_EL1, /* Random Allocation Tag Seed Register */ + GCR_EL1, /* Tag Control Register */ + TFSR_EL1, /* Tag Fault Status Register (EL1) */ + TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ + /* 32bit specific registers. Keep them at the end of the range */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -716,6 +726,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, + struct kvm_arm_copy_mte_tags *copy_tags); + /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); @@ -764,6 +777,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled) #define kvm_vcpu_has_pmu(vcpu) \ (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 25ed956f9af1..b52c5c4b9a3d 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -180,17 +180,16 @@ static inline void *__kvm_vector_slot2addr(void *base, struct kvm; -#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) +#define kvm_flush_dcache_to_poc(a,l) \ + dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l)) static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; } -static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) +static inline void __clean_dcache_guest_page(void *va, size_t size) { - void *va = page_address(pfn_to_page(pfn)); - /* * With FWB, we ensure that the guest always accesses memory using * cacheable attributes, and we don't have to clean to PoC when @@ -203,18 +202,14 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) kvm_flush_dcache_to_poc(va, size); } -static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, - unsigned long size) +static inline void __invalidate_icache_guest_page(void *va, size_t size) { if (icache_is_aliasing()) { /* any kind of VIPT cache */ - __flush_icache_all(); + icache_inval_all_pou(); } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ - void *va = page_address(pfn_to_page(pfn)); - - invalidate_icache_range((unsigned long)va, - (unsigned long)va + size); + icache_inval_pou((unsigned long)va, (unsigned long)va + size); } } diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h new file mode 100644 index 000000000000..de002636eb1f --- /dev/null +++ b/arch/arm64/include/asm/kvm_mte.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 
2020-2021 ARM Ltd. + */ +#ifndef __ASM_KVM_MTE_H +#define __ASM_KVM_MTE_H + +#ifdef __ASSEMBLY__ + +#include <asm/sysreg.h> + +#ifdef CONFIG_ARM64_MTE + +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1 +alternative_if_not ARM64_MTE + b .L__skip_switch\@ +alternative_else_nop_endif + mrs \reg1, hcr_el2 + tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@ + + mrs_s \reg1, SYS_RGSR_EL1 + str \reg1, [\h_ctxt, #CPU_RGSR_EL1] + mrs_s \reg1, SYS_GCR_EL1 + str \reg1, [\h_ctxt, #CPU_GCR_EL1] + + ldr \reg1, [\g_ctxt, #CPU_RGSR_EL1] + msr_s SYS_RGSR_EL1, \reg1 + ldr \reg1, [\g_ctxt, #CPU_GCR_EL1] + msr_s SYS_GCR_EL1, \reg1 + +.L__skip_switch\@: +.endm + +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1 +alternative_if_not ARM64_MTE + b .L__skip_switch\@ +alternative_else_nop_endif + mrs \reg1, hcr_el2 + tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@ + + mrs_s \reg1, SYS_RGSR_EL1 + str \reg1, [\g_ctxt, #CPU_RGSR_EL1] + mrs_s \reg1, SYS_GCR_EL1 + str \reg1, [\g_ctxt, #CPU_GCR_EL1] + + ldr \reg1, [\h_ctxt, #CPU_RGSR_EL1] + msr_s SYS_RGSR_EL1, \reg1 + ldr \reg1, [\h_ctxt, #CPU_GCR_EL1] + msr_s SYS_GCR_EL1, \reg1 + + isb + +.L__skip_switch\@: +.endm + +#else /* !CONFIG_ARM64_MTE */ + +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1 +.endm + +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1 +.endm + +#endif /* CONFIG_ARM64_MTE */ +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_KVM_MTE_H */ diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index c3674c47d48c..f004c0115d89 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -27,23 +27,29 @@ typedef u64 kvm_pte_t; /** * struct kvm_pgtable_mm_ops - Memory management callbacks. - * @zalloc_page: Allocate a single zeroed memory page. The @arg parameter - * can be used by the walker to pass a memcache. The - * initial refcount of the page is 1. - * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. The - * @size parameter is in bytes, and is rounded-up to the - * next page boundary. The resulting allocation is - * physically contiguous. - * @free_pages_exact: Free an exact number of memory pages previously - * allocated by zalloc_pages_exact. - * @get_page: Increment the refcount on a page. - * @put_page: Decrement the refcount on a page. When the refcount - * reaches 0 the page is automatically freed. - * @page_count: Return the refcount of a page. - * @phys_to_virt: Convert a physical address into a virtual address mapped - * in the current context. - * @virt_to_phys: Convert a virtual address mapped in the current context - * into a physical address. + * @zalloc_page: Allocate a single zeroed memory page. + * The @arg parameter can be used by the walker + * to pass a memcache. The initial refcount of + * the page is 1. + * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. + * The @size parameter is in bytes, and is rounded + * up to the next page boundary. The resulting + * allocation is physically contiguous. + * @free_pages_exact: Free an exact number of memory pages previously + * allocated by zalloc_pages_exact. + * @get_page: Increment the refcount on a page. + * @put_page: Decrement the refcount on a page. When the + * refcount reaches 0 the page is automatically + * freed. + * @page_count: Return the refcount of a page. + * @phys_to_virt: Convert a physical address into a virtual + * address mapped in the current context. + * @virt_to_phys: Convert a virtual address mapped in the current + * context into a physical address. 
+ * @dcache_clean_inval_poc: Clean and invalidate the data cache to the PoC + * for the specified memory address range. + * @icache_inval_pou: Invalidate the instruction cache to the PoU + * for the specified memory address range. */ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); @@ -54,6 +60,8 @@ struct kvm_pgtable_mm_ops { int (*page_count)(void *addr); void* (*phys_to_virt)(phys_addr_t phys); phys_addr_t (*virt_to_phys)(void *addr); + void (*dcache_clean_inval_poc)(void *addr, size_t size); + void (*icache_inval_pou)(void *addr, size_t size); }; /** diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h index cf241b0f0a42..626d359b396e 100644 --- a/arch/arm64/include/asm/mte-def.h +++ b/arch/arm64/include/asm/mte-def.h @@ -7,6 +7,7 @@ #define MTE_GRANULE_SIZE UL(16) #define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1)) +#define MTE_GRANULES_PER_PAGE (PAGE_SIZE / MTE_GRANULE_SIZE) #define MTE_TAG_SHIFT 56 #define MTE_TAG_SIZE 4 #define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index bc88a1ced0d7..347ef38a35f7 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -37,7 +37,7 @@ void mte_free_tag_storage(char *storage); /* track which pages have valid allocation tags */ #define PG_mte_tagged PG_arch_2 -void mte_sync_tags(pte_t *ptep, pte_t pte); +void mte_sync_tags(pte_t old_pte, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); void mte_thread_init_user(void); void mte_thread_switch(struct task_struct *next); @@ -53,7 +53,7 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request, /* unused if !CONFIG_ARM64_MTE, silence the compiler */ #define PG_mte_tagged 0 -static inline void mte_sync_tags(pte_t *ptep, pte_t pte) +static inline void mte_sync_tags(pte_t old_pte, pte_t pte) { } static inline void mte_copy_page_tags(void *kto, const void *kfrom) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0b10204e72fc..db5402168841 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -314,9 +314,25 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte); - if (system_supports_mte() && - pte_present(pte) && pte_tagged(pte) && !pte_special(pte)) - mte_sync_tags(ptep, pte); + /* + * If the PTE would provide user space access to the tags associated + * with it then ensure that the MTE tags are synchronised. Although + * pte_access_permitted() returns false for exec only mappings, they + * don't expose tags (instruction fetches don't check tags). + */ + if (system_supports_mte() && pte_access_permitted(pte, false) && + !pte_special(pte)) { + pte_t old_pte = READ_ONCE(*ptep); + /* + * We only need to synchronise if the new PTE has tags enabled + * or if swapping in (in which case another mapping may have + * set tags in the past even if this PTE isn't tagged). 
+ * (!pte_none() && !pte_present()) is an open coded version of + * is_swap_pte() + */ + if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte))) + mte_sync_tags(old_pte, pte); + } __check_racy_pte_update(mm, ptep, pte); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 65d15700a168..347ccac2341e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -651,7 +651,8 @@ #define INIT_SCTLR_EL2_MMU_ON \ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \ - SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1) + SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \ + SCTLR_ELx_ITFSB | SCTLR_EL2_RES1) #define INIT_SCTLR_EL2_MMU_OFF \ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 7859749d6628..5dab69d2c22b 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -893,8 +893,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) -#define __NR_quotactl_path 443 -__SYSCALL(__NR_quotactl_path, sys_quotactl_path) +/* 443 is reserved for quotactl_path */ #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) #define __NR_landlock_add_rule 445 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 24223adae150..b3edde68bc3e 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -184,6 +184,17 @@ struct kvm_vcpu_events { __u32 reserved[12]; }; +struct kvm_arm_copy_mte_tags { + __u64 guest_ipa; + __u64 length; + void __user *addr; + __u64 flags; + __u64 reserved[2]; +}; + +#define KVM_ARM_TAGS_TO_GUEST 0 +#define KVM_ARM_TAGS_FROM_GUEST 1 + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index c906d20c7b52..3fb79b76e9d9 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -181,7 +181,7 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu */ if (!is_module) { dsb(ish); - __flush_icache_all(); + icache_inval_all_pou(); isb(); /* Ignore ARM64_CB bit from feature mask */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 0cb34ccb6e73..6f0044cb233e 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -111,6 +111,8 @@ int main(void) DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); + DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1])); + DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1])); DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1])); DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1])); DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1])); diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 0073b24b5d25..61a87fa1c305 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -28,7 +28,8 @@ 
SYM_CODE_START(efi_enter_kernel) * stale icache entries from before relocation. */ ldr w1, =kernel_size - bl __clean_dcache_area_poc + add x1, x0, x1 + bl dcache_clean_poc ic ialluis /* @@ -36,8 +37,8 @@ SYM_CODE_START(efi_enter_kernel) * so that we can safely disable the MMU and caches. */ adr x0, 0f - ldr w1, 3f - bl __clean_dcache_area_poc + adr x1, 3f + bl dcache_clean_poc 0: /* Turn off Dcache and MMU */ mrs x0, CurrentEL @@ -64,5 +65,5 @@ SYM_CODE_START(efi_enter_kernel) mov x2, xzr mov x3, xzr br x19 +3: SYM_CODE_END(efi_enter_kernel) -3: .long . - 0b diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 96873dfa67fd..6928cb67d3a0 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -117,8 +117,8 @@ SYM_CODE_START_LOCAL(preserve_boot_args) dmb sy // needed before dc ivac with // MMU off - mov x1, #0x20 // 4 x 8 bytes - b __inval_dcache_area // tail call + add x1, x0, #0x20 // 4 x 8 bytes + b dcache_inval_poc // tail call SYM_CODE_END(preserve_boot_args) /* @@ -268,8 +268,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ adrp x0, init_pg_dir adrp x1, init_pg_end - sub x1, x1, x0 - bl __inval_dcache_area + bl dcache_inval_poc /* * Clear the init page tables. @@ -382,13 +381,11 @@ SYM_FUNC_START_LOCAL(__create_page_tables) adrp x0, idmap_pg_dir adrp x1, idmap_pg_end - sub x1, x1, x0 - bl __inval_dcache_area + bl dcache_inval_poc adrp x0, init_pg_dir adrp x1, init_pg_end - sub x1, x1, x0 - bl __inval_dcache_area + bl dcache_inval_poc ret x28 SYM_FUNC_END(__create_page_tables) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 8ccca660034e..81c0186a5e32 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -45,7 +45,7 @@ * Because this code has to be copied to a 'safe' page, it can't call out to * other functions by PC-relative address. Also remember that it may be * mid-way through over-writing other functions. For this reason it contains - * code from flush_icache_range() and uses the copy_page() macro. + * code from caches_clean_inval_pou() and uses the copy_page() macro. * * This 'safe' page is mapped via ttbr0, and executed from there. 
This function * switches to a copy of the linear map in ttbr1, performs the restore, then @@ -87,11 +87,12 @@ SYM_CODE_START(swsusp_arch_suspend_exit) copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 add x1, x10, #PAGE_SIZE - /* Clean the copied page to PoU - based on flush_icache_range() */ + /* Clean the copied page to PoU - based on caches_clean_inval_pou() */ raw_dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x10, x3 -2: dc cvau, x4 /* clean D line / unified line */ +2: /* clean D line / unified line */ +alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE add x4, x4, x2 cmp x4, x1 b.lo 2b diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index b1cef371df2b..46a0b4d6e251 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -210,7 +210,7 @@ static int create_safe_exec_page(void *src_start, size_t length, return -ENOMEM; memcpy(page, src_start, length); - __flush_icache_range((unsigned long)page, (unsigned long)page + length); + caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length); rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page); if (rc) return rc; @@ -240,8 +240,6 @@ static int create_safe_exec_page(void *src_start, size_t length, return 0; } -#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) - #ifdef CONFIG_ARM64_MTE static DEFINE_XARRAY(mte_pages); @@ -383,13 +381,18 @@ int swsusp_arch_suspend(void) ret = swsusp_save(); } else { /* Clean kernel core startup/idle code to PoC*/ - dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end); - dcache_clean_range(__idmap_text_start, __idmap_text_end); + dcache_clean_inval_poc((unsigned long)__mmuoff_data_start, + (unsigned long)__mmuoff_data_end); + dcache_clean_inval_poc((unsigned long)__idmap_text_start, + (unsigned long)__idmap_text_end); /* Clean kvm setup code to PoC? */ if (el2_reset_needed()) { - dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); - dcache_clean_range(__hyp_text_start, __hyp_text_end); + dcache_clean_inval_poc( + (unsigned long)__hyp_idmap_text_start, + (unsigned long)__hyp_idmap_text_end); + dcache_clean_inval_poc((unsigned long)__hyp_text_start, + (unsigned long)__hyp_text_end); } swsusp_mte_restore_tags(); @@ -474,7 +477,8 @@ int swsusp_arch_resume(void) * The hibernate exit text contains a set of el2 vectors, that will * be executed at el2 with the mmu off in order to reload hyp-stub. 
*/ - __flush_dcache_area(hibernate_exit, exit_size); + dcache_clean_inval_poc((unsigned long)hibernate_exit, + (unsigned long)hibernate_exit + exit_size); /* * KASLR will cause the el2 vectors to be in a different location in diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index e628c8ce1ffe..53a381a7f65d 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -237,7 +237,8 @@ asmlinkage void __init init_feature_override(void) for (i = 0; i < ARRAY_SIZE(regs); i++) { if (regs[i]->override) - __flush_dcache_area(regs[i]->override, + dcache_clean_inval_poc((unsigned long)regs[i]->override, + (unsigned long)regs[i]->override + sizeof(*regs[i]->override)); } } diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index bcf3c2755370..c96a9a0043bf 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -35,7 +35,7 @@ __efistub_strnlen = __pi_strnlen; __efistub_strcmp = __pi_strcmp; __efistub_strncmp = __pi_strncmp; __efistub_strrchr = __pi_strrchr; -__efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc; +__efistub_dcache_clean_poc = __pi_dcache_clean_poc; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) __efistub___memcpy = __pi_memcpy; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 6c0de2f60ea9..51cb8dc98d00 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -198,7 +198,7 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) ret = aarch64_insn_write(tp, insn); if (ret == 0) - __flush_icache_range((uintptr_t)tp, + caches_clean_inval_pou((uintptr_t)tp, (uintptr_t)tp + AARCH64_INSN_SIZE); return ret; diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 341342b207f6..cfa2cfde3019 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -72,7 +72,9 @@ u64 __init kaslr_early_init(void) * we end up running with module randomization disabled. */ module_alloc_base = (u64)_etext - MODULES_VSIZE; - __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); + dcache_clean_inval_poc((unsigned long)&module_alloc_base, + (unsigned long)&module_alloc_base + + sizeof(module_alloc_base)); /* * Try to map the FDT early. If this fails, we simply bail, @@ -170,8 +172,12 @@ u64 __init kaslr_early_init(void) module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; module_alloc_base &= PAGE_MASK; - __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); - __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed)); + dcache_clean_inval_poc((unsigned long)&module_alloc_base, + (unsigned long)&module_alloc_base + + sizeof(module_alloc_base)); + dcache_clean_inval_poc((unsigned long)&memstart_offset_seed, + (unsigned long)&memstart_offset_seed + + sizeof(memstart_offset_seed)); return offset; } diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 90a335c74442..03ceabe4d912 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -68,10 +68,16 @@ int machine_kexec_post_load(struct kimage *kimage) kimage->arch.kern_reloc = __pa(reloc_code); kexec_image_info(kimage); - /* Flush the reloc_code in preparation for its execution. 
*/ - __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size); - flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code + - arm64_relocate_new_kernel_size); + /* + * For execution with the MMU off, reloc_code needs to be cleaned to the + * PoC and invalidated from the I-cache. + */ + dcache_clean_inval_poc((unsigned long)reloc_code, + (unsigned long)reloc_code + + arm64_relocate_new_kernel_size); + icache_inval_pou((uintptr_t)reloc_code, + (uintptr_t)reloc_code + + arm64_relocate_new_kernel_size); return 0; } @@ -102,16 +108,18 @@ static void kexec_list_flush(struct kimage *kimage) for (entry = &kimage->head; ; entry++) { unsigned int flag; - void *addr; + unsigned long addr; /* flush the list entries. */ - __flush_dcache_area(entry, sizeof(kimage_entry_t)); + dcache_clean_inval_poc((unsigned long)entry, + (unsigned long)entry + + sizeof(kimage_entry_t)); flag = *entry & IND_FLAGS; if (flag == IND_DONE) break; - addr = phys_to_virt(*entry & PAGE_MASK); + addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK); switch (flag) { case IND_INDIRECTION: @@ -120,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage) break; case IND_SOURCE: /* flush the source pages. */ - __flush_dcache_area(addr, PAGE_SIZE); + dcache_clean_inval_poc(addr, addr + PAGE_SIZE); break; case IND_DESTINATION: break; @@ -147,8 +155,10 @@ static void kexec_segment_flush(const struct kimage *kimage) kimage->segment[i].memsz, kimage->segment[i].memsz / PAGE_SIZE); - __flush_dcache_area(phys_to_virt(kimage->segment[i].mem), - kimage->segment[i].memsz); + dcache_clean_inval_poc( + (unsigned long)phys_to_virt(kimage->segment[i].mem), + (unsigned long)phys_to_virt(kimage->segment[i].mem) + + kimage->segment[i].memsz); } } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 125a10e413e9..69b3fde8759e 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -32,10 +32,9 @@ DEFINE_STATIC_KEY_FALSE(mte_async_mode); EXPORT_SYMBOL_GPL(mte_async_mode); #endif -static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) +static void mte_sync_page_tags(struct page *page, pte_t old_pte, + bool check_swap, bool pte_is_tagged) { - pte_t old_pte = READ_ONCE(*ptep); - if (check_swap && is_swap_pte(old_pte)) { swp_entry_t entry = pte_to_swp_entry(old_pte); @@ -43,6 +42,9 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) return; } + if (!pte_is_tagged) + return; + page_kasan_tag_reset(page); /* * We need smp_wmb() in between setting the flags and clearing the @@ -55,16 +57,22 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) mte_clear_page_tags(page_address(page)); } -void mte_sync_tags(pte_t *ptep, pte_t pte) +void mte_sync_tags(pte_t old_pte, pte_t pte) { struct page *page = pte_page(pte); long i, nr_pages = compound_nr(page); bool check_swap = nr_pages == 1; + bool pte_is_tagged = pte_tagged(pte); + + /* Early out if there's nothing to do */ + if (!check_swap && !pte_is_tagged) + return; /* if PG_mte_tagged is set, tags have already been initialised */ for (i = 0; i < nr_pages; i++, page++) { if (!test_and_set_bit(PG_mte_tagged, &page->flags)) - mte_sync_page_tags(page, ptep, check_swap); + mte_sync_page_tags(page, old_pte, check_swap, + pte_is_tagged); } } diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 2c247634552b..9be668f3f034 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -21,7 +21,7 @@ void 
arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, memcpy(dst, src, len); /* flush caches (dcache/icache) */ - sync_icache_aliases(dst, len); + sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len); kunmap_atomic(xol_page_kaddr); } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dcd7041b2b07..9b4c1118194d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -122,7 +122,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) secondary_data.task = idle; secondary_data.stack = task_stack_page(idle) + THREAD_SIZE; update_cpu_boot_status(CPU_MMU_OFF); - __flush_dcache_area(&secondary_data, sizeof(secondary_data)); + dcache_clean_inval_poc((unsigned long)&secondary_data, + (unsigned long)&secondary_data + + sizeof(secondary_data)); /* Now bring the CPU into our world */ ret = boot_secondary(cpu, idle); @@ -143,7 +145,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_crit("CPU%u: failed to come online\n", cpu); secondary_data.task = NULL; secondary_data.stack = NULL; - __flush_dcache_area(&secondary_data, sizeof(secondary_data)); + dcache_clean_inval_poc((unsigned long)&secondary_data, + (unsigned long)&secondary_data + + sizeof(secondary_data)); status = READ_ONCE(secondary_data.status); if (status == CPU_MMU_OFF) status = READ_ONCE(__early_cpu_boot_status); diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index c45a83512805..7e1624ecab3c 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -36,7 +36,7 @@ static void write_pen_release(u64 val) unsigned long size = sizeof(secondary_holding_pen_release); secondary_holding_pen_release = val; - __flush_dcache_area(start, size); + dcache_clean_inval_poc((unsigned long)start, (unsigned long)start + size); } @@ -90,8 +90,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) * the boot protocol. */ writeq_relaxed(pa_holding_pen, release_addr); - __flush_dcache_area((__force void *)release_addr, - sizeof(*release_addr)); + dcache_clean_inval_poc((__force unsigned long)release_addr, + (__force unsigned long)release_addr + + sizeof(*release_addr)); /* * Send an event to wake up the secondary CPU. 
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 265fe3eb1069..db5159a3055f 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -41,7 +41,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end) dsb(ish); } - ret = __flush_cache_user_range(start, start + chunk); + ret = caches_clean_inval_user_pou(start, start + chunk); if (ret) return ret; diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 74e0699661e9..3df67c127489 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -9,6 +9,7 @@ #include <linux/kvm_host.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/uaccess.h> #include <clocksource/arm_arch_timer.h> @@ -973,36 +974,154 @@ static int kvm_timer_dying_cpu(unsigned int cpu) return 0; } -int kvm_timer_hyp_init(bool has_gic) +static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) { - struct arch_timer_kvm_info *info; - int err; + if (vcpu) + irqd_set_forwarded_to_vcpu(d); + else + irqd_clr_forwarded_to_vcpu(d); - info = arch_timer_get_kvm_info(); - timecounter = &info->timecounter; + return 0; +} - if (!timecounter->cc) { - kvm_err("kvm_arch_timer: uninitialized timecounter\n"); - return -ENODEV; +static int timer_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, bool val) +{ + if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d)) + return irq_chip_set_parent_state(d, which, val); + + if (val) + irq_chip_mask_parent(d); + else + irq_chip_unmask_parent(d); + + return 0; +} + +static void timer_irq_eoi(struct irq_data *d) +{ + if (!irqd_is_forwarded_to_vcpu(d)) + irq_chip_eoi_parent(d); +} + +static void timer_irq_ack(struct irq_data *d) +{ + d = d->parent_data; + if (d->chip->irq_ack) + d->chip->irq_ack(d); +} + +static struct irq_chip timer_chip = { + .name = "KVM", + .irq_ack = timer_irq_ack, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = timer_irq_eoi, + .irq_set_type = irq_chip_set_type_parent, + .irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity, + .irq_set_irqchip_state = timer_irq_set_irqchip_state, +}; + +static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + irq_hw_number_t hwirq = (uintptr_t)arg; + + return irq_domain_set_hwirq_and_chip(domain, virq, hwirq, + &timer_chip, NULL); +} + +static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ +} + +static const struct irq_domain_ops timer_domain_ops = { + .alloc = timer_irq_domain_alloc, + .free = timer_irq_domain_free, +}; + +static struct irq_ops arch_timer_irq_ops = { + .get_input_level = kvm_arch_timer_get_input_level, +}; + +static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) +{ + *flags = irq_get_trigger_type(virq); + if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) { + kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n", + virq); + *flags = IRQF_TRIGGER_LOW; } +} - /* First, do the virtual EL1 timer irq */ +static int kvm_irq_init(struct arch_timer_kvm_info *info) +{ + struct irq_domain *domain = NULL; if (info->virtual_irq <= 0) { kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", info->virtual_irq); return -ENODEV; } + host_vtimer_irq = info->virtual_irq; + kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags); + + if (kvm_vgic_global_state.no_hw_deactivation) { + struct fwnode_handle 
*fwnode; + struct irq_data *data; + + fwnode = irq_domain_alloc_named_fwnode("kvm-timer"); + if (!fwnode) + return -ENOMEM; + + /* Assume both vtimer and ptimer in the same parent */ + data = irq_get_irq_data(host_vtimer_irq); + domain = irq_domain_create_hierarchy(data->domain, 0, + NR_KVM_TIMERS, fwnode, + &timer_domain_ops, NULL); + if (!domain) { + irq_domain_free_fwnode(fwnode); + return -ENOMEM; + } + + arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE; + WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq, + (void *)TIMER_VTIMER)); + } - host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); - if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && - host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { - kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n", - host_vtimer_irq); - host_vtimer_irq_flags = IRQF_TRIGGER_LOW; + if (info->physical_irq > 0) { + host_ptimer_irq = info->physical_irq; + kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags); + + if (domain) + WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq, + (void *)TIMER_PTIMER)); } + return 0; +} + +int kvm_timer_hyp_init(bool has_gic) +{ + struct arch_timer_kvm_info *info; + int err; + + info = arch_timer_get_kvm_info(); + timecounter = &info->timecounter; + + if (!timecounter->cc) { + kvm_err("kvm_arch_timer: uninitialized timecounter\n"); + return -ENODEV; + } + + err = kvm_irq_init(info); + if (err) + return err; + + /* First, do the virtual EL1 timer irq */ + err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, "kvm guest vtimer", kvm_get_running_vcpus()); if (err) { @@ -1027,15 +1146,6 @@ int kvm_timer_hyp_init(bool has_gic) /* Now let's do the physical EL1 timer irq */ if (info->physical_irq > 0) { - host_ptimer_irq = info->physical_irq; - host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq); - if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH && - host_ptimer_irq_flags != IRQF_TRIGGER_LOW) { - kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n", - host_ptimer_irq); - host_ptimer_irq_flags = IRQF_TRIGGER_LOW; - } - err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler, "kvm guest ptimer", kvm_get_running_vcpus()); if (err) { @@ -1143,7 +1253,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_vtimer->host_timer_irq, map.direct_vtimer->irq.irq, - kvm_arch_timer_get_input_level); + &arch_timer_irq_ops); if (ret) return ret; @@ -1151,7 +1261,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_ptimer->host_timer_irq, map.direct_ptimer->irq.irq, - kvm_arch_timer_get_input_level); + &arch_timer_irq_ops); } if (ret) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e720148232a0..e0b81870ff2a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -93,6 +93,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = 0; kvm->arch.return_nisv_io_abort_to_user = true; break; + case KVM_CAP_ARM_MTE: + if (!system_supports_mte() || kvm->created_vcpus) + return -EINVAL; + r = 0; + kvm->arch.mte_enabled = true; + break; default: r = -EINVAL; break; @@ -237,6 +243,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) */ r = 1; break; + case KVM_CAP_ARM_MTE: + r = system_supports_mte(); + break; case KVM_CAP_STEAL_TIME: r = kvm_arm_pvtime_supported(); break; @@ -689,6 +698,10 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) vgic_v4_load(vcpu); preempt_enable(); } + + if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu)) + kvm_pmu_handle_pmcr(vcpu, + 
__vcpu_sys_reg(vcpu, PMCR_EL0)); } } @@ -1078,7 +1091,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) stage2_unmap_vm(vcpu->kvm); else - __flush_icache_all(); + icache_inval_all_pou(); } vcpu_reset_hcr(vcpu); @@ -1350,6 +1363,13 @@ long kvm_arch_vm_ioctl(struct file *filp, return 0; } + case KVM_ARM_MTE_COPY_TAGS: { + struct kvm_arm_copy_mte_tags copy_tags; + + if (copy_from_user(©_tags, argp, sizeof(copy_tags))) + return -EFAULT; + return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); + } default: return -EINVAL; } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 1512a8007a78..1dfb83578277 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -1015,3 +1015,89 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, return ret; } + +long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, + struct kvm_arm_copy_mte_tags *copy_tags) +{ + gpa_t guest_ipa = copy_tags->guest_ipa; + size_t length = copy_tags->length; + void __user *tags = copy_tags->addr; + gpa_t gfn; + bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST); + int ret = 0; + + if (!kvm_has_mte(kvm)) + return -EINVAL; + + if (copy_tags->reserved[0] || copy_tags->reserved[1]) + return -EINVAL; + + if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST) + return -EINVAL; + + if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK) + return -EINVAL; + + gfn = gpa_to_gfn(guest_ipa); + + mutex_lock(&kvm->slots_lock); + + while (length > 0) { + kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL); + void *maddr; + unsigned long num_tags; + struct page *page; + + if (is_error_noslot_pfn(pfn)) { + ret = -EFAULT; + goto out; + } + + page = pfn_to_online_page(pfn); + if (!page) { + /* Reject ZONE_DEVICE memory */ + ret = -EFAULT; + goto out; + } + maddr = page_address(page); + + if (!write) { + if (test_bit(PG_mte_tagged, &page->flags)) + num_tags = mte_copy_tags_to_user(tags, maddr, + MTE_GRANULES_PER_PAGE); + else + /* No tags in memory, so write zeros */ + num_tags = MTE_GRANULES_PER_PAGE - + clear_user(tags, MTE_GRANULES_PER_PAGE); + kvm_release_pfn_clean(pfn); + } else { + num_tags = mte_copy_tags_from_user(maddr, tags, + MTE_GRANULES_PER_PAGE); + + /* + * Set the flag after checking the write + * completed fully + */ + if (num_tags == MTE_GRANULES_PER_PAGE) + set_bit(PG_mte_tagged, &page->flags); + + kvm_release_pfn_dirty(pfn); + } + + if (num_tags != MTE_GRANULES_PER_PAGE) { + ret = -EFAULT; + goto out; + } + + gfn++; + tags += num_tags; + length -= PAGE_SIZE; + } + +out: + mutex_unlock(&kvm->slots_lock); + /* If some data has been copied report the number of bytes copied */ + if (length != copy_tags->length) + return copy_tags->length - length; + return ret; +} diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index e831d3dfd50d..435346ea1504 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -13,6 +13,7 @@ #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmu.h> +#include <asm/kvm_mte.h> #include <asm/kvm_ptrauth.h> .text @@ -51,6 +52,9 @@ alternative_else_nop_endif add x29, x0, #VCPU_CONTEXT + // mte_switch_to_guest(g_ctxt, h_ctxt, tmp1) + mte_switch_to_guest x29, x1, x2 + // Macro ptrauth_switch_to_guest format: // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3) // The below macro to restore guest keys is not implemented in C code @@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // when this feature is enabled for kernel code. 
ptrauth_switch_to_hyp x1, x2, x3, x4, x5 + // mte_switch_to_hyp(g_ctxt, h_ctxt, reg1) + mte_switch_to_hyp x1, x2, x3 + // Restore hyp's sp_el0 restore_sp_el0 x2, x3 diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 11541b94b328..0418399e0a20 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, new |= (old & PSR_C_BIT); new |= (old & PSR_V_BIT); - // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + if (kvm_has_mte(vcpu->kvm)) + new |= PSR_TCO_BIT; new |= (old & PSR_DIT_BIT); diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 5f49df4ffdd8..9aa9b73475c9 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -76,6 +76,7 @@ el1_trap: b __guest_exit el1_irq: +el1_fiq: get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_IRQ b __guest_exit @@ -131,7 +132,6 @@ SYM_CODE_END(\label) invalid_vector el2t_error_invalid invalid_vector el2h_irq_invalid invalid_vector el2h_fiq_invalid - invalid_vector el1_fiq_invalid .ltorg @@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector) valid_vect el1_sync // Synchronous 64-bit EL1 valid_vect el1_irq // IRQ 64-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 64-bit EL1 + valid_vect el1_fiq // FIQ 64-bit EL1 valid_vect el1_error // Error 64-bit EL1 valid_vect el1_sync // Synchronous 32-bit EL1 valid_vect el1_irq // IRQ 32-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 32-bit EL1 + valid_vect el1_fiq // FIQ 32-bit EL1 valid_vect el1_error // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_vector) diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index cce43bfe158f..de7e14c862e6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -14,6 +14,7 @@ #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { @@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); } +static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; + + if (!vcpu) + vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); + + return kvm_has_mte(kern_hyp_va(vcpu->kvm)); +} + static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); @@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); + if (ctxt_has_mte(ctxt)) { + ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR); + ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1); + } + ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); @@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); + if (ctxt_has_mte(ctxt)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR); + write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1); + } + if (!has_vhe() && 
cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && ctxt->__hyp_running_vcpu) { diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index 18a4494337bd..fb0f523d1492 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -7,7 +7,7 @@ #include <nvhe/memory.h> #include <nvhe/spinlock.h> -#define HYP_NO_ORDER UINT_MAX +#define HYP_NO_ORDER USHRT_MAX struct hyp_pool { /* @@ -19,48 +19,13 @@ struct hyp_pool { struct list_head free_area[MAX_ORDER]; phys_addr_t range_start; phys_addr_t range_end; - unsigned int max_order; + unsigned short max_order; }; -static inline void hyp_page_ref_inc(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - p->refcount++; - hyp_spin_unlock(&pool->lock); -} - -static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - int ret; - - hyp_spin_lock(&pool->lock); - p->refcount--; - ret = (p->refcount == 0); - hyp_spin_unlock(&pool->lock); - - return ret; -} - -static inline void hyp_set_page_refcounted(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - if (p->refcount) { - hyp_spin_unlock(&pool->lock); - BUG(); - } - p->refcount = 1; - hyp_spin_unlock(&pool->lock); -} - /* Allocation */ -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order); -void hyp_get_page(void *addr); -void hyp_put_page(void *addr); +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); +void hyp_get_page(struct hyp_pool *pool, void *addr); +void hyp_put_page(struct hyp_pool *pool, void *addr); /* Used pages cannot be freed */ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 42d81ec739fa..9c227d87c36d 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -23,7 +23,7 @@ extern struct host_kvm host_kvm; int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); -int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool); +int kvm_host_prepare_stage2(void *pgt_pool_base); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); static __always_inline void __load_host_stage2(void) diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index fd78bde939ee..592b7edb3edb 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -7,12 +7,9 @@ #include <linux/types.h> -struct hyp_pool; struct hyp_page { - unsigned int refcount; - unsigned int order; - struct hyp_pool *pool; - struct list_head node; + unsigned short refcount; + unsigned short order; }; extern u64 __hyp_vmemmap; diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 0095f6289742..8ec3a5a7744b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void) return res; } -static inline unsigned long host_s2_mem_pgtable_pages(void) +static inline unsigned long host_s2_pgtable_pages(void) { + unsigned long res; + /* * Include an extra 16 pages to safely upper-bound the worst case of * concatenated pgds. 
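For a sense of why the struct hyp_page diet above matters (and why HYP_NO_ORDER had to become USHRT_MAX once order turned into an unsigned short), this throwaway userspace program just compares the two layouts; the types are stand-ins, not the kernel definitions. On an LP64 build it prints 32 versus 4 bytes of vmemmap metadata per hypervisor page.

#include <stdio.h>

/* Rough stand-ins for the removed pointer and embedded list node. */
struct fake_list_head { void *next, *prev; };

struct hyp_page_before {
    unsigned int refcount;
    unsigned int order;
    void *pool;
    struct fake_list_head node;
};

struct hyp_page_after {           /* matches the memory.h hunk above */
    unsigned short refcount;
    unsigned short order;
};

int main(void)
{
    printf("per-page metadata: %zu -> %zu bytes\n",
           sizeof(struct hyp_page_before), sizeof(struct hyp_page_after));
    return 0;
}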
*/ - return __hyp_pgtable_total_pages() + 16; -} + res = __hyp_pgtable_total_pages() + 16; -static inline unsigned long host_s2_dev_pgtable_pages(void) -{ /* Allow 1 GiB for MMIO mappings */ - return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + + return res; } #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 36cef6915428..958734f4d6b0 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -7,7 +7,7 @@ #include <asm/assembler.h> #include <asm/alternative.h> -SYM_FUNC_START_PI(__flush_dcache_area) +SYM_FUNC_START_PI(dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__flush_dcache_area) +SYM_FUNC_END_PI(dcache_clean_inval_poc) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 4b60c0056c04..d938ce95d3bd 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -23,8 +23,7 @@ extern unsigned long hyp_nr_cpus; struct host_kvm host_kvm; -static struct hyp_pool host_s2_mem; -static struct hyp_pool host_s2_dev; +static struct hyp_pool host_s2_pool; /* * Copies of the host's CPU features registers holding sanitized values. @@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) { - return hyp_alloc_pages(&host_s2_mem, get_order(size)); + return hyp_alloc_pages(&host_s2_pool, get_order(size)); } static void *host_s2_zalloc_page(void *pool) @@ -44,20 +43,24 @@ static void *host_s2_zalloc_page(void *pool) return hyp_alloc_pages(pool, 0); } -static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool) +static void host_s2_get_page(void *addr) +{ + hyp_get_page(&host_s2_pool, addr); +} + +static void host_s2_put_page(void *addr) +{ + hyp_put_page(&host_s2_pool, addr); +} + +static int prepare_s2_pool(void *pgt_pool_base) { unsigned long nr_pages, pfn; int ret; - pfn = hyp_virt_to_pfn(mem_pgt_pool); - nr_pages = host_s2_mem_pgtable_pages(); - ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0); - if (ret) - return ret; - - pfn = hyp_virt_to_pfn(dev_pgt_pool); - nr_pages = host_s2_dev_pgtable_pages(); - ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0); + pfn = hyp_virt_to_pfn(pgt_pool_base); + nr_pages = host_s2_pgtable_pages(); + ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0); if (ret) return ret; @@ -67,8 +70,8 @@ static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool) .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, .page_count = hyp_page_count, - .get_page = hyp_get_page, - .put_page = hyp_put_page, + .get_page = host_s2_get_page, + .put_page = host_s2_put_page, }; return 0; @@ -86,7 +89,7 @@ static void prepare_host_vtcr(void) id_aa64mmfr1_el1_sys_val, phys_shift); } -int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) +int kvm_host_prepare_stage2(void *pgt_pool_base) { struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; int ret; @@ -94,7 +97,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) prepare_host_vtcr(); hyp_spin_lock_init(&host_kvm.lock); - ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool); + ret = prepare_s2_pool(pgt_pool_base); if (ret) return ret; @@ -199,11 +202,10 @@ static bool range_is_memory(u64 start, u64 end) } static inline int __host_stage2_idmap(u64 start, u64 end, - enum kvm_pgtable_prot prot, - struct hyp_pool *pool) + enum kvm_pgtable_prot prot) { return 
kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, - prot, pool); + prot, &host_s2_pool); } static int host_stage2_idmap(u64 addr) @@ -211,7 +213,6 @@ static int host_stage2_idmap(u64 addr) enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; struct kvm_mem_range range; bool is_memory = find_mem_range(addr, &range); - struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev; int ret; if (is_memory) @@ -222,22 +223,21 @@ static int host_stage2_idmap(u64 addr) if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot, pool); - if (is_memory || ret != -ENOMEM) + ret = __host_stage2_idmap(range.start, range.end, prot); + if (ret != -ENOMEM) goto unlock; /* - * host_s2_mem has been provided with enough pages to cover all of - * memory with page granularity, so we should never hit the ENOMEM case. - * However, it is difficult to know how much of the MMIO range we will - * need to cover upfront, so we may need to 'recycle' the pages if we - * run out. + * The pool has been provided with enough pages to cover all of memory + * with page granularity, but it is difficult to know how much of the + * MMIO range we will need to cover upfront, so we may need to 'recycle' + * the pages if we run out. */ ret = host_stage2_unmap_dev_all(); if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot, pool); + ret = __host_stage2_idmap(range.start, range.end, prot); unlock: hyp_spin_unlock(&host_kvm.lock); @@ -258,7 +258,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) hyp_spin_lock(&host_kvm.lock); ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start, - &host_s2_mem, pkvm_hyp_id); + &host_s2_pool, pkvm_hyp_id); hyp_spin_unlock(&host_kvm.lock); return ret != -EAGAIN ? ret : 0; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 237e03bf0cb1..41fc25bdfb34 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -32,7 +32,7 @@ u64 __hyp_vmemmap; */ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { phys_addr_t addr = hyp_page_to_phys(p); @@ -51,21 +51,49 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, /* Find a buddy page currently available for allocation */ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); - if (!buddy || buddy->order != order || list_empty(&buddy->node)) + if (!buddy || buddy->order != order || buddy->refcount) return NULL; return buddy; } +/* + * Pages that are available for allocation are tracked in free-lists, so we use + * the pages themselves to store the list nodes to avoid wasting space. As the + * allocator always returns zeroed pages (which are zeroed on the hyp_put_page() + * path to optimize allocation speed), we also need to clean-up the list node in + * each page when we take it out of the list. 
+ */ +static inline void page_remove_from_list(struct hyp_page *p) +{ + struct list_head *node = hyp_page_to_virt(p); + + __list_del_entry(node); + memset(node, 0, sizeof(*node)); +} + +static inline void page_add_to_list(struct hyp_page *p, struct list_head *head) +{ + struct list_head *node = hyp_page_to_virt(p); + + INIT_LIST_HEAD(node); + list_add_tail(node, head); +} + +static inline struct hyp_page *node_to_page(struct list_head *node) +{ + return hyp_virt_to_page(node); +} + static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { - unsigned int order = p->order; + unsigned short order = p->order; struct hyp_page *buddy; memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); @@ -83,32 +111,23 @@ static void __hyp_attach_page(struct hyp_pool *pool, break; /* Take the buddy out of its list, and coallesce with @p */ - list_del_init(&buddy->node); + page_remove_from_list(buddy); buddy->order = HYP_NO_ORDER; p = min(p, buddy); } /* Mark the new head, and insert it */ p->order = order; - list_add_tail(&p->node, &pool->free_area[order]); -} - -static void hyp_attach_page(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - __hyp_attach_page(pool, p); - hyp_spin_unlock(&pool->lock); + page_add_to_list(p, &pool->free_area[order]); } static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { struct hyp_page *buddy; - list_del_init(&p->node); + page_remove_from_list(p); while (p->order > order) { /* * The buddy of order n - 1 currently has HYP_NO_ORDER as it @@ -119,30 +138,64 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, p->order--; buddy = __find_buddy_nocheck(pool, p, p->order); buddy->order = p->order; - list_add_tail(&buddy->node, &pool->free_area[buddy->order]); + page_add_to_list(buddy, &pool->free_area[buddy->order]); } return p; } -void hyp_put_page(void *addr) +static inline void hyp_page_ref_inc(struct hyp_page *p) { - struct hyp_page *p = hyp_virt_to_page(addr); + BUG_ON(p->refcount == USHRT_MAX); + p->refcount++; +} +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + p->refcount--; + return (p->refcount == 0); +} + +static inline void hyp_set_page_refcounted(struct hyp_page *p) +{ + BUG_ON(p->refcount); + p->refcount = 1; +} + +static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p) +{ if (hyp_page_ref_dec_and_test(p)) - hyp_attach_page(p); + __hyp_attach_page(pool, p); +} + +/* + * Changes to the buddy tree and page refcounts must be done with the hyp_pool + * lock held. If a refcount change requires an update to the buddy tree (e.g. + * hyp_put_page()), both operations must be done within the same critical + * section to guarantee transient states (e.g. a page with null refcount but + * not yet attached to a free list) can't be observed by well-behaved readers. 
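The comment and helpers above keep the free-list node inside the free page itself and wipe it again on removal, so the allocator can keep handing out fully zeroed pages. A toy userspace version of the same idea (fixed page size, singly linked, no locking and no buddy merging), purely to illustrate the technique:

#include <stdio.h>
#include <string.h>

#define TOY_PAGE_SIZE 4096

/* A free page holds its own list node in its first bytes. */
struct toy_node {
    struct toy_node *next;
};

union toy_page {
    struct toy_node node;
    char bytes[TOY_PAGE_SIZE];
};

static struct toy_node *toy_free_list;

static void toy_put_page(union toy_page *page)
{
    memset(page, 0, sizeof(*page));        /* freed pages are zeroed here... */
    page->node.next = toy_free_list;       /* ...then reused to hold the node */
    toy_free_list = &page->node;
}

static void *toy_get_page(void)
{
    struct toy_node *node = toy_free_list;

    if (!node)
        return NULL;
    toy_free_list = node->next;
    memset(node, 0, sizeof(*node));        /* wipe the node: page is all zeroes again */
    return node;
}

int main(void)
{
    static union toy_page pages[4];
    int i;

    for (i = 0; i < 4; i++)
        toy_put_page(&pages[i]);

    printf("got %p then %p\n", toy_get_page(), toy_get_page());
    return 0;
}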
+ */ +void hyp_put_page(struct hyp_pool *pool, void *addr) +{ + struct hyp_page *p = hyp_virt_to_page(addr); + + hyp_spin_lock(&pool->lock); + __hyp_put_page(pool, p); + hyp_spin_unlock(&pool->lock); } -void hyp_get_page(void *addr) +void hyp_get_page(struct hyp_pool *pool, void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); + hyp_spin_lock(&pool->lock); hyp_page_ref_inc(p); + hyp_spin_unlock(&pool->lock); } -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order) { - unsigned int i = order; + unsigned short i = order; struct hyp_page *p; hyp_spin_lock(&pool->lock); @@ -156,11 +209,11 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) } /* Extract it from the tree at the right order */ - p = list_first_entry(&pool->free_area[i], struct hyp_page, node); + p = node_to_page(pool->free_area[i].next); p = __hyp_extract_page(pool, p, order); - hyp_spin_unlock(&pool->lock); hyp_set_page_refcounted(p); + hyp_spin_unlock(&pool->lock); return hyp_page_to_virt(p); } @@ -181,15 +234,14 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); - memset(p, 0, sizeof(*p) * nr_pages); for (i = 0; i < nr_pages; i++) { - p[i].pool = pool; - INIT_LIST_HEAD(&p[i].node); + p[i].order = 0; + hyp_set_page_refcounted(&p[i]); } /* Attach the unused pages to the buddy tree */ for (i = reserved_pages; i < nr_pages; i++) - __hyp_attach_page(pool, &p[i]); + __hyp_put_page(pool, &p[i]); return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index a3d3a275344e..0b574d106519 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus; static void *vmemmap_base; static void *hyp_pgt_base; -static void *host_s2_mem_pgt_base; -static void *host_s2_dev_pgt_base; +static void *host_s2_pgt_base; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; static int divide_memory_pool(void *virt, unsigned long size) @@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size) if (!hyp_pgt_base) return -ENOMEM; - nr_pages = host_s2_mem_pgtable_pages(); - host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages); - if (!host_s2_mem_pgt_base) - return -ENOMEM; - - nr_pages = host_s2_dev_pgtable_pages(); - host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages); - if (!host_s2_dev_pgt_base) + nr_pages = host_s2_pgtable_pages(); + host_s2_pgt_base = hyp_early_alloc_contig(nr_pages); + if (!host_s2_pgt_base) return -ENOMEM; return 0; @@ -134,7 +128,8 @@ static void update_nvhe_init_params(void) for (i = 0; i < hyp_nr_cpus; i++) { params = per_cpu_ptr(&kvm_init_params, i); params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd); - __flush_dcache_area(params, sizeof(*params)); + dcache_clean_inval_poc((unsigned long)params, + (unsigned long)params + sizeof(*params)); } } @@ -143,6 +138,16 @@ static void *hyp_zalloc_hyp_page(void *arg) return hyp_alloc_pages(&hpool, 0); } +static void hpool_get_page(void *addr) +{ + hyp_get_page(&hpool, addr); +} + +static void hpool_put_page(void *addr) +{ + hyp_put_page(&hpool, addr); +} + void __noreturn __pkvm_init_finalise(void) { struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); @@ -158,7 +163,7 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; - ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base); + ret = kvm_host_prepare_stage2(host_s2_pgt_base); if (ret) goto out; @@ 
-166,8 +171,8 @@ void __noreturn __pkvm_init_finalise(void) .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, - .get_page = hyp_get_page, - .put_page = hyp_put_page, + .get_page = hpool_get_page, + .put_page = hpool_put_page, }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 83dc3b271bc5..38ed0f6f2703 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -104,7 +104,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, * you should be running with VHE enabled. */ if (icache_is_vpipt()) - __flush_icache_all(); + icache_inval_all_pou(); __tlb_switch_to_host(&cxt); } diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c37c1dc4feaf..05321f4165e3 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -577,12 +577,24 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, mm_ops->put_page(ptep); } +static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) +{ + u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; + return memattr == KVM_S2_MEMATTR(pgt, NORMAL); +} + +static bool stage2_pte_executable(kvm_pte_t pte) +{ + return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); +} + static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) { kvm_pte_t new, old = *ptep; u64 granule = kvm_granule_size(level), phys = data->phys; + struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_block_mapping_supported(addr, end, phys, level)) @@ -606,6 +618,14 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); } + /* Perform CMOs before installation of the guest stage-2 PTE */ + if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new)) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops), + granule); + + if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) + mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); + smp_store_release(ptep, new); if (stage2_pte_is_counted(new)) mm_ops->get_page(ptep); @@ -798,12 +818,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) -{ - u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; - return memattr == KVM_S2_MEMATTR(pgt, NORMAL); -} - static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) @@ -839,8 +853,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, stage2_put_pte(ptep, mmu, addr, level, mm_ops); if (need_flush) { - __flush_dcache_area(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); + kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); + + dcache_clean_inval_poc((unsigned long)pte_follow, + (unsigned long)pte_follow + + kvm_granule_size(level)); } if (childp) @@ -861,10 +878,11 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) } struct stage2_attr_data { - kvm_pte_t attr_set; - kvm_pte_t attr_clr; - kvm_pte_t pte; - u32 level; + kvm_pte_t attr_set; + kvm_pte_t attr_clr; + kvm_pte_t pte; + u32 level; + struct kvm_pgtable_mm_ops *mm_ops; }; static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, @@ -873,6 +891,7 @@ static int 
stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, { kvm_pte_t pte = *ptep; struct stage2_attr_data *data = arg; + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_pte_valid(pte)) return 0; @@ -887,8 +906,17 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * but worst-case the access flag update gets lost and will be * set on the next access instead. */ - if (data->pte != pte) + if (data->pte != pte) { + /* + * Invalidate instruction cache before updating the guest + * stage-2 PTE if we are going to add executable permission. + */ + if (mm_ops->icache_inval_pou && + stage2_pte_executable(pte) && !stage2_pte_executable(*ptep)) + mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); WRITE_ONCE(*ptep, pte); + } return 0; } @@ -903,6 +931,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, struct stage2_attr_data data = { .attr_set = attr_set & attr_mask, .attr_clr = attr_clr & attr_mask, + .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = stage2_attr_walker, @@ -988,11 +1017,15 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct kvm_pgtable *pgt = arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; kvm_pte_t pte = *ptep; + kvm_pte_t *pte_follow; if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) return 0; - __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level)); + pte_follow = kvm_pte_follow(pte, mm_ops); + dcache_clean_inval_poc((unsigned long)pte_follow, + (unsigned long)pte_follow + + kvm_granule_size(level)); return 0; } diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c index 83ca23ac259b..d654921dd09b 100644 --- a/arch/arm64/kvm/hyp/reserved_mem.c +++ b/arch/arm64/kvm/hyp/reserved_mem.c @@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void) } hyp_mem_pages += hyp_s1_pgtable_pages(); - hyp_mem_pages += host_s2_mem_pgtable_pages(); - hyp_mem_pages += host_s2_dev_pgtable_pages(); + hyp_mem_pages += host_s2_pgtable_pages(); /* * The hyp_vmemmap needs to be backed by pages, but these pages diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c10207fed2f3..57292dc5ce35 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -126,6 +126,16 @@ static void *kvm_host_va(phys_addr_t phys) return __va(phys); } +static void clean_dcache_guest_page(void *va, size_t size) +{ + __clean_dcache_guest_page(va, size); +} + +static void invalidate_icache_guest_page(void *va, size_t size) +{ + __invalidate_icache_guest_page(va, size); +} + /* * Unmapping vs dcache management: * @@ -432,6 +442,8 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .page_count = kvm_host_page_count, .phys_to_virt = kvm_host_va, .virt_to_phys = kvm_host_pa, + .dcache_clean_inval_poc = clean_dcache_guest_page, + .icache_inval_pou = invalidate_icache_guest_page, }; /** @@ -693,16 +705,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } -static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) -{ - __clean_dcache_guest_page(pfn, size); -} - -static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) -{ - __invalidate_icache_guest_page(pfn, size); -} - static void kvm_send_hwpoison_signal(unsigned long address, short lsb) { send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); @@ -822,6 +824,74 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot, return 
PAGE_SIZE; } +static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva) +{ + unsigned long pa; + + if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP)) + return huge_page_shift(hstate_vma(vma)); + + if (!(vma->vm_flags & VM_PFNMAP)) + return PAGE_SHIFT; + + VM_BUG_ON(is_vm_hugetlb_page(vma)); + + pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start); + +#ifndef __PAGETABLE_PMD_FOLDED + if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) && + ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start && + ALIGN(hva, PUD_SIZE) <= vma->vm_end) + return PUD_SHIFT; +#endif + + if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) && + ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start && + ALIGN(hva, PMD_SIZE) <= vma->vm_end) + return PMD_SHIFT; + + return PAGE_SHIFT; +} + +/* + * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be + * able to see the page's tags and therefore they must be initialised first. If + * PG_mte_tagged is set, tags have already been initialised. + * + * The race in the test/set of the PG_mte_tagged flag is handled by: + * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs + * racing to santise the same page + * - mmap_lock protects between a VM faulting a page in and the VMM performing + * an mprotect() to add VM_MTE + */ +static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, + unsigned long size) +{ + unsigned long i, nr_pages = size >> PAGE_SHIFT; + struct page *page; + + if (!kvm_has_mte(kvm)) + return 0; + + /* + * pfn_to_online_page() is used to reject ZONE_DEVICE pages + * that may not support tags. + */ + page = pfn_to_online_page(pfn); + + if (!page) + return -EFAULT; + + for (i = 0; i < nr_pages; i++, page++) { + if (!test_bit(PG_mte_tagged, &page->flags)) { + mte_clear_page_tags(page_address(page)); + set_bit(PG_mte_tagged, &page->flags); + } + } + + return 0; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -830,6 +900,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, bool write_fault, writable, force_pte = false; bool exec_fault; bool device = false; + bool shared; unsigned long mmu_seq; struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; @@ -853,7 +924,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - /* Let's check if we will get back a huge page backed by hugetlbfs */ + /* + * Let's check if we will get back a huge page backed by hugetlbfs, or + * get block mapping for device MMIO region. + */ mmap_read_lock(current->mm); vma = find_vma_intersection(current->mm, hva, hva + 1); if (unlikely(!vma)) { @@ -862,17 +936,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (is_vm_hugetlb_page(vma)) - vma_shift = huge_page_shift(hstate_vma(vma)); - else - vma_shift = PAGE_SHIFT; - - if (logging_active || - (vma->vm_flags & VM_PFNMAP)) { + /* + * logging_active is guaranteed to never be true for VM_PFNMAP + * memslots. 
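get_vma_page_shift() above lets a VM_PFNMAP (device MMIO) region use a 1GiB or 2MiB stage-2 block when the host VA and the PA are congruent modulo the block size and the whole block fits inside the VMA. A standalone model of that test, assuming 4KiB-page geometry; all names here are invented for the sketch:

#include <stdint.h>
#include <stdio.h>

#define SHIFT_4K 12
#define SHIFT_2M 21    /* PMD block with 4KiB pages */
#define SHIFT_1G 30    /* PUD block with 4KiB pages */

static uint64_t align_down(uint64_t x, uint64_t a) { return x & ~(a - 1); }
static uint64_t align_up(uint64_t x, uint64_t a)   { return (x + a - 1) & ~(a - 1); }

/* Can a block of size (1 << shift) map hva -> pa within [vm_start, vm_end)? */
static int block_usable(uint64_t hva, uint64_t pa,
                        uint64_t vm_start, uint64_t vm_end, unsigned int shift)
{
    uint64_t size = 1ULL << shift;

    return (hva & (size - 1)) == (pa & (size - 1)) &&
           align_down(hva, size) >= vm_start &&
           align_up(hva, size) <= vm_end;
}

static unsigned int vma_page_shift(uint64_t hva, uint64_t pa,
                                   uint64_t vm_start, uint64_t vm_end)
{
    if (block_usable(hva, pa, vm_start, vm_end, SHIFT_1G))
        return SHIFT_1G;
    if (block_usable(hva, pa, vm_start, vm_end, SHIFT_2M))
        return SHIFT_2M;
    return SHIFT_4K;
}

int main(void)
{
    /* A 4MiB PFNMAP VMA whose hva and pa are congruent modulo 2MiB. */
    uint64_t vm_start = 0x40000000, vm_end = 0x40400000;
    uint64_t hva = 0x40200000, pa = 0x90200000;

    printf("shift = %u\n", vma_page_shift(hva, pa, vm_start, vm_end));
    return 0;
}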
+ */ + if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; + } else { + vma_shift = get_vma_page_shift(vma, hva); } + shared = (vma->vm_flags & VM_PFNMAP); + switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SHIFT: @@ -943,8 +1019,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; if (kvm_is_device_pfn(pfn)) { + /* + * If the page was identified as device early by looking at + * the VMA flags, vma_pagesize is already representing the + * largest quantity we can map. If instead it was mapped + * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE + * and must not be upgraded. + * + * In both cases, we don't let transparent_hugepage_adjust() + * change things at the last minute. + */ device = true; - force_pte = true; } else if (logging_active && !write_fault) { /* * Only actually map the page as writable if this was a write @@ -965,19 +1050,25 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * If we are not forced to use page mapping, check if we are * backed by a THP and thus use block mapping if possible. */ - if (vma_pagesize == PAGE_SIZE && !force_pte) + if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) vma_pagesize = transparent_hugepage_adjust(memslot, hva, &pfn, &fault_ipa); + + if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { + /* Check the VMM hasn't introduced a new VM_SHARED VMA */ + if (!shared) + ret = sanitise_mte_tags(kvm, pfn, vma_pagesize); + else + ret = -EFAULT; + if (ret) + goto out_unlock; + } + if (writable) prot |= KVM_PGTABLE_PROT_W; - if (fault_status != FSC_PERM && !device) - clean_dcache_guest_page(pfn, vma_pagesize); - - if (exec_fault) { + if (exec_fault) prot |= KVM_PGTABLE_PROT_X; - invalidate_icache_guest_page(pfn, vma_pagesize); - } if (device) prot |= KVM_PGTABLE_PROT_DEVICE; @@ -1168,19 +1259,22 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_pfn_t pfn = pte_pfn(range->pte); + int ret; if (!kvm->arch.mmu.pgt) return false; WARN_ON(range->end - range->start != 1); - /* - * We've moved a page around, probably through CoW, so let's treat it - * just like a translation fault and clean the cache to the PoC. - */ - clean_dcache_guest_page(pfn, PAGE_SIZE); + ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE); + if (ret) + return false; /* + * We've moved a page around, probably through CoW, so let's treat + * it just like a translation fault and the map handler will clean + * the cache to the PoC. + * * The MMU notifiers will have unmapped a huge PMD before calling * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and * therefore we never need to clear out a huge PMD through this @@ -1346,7 +1440,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, { hva_t hva = mem->userspace_addr; hva_t reg_end = hva + mem->memory_size; - bool writable = !(mem->flags & KVM_MEM_READONLY); int ret = 0; if (change != KVM_MR_CREATE && change != KVM_MR_MOVE && @@ -1363,8 +1456,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, mmap_read_lock(current->mm); /* * A memory region could potentially cover multiple VMAs, and any holes - * between them, so iterate over all of them to find out if we can map - * any of them right now. + * between them, so iterate over all of them. 
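Since the tag sanitisation path above refuses shared pages and the kvm_arch_prepare_memory_region() hunk that follows rejects VM_SHARED VMAs outright once MTE is enabled, a VMM would typically back such memslots with private anonymous memory. A minimal sketch using the standard memslot ioctl; vm_fd, slot and the guest physical address are placeholders:

#include <sys/mman.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int add_private_memslot(int vm_fd, __u64 gpa, __u64 size, __u32 slot)
{
    void *host_mem;
    struct kvm_userspace_memory_region region;

    /* MAP_PRIVATE, not MAP_SHARED: VM_SHARED is refused once MTE is on. */
    host_mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (host_mem == MAP_FAILED)
        return -1;

    region = (struct kvm_userspace_memory_region) {
        .slot            = slot,
        .guest_phys_addr = gpa,
        .memory_size     = size,
        .userspace_addr  = (__u64)(unsigned long)host_mem,
    };

    return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}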
* * +--------------------------------------------+ * +---------------+----------------+ +----------------+ @@ -1375,51 +1467,29 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, */ do { struct vm_area_struct *vma; - hva_t vm_start, vm_end; vma = find_vma_intersection(current->mm, hva, reg_end); if (!vma) break; /* - * Take the intersection of this VMA with the memory region + * VM_SHARED mappings are not allowed with MTE to avoid races + * when updating the PG_mte_tagged page flag, see + * sanitise_mte_tags for more details. */ - vm_start = max(hva, vma->vm_start); - vm_end = min(reg_end, vma->vm_end); + if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) + return -EINVAL; if (vma->vm_flags & VM_PFNMAP) { - gpa_t gpa = mem->guest_phys_addr + - (vm_start - mem->userspace_addr); - phys_addr_t pa; - - pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; - pa += vm_start - vma->vm_start; - /* IO region dirty page logging not allowed */ if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { ret = -EINVAL; - goto out; - } - - ret = kvm_phys_addr_ioremap(kvm, gpa, pa, - vm_end - vm_start, - writable); - if (ret) break; + } } - hva = vm_end; + hva = min(reg_end, vma->vm_end); } while (hva < reg_end); - if (change == KVM_MR_FLAGS_ONLY) - goto out; - - spin_lock(&kvm->mmu_lock); - if (ret) - unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size); - else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) - stage2_flush_memslot(kvm, memslot); - spin_unlock(&kvm->mmu_lock); -out: mmap_read_unlock(current->mm); return ret; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index fd167d4f4215..f33825c995cb 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -578,6 +578,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { + mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) kvm_pmu_set_counter_value(vcpu, i, 0); } @@ -850,6 +851,9 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) return -EINVAL; } + /* One-off reload of the PMU on first run */ + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); + return 0; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d37ebee085cf..cba7872d69a8 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -176,6 +176,10 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit) return false; + /* MTE is incompatible with AArch32 */ + if (kvm_has_mte(vcpu->kvm) && is32bit) + return false; + /* Check that the vcpus are either all 32bit or all 64bit */ kvm_for_each_vcpu(i, tmp, vcpu->kvm) { if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1a7968ad078c..f6f126eb6ac1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64PFR1_EL1: val &= ~FEATURE(ID_AA64PFR1_MTE); + if (kvm_has_mte(vcpu->kvm)) { + u64 pfr, mte; + + pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); + mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT); + val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte); + } break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) @@ -1302,6 +1309,23 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } +static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, + const 
struct sys_reg_desc *rd) +{ + if (kvm_has_mte(vcpu->kvm)) + return 0; + + return REG_HIDDEN; +} + +#define MTE_REG(name) { \ + SYS_DESC(SYS_##name), \ + .access = undef_access, \ + .reset = reset_unknown, \ + .reg = name, \ + .visibility = mte_visibility, \ +} + /* sys_reg_desc initialiser for known cpufeature ID registers */ #define ID_SANITISED(name) { \ SYS_DESC(SYS_##name), \ @@ -1470,8 +1494,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, - { SYS_DESC(SYS_RGSR_EL1), undef_access }, - { SYS_DESC(SYS_GCR_EL1), undef_access }, + MTE_REG(RGSR_EL1), + MTE_REG(GCR_EL1), { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TRFCR_EL1), undef_access }, @@ -1498,8 +1522,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi }, { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, - { SYS_DESC(SYS_TFSR_EL1), undef_access }, - { SYS_DESC(SYS_TFSRE0_EL1), undef_access }, + MTE_REG(TFSR_EL1), + MTE_REG(TFSRE0_EL1), { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 58cbda00e56d..340c51d87677 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -482,6 +482,16 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +static struct gic_kvm_info *gic_kvm_info; + +void __init vgic_set_kvm_info(const struct gic_kvm_info *info) +{ + BUG_ON(gic_kvm_info != NULL); + gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL); + if (gic_kvm_info) + *gic_kvm_info = *info; +} + /** * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware * @@ -509,18 +519,29 @@ void kvm_vgic_init_cpu_hardware(void) */ int kvm_vgic_hyp_init(void) { - const struct gic_kvm_info *gic_kvm_info; + bool has_mask; int ret; - gic_kvm_info = gic_get_kvm_info(); if (!gic_kvm_info) return -ENODEV; - if (!gic_kvm_info->maint_irq) { + has_mask = !gic_kvm_info->no_maint_irq_mask; + + if (has_mask && !gic_kvm_info->maint_irq) { kvm_err("No vgic maintenance irq\n"); return -ENXIO; } + /* + * If we get one of these oddball non-GICs, taint the kernel, + * as we have no idea of how they *really* behave. + */ + if (gic_kvm_info->no_hw_deactivation) { + kvm_info("Non-architectural vgic, tainting kernel\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + kvm_vgic_global_state.no_hw_deactivation = true; + } + switch (gic_kvm_info->type) { case GIC_V2: ret = vgic_v2_probe(gic_kvm_info); @@ -536,10 +557,17 @@ int kvm_vgic_hyp_init(void) ret = -ENODEV; } + kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; + + kfree(gic_kvm_info); + gic_kvm_info = NULL; + if (ret) return ret; - kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; + if (!has_mask) + return 0; + ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 11934c2af2f4..2c580204f1dc 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -108,11 +108,22 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) * If this causes us to lower the level, we have to also clear * the physical active state, since we will otherwise never be * told when the interrupt becomes asserted again. 
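The sys_regs changes above only pass the sanitised ID_AA64PFR1_EL1.MTE field through to MTE-enabled guests. As a reminder of what that field extraction amounts to, here is a trivial model; the shift of 8 is the architectural position of the MTE field and is an assumption of this sketch, not something taken from the hunk:

#include <stdint.h>
#include <stdio.h>

/* Extract an unsigned 4-bit ID register field, as the cpufeature helpers do. */
static unsigned int id_field(uint64_t reg, unsigned int shift)
{
    return (reg >> shift) & 0xf;
}

int main(void)
{
    uint64_t id_aa64pfr1 = 0x200;    /* example: MTE field (bits [11:8]) = 2 */

    printf("ID_AA64PFR1_EL1.MTE = %u\n", id_field(id_aa64pfr1, 8));
    return 0;
}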
+ * + * Another case is when the interrupt requires a helping hand + * on deactivation (no HW deactivation, for example). */ - if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) { - irq->line_level = vgic_get_phys_line_level(irq); + if (vgic_irq_is_mapped_level(irq)) { + bool resample = false; + + if (val & GICH_LR_PENDING_BIT) { + irq->line_level = vgic_get_phys_line_level(irq); + resample = !irq->line_level; + } else if (vgic_irq_needs_resampling(irq) && + !(irq->active || irq->pending_latch)) { + resample = true; + } - if (!irq->line_level) + if (resample) vgic_irq_set_phys_active(irq, false); } @@ -152,7 +163,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (irq->group) val |= GICH_LR_GROUP1; - if (irq->hw) { + if (irq->hw && !vgic_irq_needs_resampling(irq)) { val |= GICH_LR_HW; val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; /* diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 41ecf219c333..66004f61cd83 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -101,11 +101,22 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) * If this causes us to lower the level, we have to also clear * the physical active state, since we will otherwise never be * told when the interrupt becomes asserted again. + * + * Another case is when the interrupt requires a helping hand + * on deactivation (no HW deactivation, for example). */ - if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) { - irq->line_level = vgic_get_phys_line_level(irq); + if (vgic_irq_is_mapped_level(irq)) { + bool resample = false; + + if (val & ICH_LR_PENDING_BIT) { + irq->line_level = vgic_get_phys_line_level(irq); + resample = !irq->line_level; + } else if (vgic_irq_needs_resampling(irq) && + !(irq->active || irq->pending_latch)) { + resample = true; + } - if (!irq->line_level) + if (resample) vgic_irq_set_phys_active(irq, false); } @@ -136,7 +147,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } } - if (irq->hw) { + if (irq->hw && !vgic_irq_needs_resampling(irq)) { val |= ICH_LR_HW; val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; /* diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 15b666200f0b..111bff47e471 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -182,8 +182,8 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq) BUG_ON(!irq->hw); - if (irq->get_input_level) - return irq->get_input_level(irq->intid); + if (irq->ops && irq->ops->get_input_level) + return irq->ops->get_input_level(irq->intid); WARN_ON(irq_get_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING, @@ -480,7 +480,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, /* @irq->irq_lock must be held */ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, unsigned int host_irq, - bool (*get_input_level)(int vindid)) + struct irq_ops *ops) { struct irq_desc *desc; struct irq_data *data; @@ -500,7 +500,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, irq->hw = true; irq->host_irq = host_irq; irq->hwintid = data->hwirq; - irq->get_input_level = get_input_level; + irq->ops = ops; return 0; } @@ -509,11 +509,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) { irq->hw = false; irq->hwintid = 0; - irq->get_input_level = NULL; + irq->ops = NULL; } int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, - u32 vintid, bool (*get_input_level)(int vindid)) 
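The vgic-v2 and vgic-v3 fold paths above now clear the physical active state either when a pending mapped interrupt's line has dropped or when an interrupt that needs software resampling has gone fully idle. A compact model of that decision (field names shortened; purely illustrative):

#include <stdbool.h>
#include <stdio.h>

/*
 * lr_pending: the LR still had its pending bit set when folded.
 * line_level: the freshly sampled physical line level.
 * sw_resample: the IRQ needs a software helping hand on deactivation.
 */
static bool needs_resample(bool lr_pending, bool line_level,
                           bool sw_resample, bool active, bool pending_latch)
{
    if (lr_pending)
        return !line_level;

    return sw_resample && !(active || pending_latch);
}

int main(void)
{
    printf("%d\n", needs_resample(true,  false, false, false, false)); /* 1 */
    printf("%d\n", needs_resample(false, false, true,  false, false)); /* 1 */
    printf("%d\n", needs_resample(false, false, true,  true,  false)); /* 0 */
    return 0;
}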
+ u32 vintid, struct irq_ops *ops) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); unsigned long flags; @@ -522,7 +522,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, BUG_ON(!irq); raw_spin_lock_irqsave(&irq->irq_lock, flags); - ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level); + ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops); raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c index c83bb5a4aad2..baee22961bdb 100644 --- a/arch/arm64/lib/uaccess_flushcache.c +++ b/arch/arm64/lib/uaccess_flushcache.c @@ -15,7 +15,7 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt) * barrier to order the cache maintenance against the memcpy. */ memcpy(dst, src, cnt); - __clean_dcache_area_pop(dst, cnt); + dcache_clean_pop((unsigned long)dst, (unsigned long)dst + cnt); } EXPORT_SYMBOL_GPL(memcpy_flushcache); @@ -33,6 +33,6 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from, rc = raw_copy_from_user(to, from, n); /* See above */ - __clean_dcache_area_pop(to, n - rc); + dcache_clean_pop((unsigned long)to, (unsigned long)to + n - rc); return rc; } diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 2d881f34dd9d..5051b3c1a4f1 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -15,7 +15,7 @@ #include <asm/asm-uaccess.h> /* - * flush_icache_range(start,end) + * caches_clean_inval_pou_macro(start,end) [fixup] * * Ensure that the I and D caches are coherent within specified region. * This is typically used when code has been written to a memory region, @@ -23,12 +23,27 @@ * * - start - virtual start address of region * - end - virtual end address of region + * - fixup - optional label to branch to on user fault */ -SYM_FUNC_START(__flush_icache_range) - /* FALLTHROUGH */ +.macro caches_clean_inval_pou_macro, fixup +alternative_if ARM64_HAS_CACHE_IDC + dsb ishst + b .Ldc_skip_\@ +alternative_else_nop_endif + mov x2, x0 + mov x3, x1 + dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup +.Ldc_skip_\@: +alternative_if ARM64_HAS_CACHE_DIC + isb + b .Lic_skip_\@ +alternative_else_nop_endif + invalidate_icache_by_line x0, x1, x2, x3, \fixup +.Lic_skip_\@: +.endm /* - * __flush_cache_user_range(start,end) + * caches_clean_inval_pou(start,end) * * Ensure that the I and D caches are coherent within specified region. * This is typically used when code has been written to a memory region, @@ -37,117 +52,103 @@ SYM_FUNC_START(__flush_icache_range) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START(__flush_cache_user_range) +SYM_FUNC_START(caches_clean_inval_pou) + caches_clean_inval_pou_macro + ret +SYM_FUNC_END(caches_clean_inval_pou) + +/* + * caches_clean_inval_user_pou(start,end) + * + * Ensure that the I and D caches are coherent within specified region. + * This is typically used when code has been written to a memory region, + * and will be executed. 
+ * + * - start - virtual start address of region + * - end - virtual end address of region + */ +SYM_FUNC_START(caches_clean_inval_user_pou) uaccess_ttbr0_enable x2, x3, x4 -alternative_if ARM64_HAS_CACHE_IDC - dsb ishst - b 7f -alternative_else_nop_endif - dcache_line_size x2, x3 - sub x3, x2, #1 - bic x4, x0, x3 -1: -user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE - add x4, x4, x2 - cmp x4, x1 - b.lo 1b - dsb ish -7: -alternative_if ARM64_HAS_CACHE_DIC - isb - b 8f -alternative_else_nop_endif - invalidate_icache_by_line x0, x1, x2, x3, 9f -8: mov x0, #0 + caches_clean_inval_pou_macro 2f + mov x0, xzr 1: uaccess_ttbr0_disable x1, x2 ret -9: +2: mov x0, #-EFAULT b 1b -SYM_FUNC_END(__flush_icache_range) -SYM_FUNC_END(__flush_cache_user_range) +SYM_FUNC_END(caches_clean_inval_user_pou) /* - * invalidate_icache_range(start,end) + * icache_inval_pou(start,end) * * Ensure that the I cache is invalid within specified region. * * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START(invalidate_icache_range) +SYM_FUNC_START(icache_inval_pou) alternative_if ARM64_HAS_CACHE_DIC - mov x0, xzr isb ret alternative_else_nop_endif - uaccess_ttbr0_enable x2, x3, x4 - - invalidate_icache_by_line x0, x1, x2, x3, 2f - mov x0, xzr -1: - uaccess_ttbr0_disable x1, x2 + invalidate_icache_by_line x0, x1, x2, x3 ret -2: - mov x0, #-EFAULT - b 1b -SYM_FUNC_END(invalidate_icache_range) +SYM_FUNC_END(icache_inval_pou) /* - * __flush_dcache_area(kaddr, size) + * dcache_clean_inval_poc(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned and invalidated to the PoC. * - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ -SYM_FUNC_START_PI(__flush_dcache_area) +SYM_FUNC_START_PI(dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__flush_dcache_area) +SYM_FUNC_END_PI(dcache_clean_inval_poc) /* - * __clean_dcache_area_pou(kaddr, size) + * dcache_clean_pou(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoU. * - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ -SYM_FUNC_START(__clean_dcache_area_pou) +SYM_FUNC_START(dcache_clean_pou) alternative_if ARM64_HAS_CACHE_IDC dsb ishst ret alternative_else_nop_endif dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret -SYM_FUNC_END(__clean_dcache_area_pou) +SYM_FUNC_END(dcache_clean_pou) /* - * __inval_dcache_area(kaddr, size) + * dcache_inval_poc(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are invalidated. Any partial lines at the ends of the interval are * also cleaned to PoC to prevent data loss. 
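caches_clean_inval_pou_macro above skips the D-cache clean when the CPU has IDC and the I-cache invalidation when it has DIC (the CTR_EL0.IDC/DIC shortcuts), issuing only a dsb ishst or an isb in those cases. A tiny model of that control flow which prints the operations instead of issuing them:

#include <stdbool.h>
#include <stdio.h>

static void sync_code_range(bool has_idc, bool has_dic)
{
    if (has_idc)
        puts("dsb ishst           (no D-cache clean needed: IDC)");
    else
        puts("dc cvau, <line> ... (clean D-cache to PoU by line)");

    if (has_dic)
        puts("isb                 (no I-cache invalidation needed: DIC)");
    else
        puts("ic ivau, <line> ... (invalidate I-cache to PoU by line)");
}

int main(void)
{
    sync_code_range(false, false);
    sync_code_range(true, true);
    return 0;
}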
* - * - kaddr - kernel address - * - size - size in question + * - start - kernel start address of region + * - end - kernel end address of region */ SYM_FUNC_START_LOCAL(__dma_inv_area) -SYM_FUNC_START_PI(__inval_dcache_area) +SYM_FUNC_START_PI(dcache_inval_poc) /* FALLTHROUGH */ /* - * __dma_inv_area(start, size) + * __dma_inv_area(start, end) * - start - virtual start address of region - * - size - size in question + * - end - virtual end address of region */ - add x1, x1, x0 dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -165,48 +166,48 @@ SYM_FUNC_START_PI(__inval_dcache_area) b.lo 2b dsb sy ret -SYM_FUNC_END_PI(__inval_dcache_area) +SYM_FUNC_END_PI(dcache_inval_poc) SYM_FUNC_END(__dma_inv_area) /* - * __clean_dcache_area_poc(kaddr, size) + * dcache_clean_poc(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoC. * - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ SYM_FUNC_START_LOCAL(__dma_clean_area) -SYM_FUNC_START_PI(__clean_dcache_area_poc) +SYM_FUNC_START_PI(dcache_clean_poc) /* FALLTHROUGH */ /* - * __dma_clean_area(start, size) + * __dma_clean_area(start, end) * - start - virtual start address of region - * - size - size in question + * - end - virtual end address of region */ dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__clean_dcache_area_poc) +SYM_FUNC_END_PI(dcache_clean_poc) SYM_FUNC_END(__dma_clean_area) /* - * __clean_dcache_area_pop(kaddr, size) + * dcache_clean_pop(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoP. 
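dcache_inval_poc()/__dma_inv_area above invalidate whole lines but clean-and-invalidate any partial line at either end of [start, end) so that unrelated data sharing those lines is not lost. A C rendering of the same loop, with a 64-byte line size assumed and the two operations merely printed:

#include <stdint.h>
#include <stdio.h>

#define LINE 64    /* assumed D-cache line size */

static void civac(uintptr_t a) { printf("dc civac, %#lx\n", (unsigned long)a); }
static void ivac(uintptr_t a)  { printf("dc ivac,  %#lx\n", (unsigned long)a); }

static void dma_inv_area(uintptr_t start, uintptr_t end)
{
    uintptr_t mask = LINE - 1;

    if (end & mask) {              /* partial last line: clean + invalidate */
        end &= ~mask;
        civac(end);
    }
    if (start & mask) {            /* partial first line: clean + invalidate */
        start &= ~mask;
        civac(start);
        start += LINE;
    }
    for (; start < end; start += LINE)
        ivac(start);               /* fully covered lines: invalidate only */
}

int main(void)
{
    dma_inv_area(0x1030, 0x1110);  /* unaligned at both ends */
    return 0;
}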
* - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ -SYM_FUNC_START_PI(__clean_dcache_area_pop) +SYM_FUNC_START_PI(dcache_clean_pop) alternative_if_not ARM64_HAS_DCPOP - b __clean_dcache_area_poc + b dcache_clean_poc alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__clean_dcache_area_pop) +SYM_FUNC_END_PI(dcache_clean_pop) /* * __dma_flush_area(start, size) @@ -217,6 +218,7 @@ SYM_FUNC_END_PI(__clean_dcache_area_pop) * - size - size in question */ SYM_FUNC_START_PI(__dma_flush_area) + add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__dma_flush_area) @@ -228,6 +230,7 @@ SYM_FUNC_END_PI(__dma_flush_area) * - dir - DMA direction */ SYM_FUNC_START_PI(__dma_map_area) + add x1, x0, x1 cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_area b __dma_clean_area @@ -240,6 +243,7 @@ SYM_FUNC_END_PI(__dma_map_area) * - dir - DMA direction */ SYM_FUNC_START_PI(__dma_unmap_area) + add x1, x0, x1 cmp w2, #DMA_TO_DEVICE b.ne __dma_inv_area ret diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 6d44c028d1c9..2aaf950b906c 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -14,28 +14,25 @@ #include <asm/cache.h> #include <asm/tlbflush.h> -void sync_icache_aliases(void *kaddr, unsigned long len) +void sync_icache_aliases(unsigned long start, unsigned long end) { - unsigned long addr = (unsigned long)kaddr; - if (icache_is_aliasing()) { - __clean_dcache_area_pou(kaddr, len); - __flush_icache_all(); + dcache_clean_pou(start, end); + icache_inval_all_pou(); } else { /* * Don't issue kick_all_cpus_sync() after I-cache invalidation * for user mappings. */ - __flush_icache_range(addr, addr + len); + caches_clean_inval_pou(start, end); } } -static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, - unsigned long uaddr, void *kaddr, - unsigned long len) +static void flush_ptrace_access(struct vm_area_struct *vma, unsigned long start, + unsigned long end) { if (vma->vm_flags & VM_EXEC) - sync_icache_aliases(kaddr, len); + sync_icache_aliases(start, end); } /* @@ -48,7 +45,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long len) { memcpy(dst, src, len); - flush_ptrace_access(vma, page, uaddr, dst, len); + flush_ptrace_access(vma, (unsigned long)dst, (unsigned long)dst + len); } void __sync_icache_dcache(pte_t pte) @@ -56,7 +53,9 @@ void __sync_icache_dcache(pte_t pte) struct page *page = pte_page(pte); if (!test_bit(PG_dcache_clean, &page->flags)) { - sync_icache_aliases(page_address(page), page_size(page)); + sync_icache_aliases((unsigned long)page_address(page), + (unsigned long)page_address(page) + + page_size(page)); set_bit(PG_dcache_clean, &page->flags); } } @@ -77,20 +76,20 @@ EXPORT_SYMBOL(flush_dcache_page); /* * Additional functions defined in assembly. 
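Most of the remaining churn is mechanical: the helpers now take (start, end) virtual addresses instead of (kaddr, size), so callers convert as copy_to_user_page() and __sync_icache_dcache() above do. A throwaway illustration of that conversion; dcache_clean_inval_poc() below is a printing stand-in for the renamed kernel helper and the wrapper name is invented:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-in for the renamed kernel helper, which takes a [start, end) range. */
static void dcache_clean_inval_poc(uintptr_t start, uintptr_t end)
{
    printf("clean+invalidate to PoC: [%#lx, %#lx)\n",
           (unsigned long)start, (unsigned long)end);
}

/* Old-style (kaddr, size) entry point expressed in terms of the new one. */
static void flush_dcache_area(void *kaddr, size_t size)
{
    uintptr_t start = (uintptr_t)kaddr;

    dcache_clean_inval_poc(start, start + size);
}

int main(void)
{
    char buf[256];

    flush_dcache_area(buf, sizeof(buf));
    return 0;
}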
*/ -EXPORT_SYMBOL(__flush_icache_range); +EXPORT_SYMBOL(caches_clean_inval_pou); #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size) { /* Ensure order against any prior non-cacheable writes */ dmb(osh); - __clean_dcache_area_pop(addr, size); + dcache_clean_pop((unsigned long)addr, (unsigned long)addr + size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { - __inval_dcache_area(addr, size); + dcache_inval_poc((unsigned long)addr, (unsigned long)addr + size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); #endif diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6dd9369e3ea0..89b66ef43a0f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -515,7 +515,8 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (rodata_full || crash_mem_map || debug_pagealloc_enabled()) + if (rodata_full || crash_mem_map || debug_pagealloc_enabled() || + IS_ENABLED(CONFIG_KFENCE)) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* |