author    | Paolo Bonzini <pbonzini@redhat.com> | 2022-05-20 14:16:27 +0300
committer | Paolo Bonzini <pbonzini@redhat.com> | 2022-05-25 12:09:23 +0300
commit    | 47e8eec83262083c7da220446551eaad614218ea (patch)
tree      | 1bcdf6cb6541441d1042fdf68c2f7982d80a9178 /arch/arm64
parent    | 825be3b5abae1e67db45ff7d4b9a7764a2419bd9 (diff)
parent    | 5c0ad551e9aa6188f2bda0977c1cb6768a2b74ef (diff)
download  | linux-47e8eec83262083c7da220446551eaad614218ea.tar.xz
Merge tag 'kvmarm-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 5.19
- Add support for the ARMv8.6 WFxT extension
- Guard pages for the EL2 stacks
- Trap and emulate AArch32 ID registers to hide unsupported features
- Ability to select and save/restore the set of hypercalls exposed
to the guest
- Support for PSCI-initiated suspend in collaboration with userspace
- GICv3 register-based LPI invalidation support
- Move host PMU event merging into the vcpu data structure
- GICv3 ITS save/restore fixes
- The usual set of small-scale cleanups and fixes
[Due to the conflict, KVM_SYSTEM_EVENT_SEV_TERM is relocated
from 4 to 6. - Paolo]
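The "select and save/restore the set of hypercalls" item above is exposed to userspace through new firmware pseudo-registers (KVM_REG_ARM_STD_BMAP, KVM_REG_ARM_STD_HYP_BMAP and KVM_REG_ARM_VENDOR_HYP_BMAP, added in the uapi kvm.h hunk further down), which a VMM accesses with the existing KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Below is a minimal userspace sketch, not the kernel's own code, of how a VMM might hide the PV time hypercalls from a guest; it assumes a vCPU file descriptor is already set up, that headers come from a kernel containing this merge, and it omits error handling.

```c
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>   /* struct kvm_one_reg, KVM_GET_ONE_REG, KVM_SET_ONE_REG */
#include <asm/kvm.h>     /* KVM_REG_ARM_STD_HYP_BMAP and bit definitions (new in this merge) */

/*
 * Hide the standard hypervisor PV time service from the guest by clearing
 * its bit in the bitmap firmware register. The write has to happen before
 * the VM starts running.
 */
static int hide_pv_time(int vcpu_fd)
{
	uint64_t bmap;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM_STD_HYP_BMAP,
		.addr = (uint64_t)&bmap,	/* assumes a 64-bit host */
	};

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
		return -1;

	bmap &= ~(1ULL << KVM_REG_ARM_STD_HYP_BIT_PV_TIME);

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}
```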
Diffstat (limited to 'arch/arm64')
91 files changed, 3481 insertions, 846 deletions
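Several of the changes below surface directly to userspace: the uapi hwcap.h hunk adds HWCAP2_SME (plus its sub-feature bits) and HWCAP2_WFXT, and the processor.h hunk wires up the PR_SME_{SET,GET}_VL prctls. As a hedged illustration only, here is a minimal detection sketch, assuming the toolchain headers come from a kernel with this merge applied:

```c
#include <stdio.h>
#include <sys/auxv.h>    /* getauxval(), AT_HWCAP2 */
#include <asm/hwcap.h>   /* HWCAP2_SME, HWCAP2_WFXT (new in this merge) */

int main(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	/* Bit 23 (SME) and bit 31 (WFxT) of AT_HWCAP2, as defined in the hwcap.h hunk below. */
	printf("SME:  %s\n", (hwcap2 & HWCAP2_SME)  ? "supported" : "not supported");
	printf("WFxT: %s\n", (hwcap2 & HWCAP2_WFXT) ? "supported" : "not supported");

	return 0;
}
```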
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 57c4c995965f..0dbae092c9b9 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -175,8 +175,6 @@ config ARM64
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
-	select HAVE_DYNAMIC_FTRACE_WITH_REGS \
-		if $(cc-option,-fpatchable-function-entry=2)
 	select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
 		if DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -228,6 +226,17 @@ config ARM64
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
+config CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_REGS
+	def_bool CC_IS_CLANG
+	# https://github.com/ClangBuiltLinux/linux/issues/1507
+	depends on AS_IS_GNU || (AS_IS_LLVM && (LD_IS_LLD || LD_VERSION >= 23600))
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+
+config GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_REGS
+	def_bool CC_IS_GCC
+	depends on $(cc-option,-fpatchable-function-entry=2)
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+
 config 64BIT
 	def_bool y
 
@@ -678,7 +687,7 @@ config ARM64_ERRATUM_2051678
 	default y
 	help
 	  This options adds the workaround for ARM Cortex-A510 erratum ARM64_ERRATUM_2051678.
-	  Affected Coretex-A510 might not respect the ordering rules for
+	  Affected Cortex-A510 might not respect the ordering rules for
 	  hardware update of the page table's dirty bit.
 	  The workaround is to not enable the feature on affected CPUs.
 
@@ -1939,6 +1948,17 @@ config ARM64_SVE
 	  booting the kernel. If unsure and you are not observing these
 	  symptoms, you should assume that it is safe to say Y.
 
+config ARM64_SME
+	bool "ARM Scalable Matrix Extension support"
+	default y
+	depends on ARM64_SVE
+	help
+	  The Scalable Matrix Extension (SME) is an extension to the AArch64
+	  execution state which utilises a substantial subset of the SVE
+	  instruction set, together with the addition of new architectural
+	  register state capable of holding two dimensional matrix tiles to
+	  enable various matrix operations.
+ config ARM64_MODULE_PLTS bool "Use PLTs to allow module memory to spill over into vmalloc area" depends on MODULES diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi index d61f43052a34..8e9ad1e51d66 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi @@ -11,26 +11,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <761000>; @@ -71,26 +51,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <731000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi index 1e5d0ee5d541..44c23c984034 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi @@ -11,26 +11,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <731000>; @@ -76,26 +56,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <751000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <751000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <751000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <751000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <771000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi index bf9ae1e1016b..480afa2cc61f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi @@ -13,28 +13,28 @@ cpu0: cpu@0 { device_type = "cpu"; - compatible = "arm,cortex-a35","arm,armv8"; + compatible = "arm,cortex-a35"; reg = <0x0 0x0>; enable-method = "psci"; }; cpu1: cpu@1 { device_type = "cpu"; - compatible = "arm,cortex-a35","arm,armv8"; + compatible = "arm,cortex-a35"; reg = <0x0 0x1>; enable-method = "psci"; }; cpu2: cpu@2 { device_type = "cpu"; - compatible = "arm,cortex-a35","arm,armv8"; + compatible = "arm,cortex-a35"; reg = <0x0 0x2>; enable-method = "psci"; }; cpu3: cpu@3 { device_type = "cpu"; - compatible = "arm,cortex-a35","arm,armv8"; + compatible = "arm,cortex-a35"; reg = <0x0 0x3>; enable-method = "psci"; }; diff --git 
a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts index 5751c48620ed..cadba194b149 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts @@ -437,6 +437,7 @@ "", "eMMC_RST#", /* BOOT_12 */ "eMMC_DS", /* BOOT_13 */ + "", "", /* GPIOC */ "SD_D0_B", /* GPIOC_0 */ "SD_D1_B", /* GPIOC_1 */ diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi index 3c07a89bfd27..80737731af3f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi @@ -95,26 +95,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <730000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <730000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <730000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <666666666>; - opp-microvolt = <750000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <770000>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi index 1dc9d187601c..a0bd540f27d3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi @@ -89,12 +89,12 @@ pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <125>; - touchscreen-size-x = /bits/ 16 <4008>; + touchscreen-size-x = <4008>; ti,y-min = /bits/ 16 <282>; - touchscreen-size-y = /bits/ 16 <3864>; + touchscreen-size-y = <3864>; ti,x-plate-ohms = /bits/ 16 <180>; - touchscreen-max-pressure = /bits/ 16 <255>; - touchscreen-average-samples = /bits/ 16 <10>; + touchscreen-max-pressure = <255>; + touchscreen-average-samples = <10>; ti,debounce-tol = /bits/ 16 <3>; ti,debounce-rep = /bits/ 16 <1>; ti,settle-delay-usec = /bits/ 16 <150>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi index 73addc0b8e57..cce55c3c5df0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi @@ -146,12 +146,14 @@ &usbotg1 { dr_mode = "otg"; + over-current-active-low; vbus-supply = <®_usb_otg1_vbus>; status = "okay"; }; &usbotg2 { dr_mode = "host"; + disable-over-current; status = "okay"; }; @@ -215,7 +217,7 @@ fsl,pins = < MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6 - MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 + MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6 MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi index 1e7badb2a82e..f61e4847fa49 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi @@ -211,12 +211,14 @@ &usbotg1 { dr_mode = "otg"; + over-current-active-low; vbus-supply = <®_usb_otg1_vbus>; status = "okay"; }; &usbotg2 { dr_mode = "host"; + disable-over-current; vbus-supply = <®_usb_otg2_vbus>; status = "okay"; }; @@ -309,7 +311,7 @@ fsl,pins = < MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6 - MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 + MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6 MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6 >; }; diff --git 
a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi index 426483ec1f88..023619648966 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi @@ -238,12 +238,14 @@ &usbotg1 { dr_mode = "otg"; + over-current-active-low; vbus-supply = <®_usb_otg1_vbus>; status = "okay"; }; &usbotg2 { dr_mode = "host"; + disable-over-current; vbus-supply = <®_usb_otg2_vbus>; status = "okay"; }; @@ -358,7 +360,7 @@ fsl,pins = < MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6 - MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 + MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6 MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts index 7dfee715a2c4..d8ce217c6016 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts @@ -59,6 +59,10 @@ interrupts = <3 IRQ_TYPE_LEVEL_LOW>; rohm,reset-snvs-powered; + #clock-cells = <0>; + clocks = <&osc_32k 0>; + clock-output-names = "clk-32k-out"; + regulators { buck1_reg: BUCK1 { regulator-name = "buck1"; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index b16c7caf34c1..87b5e23c766f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -70,12 +70,12 @@ pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <125>; - touchscreen-size-x = /bits/ 16 <4008>; + touchscreen-size-x = <4008>; ti,y-min = /bits/ 16 <282>; - touchscreen-size-y = /bits/ 16 <3864>; + touchscreen-size-y = <3864>; ti,x-plate-ohms = /bits/ 16 <180>; - touchscreen-max-pressure = /bits/ 16 <255>; - touchscreen-average-samples = /bits/ 16 <10>; + touchscreen-max-pressure = <255>; + touchscreen-average-samples = <10>; ti,debounce-tol = /bits/ 16 <3>; ti,debounce-rep = /bits/ 16 <1>; ti,settle-delay-usec = /bits/ 16 <150>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 99f0f5026674..5c0ca2490561 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -293,7 +293,7 @@ ranges; sai2: sai@30020000 { - compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai"; + compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30020000 0x10000>; interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MN_CLK_SAI2_IPG>, @@ -307,7 +307,7 @@ }; sai3: sai@30030000 { - compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai"; + compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30030000 0x10000>; interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MN_CLK_SAI3_IPG>, @@ -321,7 +321,7 @@ }; sai5: sai@30050000 { - compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai"; + compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30050000 0x10000>; interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MN_CLK_SAI5_IPG>, @@ -337,7 +337,7 @@ }; sai6: sai@30060000 { - compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai"; + compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30060000 0x10000>; interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MN_CLK_SAI6_IPG>, @@ -394,7 +394,7 @@ }; sai7: sai@300b0000 { - compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai"; + compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x300b0000 0x10000>; 
interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MN_CLK_SAI7_IPG>, diff --git a/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi index 38ffcd145b33..899e8e7dbc24 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi @@ -253,7 +253,7 @@ #address-cells = <1>; #size-cells = <1>; spi-max-frequency = <84000000>; - spi-tx-bus-width = <4>; + spi-tx-bus-width = <1>; spi-rx-bus-width = <4>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8qm.dtsi b/arch/arm64/boot/dts/freescale/imx8qm.dtsi index be8c76a0554c..4f767012f1f5 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qm.dtsi @@ -196,7 +196,7 @@ }; clk: clock-controller { - compatible = "fsl,imx8qxp-clk", "fsl,scu-clk"; + compatible = "fsl,imx8qm-clk", "fsl,scu-clk"; #clock-cells = <2>; }; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index f0f81c23c16f..b9a48cfd760f 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -1249,14 +1249,14 @@ pins = "gpio47", "gpio48"; function = "blsp_i2c3"; drive-strength = <16>; - bias-disable = <0>; + bias-disable; }; blsp1_i2c3_sleep: blsp1-i2c2-sleep { pins = "gpio47", "gpio48"; function = "gpio"; drive-strength = <2>; - bias-disable = <0>; + bias-disable; }; blsp2_uart3_4pins_default: blsp2-uart2-4pins { diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi index e90f99ef5323..e47c74e513af 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi @@ -33,7 +33,7 @@ ap_h1_spi: &spi0 {}; }; &alc5682 { - realtek,dmic-clk-driving-high = "true"; + realtek,dmic-clk-driving-high; }; &cpu6_alert0 { diff --git a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi index 1084d5ce9ac7..07b729f9fec5 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi @@ -630,7 +630,7 @@ pins = "gpio6", "gpio25", "gpio26"; function = "gpio"; drive-strength = <8>; - bias-disable = <0>; + bias-disable; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts index 8553c8bf79bd..103cc40816fd 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts @@ -563,7 +563,7 @@ config { pins = "gpio6", "gpio11"; drive-strength = <8>; - bias-disable = <0>; + bias-disable; }; }; diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 62217be36217..9f3e2c3d2ca0 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -16,7 +16,11 @@ #define sev() asm volatile("sev" : : : "memory") #define wfe() asm volatile("wfe" : : : "memory") +#define wfet(val) asm volatile("msr s0_3_c1_c0_0, %0" \ + : : "r" (val) : "memory") #define wfi() asm volatile("wfi" : : : "memory") +#define wfit(val) asm volatile("msr s0_3_c1_c0_1, %0" \ + : : "r" (val) : "memory") #define isb() asm volatile("isb" : : : "memory") #define dmb(opt) asm volatile("dmb " #opt : : : "memory") diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index a58e366f0b07..115cdec1ae87 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -58,11 +58,15 @@ 
struct cpuinfo_arm64 { u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; u64 reg_id_aa64zfr0; + u64 reg_id_aa64smfr0; struct cpuinfo_32bit aarch32; /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */ u64 reg_zcr; + + /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */ + u64 reg_smcr; }; DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index c62e7e5e2f0c..14a8f3d93add 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -622,6 +622,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0) return val > 0; } +static inline bool id_aa64pfr1_sme(u64 pfr1) +{ + u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT); + + return val > 0; +} + static inline bool id_aa64pfr1_mte(u64 pfr1) { u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT); @@ -759,6 +766,23 @@ static __always_inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } +static __always_inline bool system_supports_sme(void) +{ + return IS_ENABLED(CONFIG_ARM64_SME) && + cpus_have_const_cap(ARM64_SME); +} + +static __always_inline bool system_supports_fa64(void) +{ + return IS_ENABLED(CONFIG_ARM64_SME) && + cpus_have_const_cap(ARM64_SME_FA64); +} + +static __always_inline bool system_supports_tpidr2(void) +{ + return system_supports_sme(); +} + static __always_inline bool system_supports_cnp(void) { return IS_ENABLED(CONFIG_ARM64_CNP) && diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 232b439cbaf3..60647bdc0b09 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 @@ -117,6 +118,10 @@ #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 +#define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024 +#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025 +#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028 +#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) @@ -130,6 +135,7 @@ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) @@ -162,6 +168,10 @@ #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) +#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) +#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) +#define 
MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) +#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 7f3c87f7a0ce..fabdbde0fe02 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -107,7 +107,7 @@ isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks - msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + msr_s SYS_ICH_HCR_EL2, xzr // Reset ICH_HCR_EL2 to defaults .Lskip_gicv3_\@: .endm @@ -143,6 +143,50 @@ .Lskip_sve_\@: .endm +/* SME register access and priority mapping */ +.macro __init_el2_nvhe_sme + mrs x1, id_aa64pfr1_el1 + ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 + cbz x1, .Lskip_sme_\@ + + bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps + msr cptr_el2, x0 // Disable copro. traps to EL2 + isb + + mrs x1, sctlr_el2 + orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps + msr sctlr_el2, x1 + isb + + mov x1, #0 // SMCR controls + + mrs_s x2, SYS_ID_AA64SMFR0_EL1 + ubfx x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM? + cbz x2, .Lskip_sme_fa64_\@ + + orr x1, x1, SMCR_ELx_FA64_MASK +.Lskip_sme_fa64_\@: + + orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector + msr_s SYS_SMCR_EL2, x1 // length for EL1. + + mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? + ubfx x1, x1, #SYS_SMIDR_EL1_SMPS_SHIFT, #1 + cbz x1, .Lskip_sme_\@ + + msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal + + mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? 
+ ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4 + cbz x1, .Lskip_sme_\@ + + mrs_s x1, SYS_HCRX_EL2 + orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping + msr_s SYS_HCRX_EL2, x1 + +.Lskip_sme_\@: +.endm + /* Disable any fine grained traps */ .macro __init_el2_fgt mrs x1, id_aa64mmfr0_el1 @@ -153,15 +197,26 @@ mrs x1, id_aa64dfr0_el1 ubfx x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 cmp x1, #3 - b.lt .Lset_fgt_\@ + b.lt .Lset_debug_fgt_\@ /* Disable PMSNEVFR_EL1 read and write traps */ orr x0, x0, #(1 << 62) -.Lset_fgt_\@: +.Lset_debug_fgt_\@: msr_s SYS_HDFGRTR_EL2, x0 msr_s SYS_HDFGWTR_EL2, x0 - msr_s SYS_HFGRTR_EL2, xzr - msr_s SYS_HFGWTR_EL2, xzr + + mov x0, xzr + mrs x1, id_aa64pfr1_el1 + ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 + cbz x1, .Lset_fgt_\@ + + /* Disable nVHE traps of TPIDR2 and SMPRI */ + orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK + orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK + +.Lset_fgt_\@: + msr_s SYS_HFGRTR_EL2, x0 + msr_s SYS_HFGWTR_EL2, x0 msr_s SYS_HFGITR_EL2, xzr mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU @@ -196,6 +251,7 @@ __init_el2_nvhe_idregs __init_el2_nvhe_cptr __init_el2_nvhe_sve + __init_el2_nvhe_sme __init_el2_fgt __init_el2_nvhe_prepare_eret .endm diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d52a0b269ee8..0bc9941b412e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -37,7 +37,8 @@ #define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ /* Unallocated EC: 0x1B */ #define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */ -/* Unallocated EC: 0x1D - 0x1E */ +#define ESR_ELx_EC_SME (0x1D) +/* Unallocated EC: 0x1E */ #define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ #define ESR_ELx_EC_IABT_LOW (0x20) #define ESR_ELx_EC_IABT_CUR (0x21) @@ -75,6 +76,7 @@ #define ESR_ELx_IL_SHIFT (25) #define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) +#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK) /* ISS field definitions shared by different classes */ #define ESR_ELx_WNR_SHIFT (6) @@ -133,7 +135,10 @@ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) -#define ESR_ELx_WFx_ISS_TI (UL(1) << 0) +#define ESR_ELx_WFx_ISS_RN (UL(0x1F) << 5) +#define ESR_ELx_WFx_ISS_RV (UL(1) << 2) +#define ESR_ELx_WFx_ISS_TI (UL(3) << 0) +#define ESR_ELx_WFx_ISS_WFxT (UL(2) << 0) #define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) @@ -146,7 +151,8 @@ #define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) /* ESR value templates for specific events */ -#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI) +#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | \ + (ESR_ELx_WFx_ISS_TI & ~ESR_ELx_WFx_ISS_WFxT)) #define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \ ESR_ELx_WFx_ISS_WFI) @@ -327,6 +333,15 @@ #define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ ESR_ELx_CP15_32_ISS_DIR_READ) +/* + * ISS values for SME traps + */ + +#define ESR_ELx_SME_ISS_SME_DISABLED 0 +#define ESR_ELx_SME_ISS_ILL 1 +#define ESR_ELx_SME_ISS_SM_DISABLED 2 +#define ESR_ELx_SME_ISS_ZA_DISABLED 3 + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 339477dca551..2add7f33b7c2 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -64,6 +64,7 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, 
struct pt_regs *regs); void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); void do_sve_acc(unsigned int esr, struct pt_regs *regs); +void do_sme_acc(unsigned int esr, struct pt_regs *regs); void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); void do_sysinstr(unsigned int esr, struct pt_regs *regs); void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index cb24385e3632..5afcd0709aae 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -46,11 +46,23 @@ extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, - void *sve_state, unsigned int sve_vl); + void *sve_state, unsigned int sve_vl, + void *za_state, unsigned int sme_vl, + u64 *svcr); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); +static inline bool thread_sm_enabled(struct thread_struct *thread) +{ + return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK); +} + +static inline bool thread_za_enabled(struct thread_struct *thread) +{ + return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_ZA_MASK); +} + /* Maximum VL that SVE/SME VL-agnostic software can transparently support */ #define VL_ARCH_MAX 0x100 @@ -62,7 +74,14 @@ static inline size_t sve_ffr_offset(int vl) static inline void *sve_pffr(struct thread_struct *thread) { - return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread)); + unsigned int vl; + + if (system_supports_sme() && thread_sm_enabled(thread)) + vl = thread_get_sme_vl(thread); + else + vl = thread_get_sve_vl(thread); + + return (char *)thread->sve_state + sve_ffr_offset(vl); } extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); @@ -71,11 +90,17 @@ extern void sve_load_state(void const *state, u32 const *pfpsr, extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); +extern void sme_set_vq(unsigned long vq_minus_1); +extern void za_save_state(void *state); +extern void za_load_state(void const *state); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); +extern u64 read_smcr_features(void); /* * Helpers to translate bit indices in sve_vq_map to VQ values (and @@ -119,6 +144,7 @@ struct vl_info { extern void sve_alloc(struct task_struct *task); extern void fpsimd_release_task(struct task_struct *task); extern void fpsimd_sync_to_sve(struct task_struct *task); +extern void fpsimd_force_sync_to_sve(struct task_struct *task); extern void sve_sync_to_fpsimd(struct task_struct *task); extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task); @@ -171,6 +197,12 @@ static inline void write_vl(enum vec_type type, u64 val) write_sysreg_s(tmp | val, SYS_ZCR_EL1); break; #endif +#ifdef CONFIG_ARM64_SME + case ARM64_VEC_SME: + tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK; + write_sysreg_s(tmp | val, SYS_SMCR_EL1); + break; +#endif default: WARN_ON_ONCE(1); break; @@ -208,6 +240,8 @@ static inline bool sve_vq_available(unsigned int vq) return 
vq_available(ARM64_VEC_SVE, vq); } +size_t sve_state_size(struct task_struct const *task); + #else /* ! CONFIG_ARM64_SVE */ static inline void sve_alloc(struct task_struct *task) { } @@ -247,8 +281,93 @@ static inline void vec_update_vq_map(enum vec_type t) { } static inline int vec_verify_vq_map(enum vec_type t) { return 0; } static inline void sve_setup(void) { } +static inline size_t sve_state_size(struct task_struct const *task) +{ + return 0; +} + #endif /* ! CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static inline void sme_user_disable(void) +{ + sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0); +} + +static inline void sme_user_enable(void) +{ + sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN); +} + +static inline void sme_smstart_sm(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr")); +} + +static inline void sme_smstop_sm(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr")); +} + +static inline void sme_smstop(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr")); +} + +extern void __init sme_setup(void); + +static inline int sme_max_vl(void) +{ + return vec_max_vl(ARM64_VEC_SME); +} + +static inline int sme_max_virtualisable_vl(void) +{ + return vec_max_virtualisable_vl(ARM64_VEC_SME); +} + +extern void sme_alloc(struct task_struct *task); +extern unsigned int sme_get_vl(void); +extern int sme_set_current_vl(unsigned long arg); +extern int sme_get_current_vl(void); + +/* + * Return how many bytes of memory are required to store the full SME + * specific state (currently just ZA) for task, given task's currently + * configured vector length. + */ +static inline size_t za_state_size(struct task_struct const *task) +{ + unsigned int vl = task_get_sme_vl(task); + + return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl)); +} + +#else + +static inline void sme_user_disable(void) { BUILD_BUG(); } +static inline void sme_user_enable(void) { BUILD_BUG(); } + +static inline void sme_smstart_sm(void) { } +static inline void sme_smstop_sm(void) { } +static inline void sme_smstop(void) { } + +static inline void sme_alloc(struct task_struct *task) { } +static inline void sme_setup(void) { } +static inline unsigned int sme_get_vl(void) { return 0; } +static inline int sme_max_vl(void) { return 0; } +static inline int sme_max_virtualisable_vl(void) { return 0; } +static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } +static inline int sme_get_current_vl(void) { return -EINVAL; } + +static inline size_t za_state_size(struct task_struct const *task) +{ + return 0; +} + +#endif /* ! CONFIG_ARM64_SME */ + /* For use by EFI runtime services calls only */ extern void __efi_fpsimd_begin(void); extern void __efi_fpsimd_end(void); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 2509d7dde55a..5e0910cf4832 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -93,6 +93,12 @@ .endif .endm +.macro _sme_check_wv v + .if (\v) < 12 || (\v) > 15 + .error "Bad vector select register \v." 
+ .endif +.endm + /* SVE instruction encodings for non-SVE-capable assemblers */ /* (pre binutils 2.28, all kernel capable clang versions support SVE) */ @@ -174,6 +180,54 @@ | (\np) .endm +/* SME instruction encodings for non-SME-capable assemblers */ +/* (pre binutils 2.38/LLVM 13) */ + +/* RDSVL X\nx, #\imm */ +.macro _sme_rdsvl nx, imm + _check_general_reg \nx + _check_num (\imm), -0x20, 0x1f + .inst 0x04bf5800 \ + | (\nx) \ + | (((\imm) & 0x3f) << 5) +.endm + +/* + * STR (vector from ZA array): + * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _sme_str_zav nw, nxbase, offset=0 + _sme_check_wv \nw + _check_general_reg \nxbase + _check_num (\offset), -0x100, 0xff + .inst 0xe1200000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * LDR (vector to ZA array): + * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _sme_ldr_zav nw, nxbase, offset=0 + _sme_check_wv \nw + _check_general_reg \nxbase + _check_num (\offset), -0x100, 0xff + .inst 0xe1000000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * Zero the entire ZA array + * ZERO ZA + */ +.macro zero_za + .inst 0xc00800ff +.endm + .macro __for from:req, to:req .if (\from) == (\to) _for__body %\from @@ -208,6 +262,17 @@ 921: .endm +/* Update SMCR_EL1.LEN with the new VQ */ +.macro sme_load_vq xvqminus1, xtmp, xtmp2 + mrs_s \xtmp, SYS_SMCR_EL1 + bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK + orr \xtmp2, \xtmp2, \xvqminus1 + cmp \xtmp2, \xtmp + b.eq 921f + msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising +921: +.endm + /* Preserve the first 128-bits of Znz and zero the rest. */ .macro _sve_flush_z nz _sve_check_zreg \nz @@ -254,3 +319,25 @@ ldr w\nxtmp, [\xpfpsr, #4] msr fpcr, x\nxtmp .endm + +.macro sme_save_za nxbase, xvl, nw + mov w\nw, #0 + +423: + _sme_str_zav \nw, \nxbase + add x\nxbase, x\nxbase, \xvl + add x\nw, x\nw, #1 + cmp \xvl, x\nw + bne 423b +.endm + +.macro sme_load_za nxbase, xvl, nw + mov w\nw, #0 + +423: + _sme_ldr_zav \nw, \nxbase + add x\nxbase, x\nxbase, \xvl + add x\nw, x\nw, #1 + cmp \xvl, x\nw + bne 423b +.endm diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 8db5ec0089db..aa443d8f8cfb 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -109,6 +109,15 @@ #define KERNEL_HWCAP_AFP __khwcap2_feature(AFP) #define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES) #define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3) +#define KERNEL_HWCAP_SME __khwcap2_feature(SME) +#define KERNEL_HWCAP_SME_I16I64 __khwcap2_feature(SME_I16I64) +#define KERNEL_HWCAP_SME_F64F64 __khwcap2_feature(SME_F64F64) +#define KERNEL_HWCAP_SME_I8I32 __khwcap2_feature(SME_I8I32) +#define KERNEL_HWCAP_SME_F16F32 __khwcap2_feature(SME_F16F32) +#define KERNEL_HWCAP_SME_B16F32 __khwcap2_feature(SME_B16F32) +#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32) +#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64) +#define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1767ded83888..8aa8492dafc0 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -80,11 +80,12 @@ * FMO: Override CPSR.F and enable signaling with VF * SWIO: Turn set/way invalidates into set/way clean+invalidate * PTW: Take a stage2 fault if a stage1 walk steps in device memory + * TID3: Trap EL1 reads of group 3 ID registers */ #define 
HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_BSU_IS | HCR_FB | HCR_TACR | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ - HCR_FMO | HCR_IMO | HCR_PTW ) + HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) @@ -279,6 +280,7 @@ #define CPTR_EL2_TCPAC (1U << 31) #define CPTR_EL2_TAM (1 << 30) #define CPTR_EL2_TTA (1 << 20) +#define CPTR_EL2_TSM (1 << 12) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) #define CPTR_EL2_TZ (1 << 8) #define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index d5b0386ef765..2e277f2ed671 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -169,6 +169,7 @@ struct kvm_nvhe_init_params { unsigned long tcr_el2; unsigned long tpidr_el2; unsigned long stack_hyp_va; + unsigned long stack_pa; phys_addr_t pgd_pa; unsigned long hcr_el2; unsigned long vttbr; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 7496deab025a..07812680fcaf 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -40,6 +40,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_inject_size_fault(struct kvm_vcpu *vcpu); void kvm_vcpu_wfi(struct kvm_vcpu *vcpu); @@ -86,13 +87,6 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) if (vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 &= ~HCR_RW; - else - /* - * TID3: trap feature register accesses that we virtualise. - * For now this is conditional, since no AArch32 feature regs - * are currently virtualised. 
- */ - vcpu->arch.hcr_el2 |= HCR_TID3; if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || vcpu_el1_is_32bit(vcpu)) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 27ebb2929e0c..78e718a1b23d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,7 @@ #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) #define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) +#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) @@ -101,6 +102,19 @@ struct kvm_s2_mmu { struct kvm_arch_memory_slot { }; +/** + * struct kvm_smccc_features: Descriptor of the hypercall services exposed to the guests + * + * @std_bmap: Bitmap of standard secure service calls + * @std_hyp_bmap: Bitmap of standard hypervisor service calls + * @vendor_hyp_bmap: Bitmap of vendor specific hypervisor service calls + */ +struct kvm_smccc_features { + unsigned long std_bmap; + unsigned long std_hyp_bmap; + unsigned long vendor_hyp_bmap; +}; + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -133,6 +147,8 @@ struct kvm_arch { */ #define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3 #define KVM_ARCH_FLAG_EL1_32BIT 4 + /* PSCI SYSTEM_SUSPEND enabled for the guest */ +#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 unsigned long flags; @@ -147,6 +163,9 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; + + /* Hypercall features firmware registers' descriptor */ + struct kvm_smccc_features smccc_feat; }; struct kvm_vcpu_fault_info { @@ -251,14 +270,8 @@ struct kvm_cpu_context { struct kvm_vcpu *__hyp_running_vcpu; }; -struct kvm_pmu_events { - u32 events_host; - u32 events_guest; -}; - struct kvm_host_data { struct kvm_cpu_context host_ctxt; - struct kvm_pmu_events pmu_events; }; struct kvm_host_psci_config { @@ -292,8 +305,11 @@ struct vcpu_reset_state { struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; + + /* Guest floating point state */ void *sve_state; unsigned int sve_max_vl; + u64 svcr; /* Stage 2 paging state used by the hardware on next switch */ struct kvm_s2_mmu *hw_mmu; @@ -362,8 +378,8 @@ struct kvm_vcpu_arch { u32 mdscr_el1; } guest_debug_preserved; - /* vcpu power-off state */ - bool power_off; + /* vcpu power state */ + struct kvm_mp_state mp_state; /* Don't run the guest (internal implementation need) */ bool pause; @@ -448,6 +464,8 @@ struct kvm_vcpu_arch { #define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ #define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14) #define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */ +#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */ +#define KVM_ARM64_WFIT (1 << 17) /* WFIT instruction trapped */ #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | \ @@ -680,10 +698,11 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); +int kvm_handle_cp10_id(struct kvm_vcpu *vcpu); void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); -void kvm_sys_reg_table_init(void); +int kvm_sys_reg_table_init(void); /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); @@ -792,9 +811,6 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); void 
kvm_clr_pmu_events(u32 clr); - -void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu); -void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); #else static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} @@ -826,8 +842,6 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_has_mte(kvm) \ (system_supports_mte() && \ test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) -#define kvm_vcpu_has_pmu(vcpu) \ - (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM @@ -838,4 +852,7 @@ void __init kvm_hyp_reserve(void); static inline void kvm_hyp_reserve(void) { } #endif +void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu); +bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 74735a864eee..b208da3bebec 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -154,6 +154,9 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) int kvm_share_hyp(void *from, void *to); void kvm_unshare_hyp(void *from, void *to); int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot); +int __create_hyp_mappings(unsigned long start, unsigned long size, + unsigned long phys, enum kvm_pgtable_prot prot); +int hyp_alloc_private_va_range(size_t size, unsigned long *haddr); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, void __iomem **haddr); diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 94e147e5456c..dff2b483ea50 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -535,7 +535,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PMD_TYPE_TABLE) #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) -#define pmd_leaf(pmd) pmd_sect(pmd) +#define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd)) #define pmd_bad(pmd) (!pmd_table(pmd)) #define pmd_leaf_size(pmd) (pmd_cont(pmd) ? 
CONT_PMD_SIZE : PMD_SIZE) @@ -625,7 +625,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!pud_table(pud)) #define pud_present(pud) pte_present(pud_pte(pud)) -#define pud_leaf(pud) pud_sect(pud) +#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) #define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 73e38d9a540c..1d2ca4870b84 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -118,6 +118,7 @@ struct debug_info { enum vec_type { ARM64_VEC_SVE = 0, + ARM64_VEC_SME, ARM64_VEC_MAX, }; @@ -153,6 +154,7 @@ struct thread_struct { unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ + void *za_state; /* ZA register, if any */ unsigned int vl[ARM64_VEC_MAX]; /* vector length */ unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ unsigned long fault_address; /* fault info */ @@ -168,6 +170,8 @@ struct thread_struct { u64 mte_ctrl; #endif u64 sctlr_user; + u64 svcr; + u64 tpidr2_el0; }; static inline unsigned int thread_get_vl(struct thread_struct *thread, @@ -181,6 +185,19 @@ static inline unsigned int thread_get_sve_vl(struct thread_struct *thread) return thread_get_vl(thread, ARM64_VEC_SVE); } +static inline unsigned int thread_get_sme_vl(struct thread_struct *thread) +{ + return thread_get_vl(thread, ARM64_VEC_SME); +} + +static inline unsigned int thread_get_cur_vl(struct thread_struct *thread) +{ + if (system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK)) + return thread_get_sme_vl(thread); + else + return thread_get_sve_vl(thread); +} + unsigned int task_get_vl(const struct task_struct *task, enum vec_type type); void task_set_vl(struct task_struct *task, enum vec_type type, unsigned long vl); @@ -194,6 +211,11 @@ static inline unsigned int task_get_sve_vl(const struct task_struct *task) return task_get_vl(task, ARM64_VEC_SVE); } +static inline unsigned int task_get_sme_vl(const struct task_struct *task) +{ + return task_get_vl(task, ARM64_VEC_SME); +} + static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl) { task_set_vl(task, ARM64_VEC_SVE, vl); @@ -354,9 +376,11 @@ extern void __init minsigstksz_setup(void); */ #include <asm/fpsimd.h> -/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ +/* Userspace interface for PR_S[MV]E_{SET,GET}_VL prctl()s: */ #define SVE_SET_VL(arg) sve_set_current_vl(arg) #define SVE_GET_VL() sve_get_current_vl() +#define SME_SET_VL(arg) sme_set_current_vl(arg) +#define SME_GET_VL() sme_get_current_vl() /* PR_PAC_RESET_KEYS prctl */ #define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index fbf5f8bb9055..bebfdd27296a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -118,6 +118,10 @@ * System registers, organised loosely by encoding but grouped together * where the architected name contains an index. e.g. ID_MMFR<n>_EL1. 
*/ +#define SYS_SVCR_SMSTOP_SM_EL0 sys_reg(0, 3, 4, 2, 3) +#define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) +#define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) + #define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) #define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) #define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) @@ -181,6 +185,7 @@ #define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) #define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) #define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4) +#define SYS_ID_AA64SMFR0_EL1 sys_reg(3, 0, 0, 4, 5) #define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) #define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) @@ -204,6 +209,8 @@ #define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) #define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) +#define SYS_SMPRI_EL1 sys_reg(3, 0, 1, 2, 4) +#define SYS_SMCR_EL1 sys_reg(3, 0, 1, 2, 6) #define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0) #define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1) @@ -396,6 +403,8 @@ #define TRBIDR_ALIGN_MASK GENMASK(3, 0) #define TRBIDR_ALIGN_SHIFT 0 +#define SMPRI_EL1_PRIORITY_MASK 0xf + #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) @@ -451,8 +460,13 @@ #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) #define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) +#define SYS_SMIDR_EL1 sys_reg(3, 1, 0, 0, 6) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) +#define SYS_SMIDR_EL1_IMPLEMENTER_SHIFT 24 +#define SYS_SMIDR_EL1_SMPS_SHIFT 15 +#define SYS_SMIDR_EL1_AFFINITY_SHIFT 0 + #define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) @@ -461,6 +475,10 @@ #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) +#define SYS_SVCR_EL0 sys_reg(3, 3, 4, 2, 2) +#define SYS_SVCR_EL0_ZA_MASK 2 +#define SYS_SVCR_EL0_SM_MASK 1 + #define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) #define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) #define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) @@ -477,6 +495,7 @@ #define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) #define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) +#define SYS_TPIDR2_EL0 sys_reg(3, 3, 13, 0, 5) #define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) @@ -546,6 +565,9 @@ #define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) +#define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2) +#define SYS_SMPRIMAP_EL2 sys_reg(3, 4, 1, 2, 5) +#define SYS_SMCR_EL2 sys_reg(3, 4, 1, 2, 6) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) @@ -605,6 +627,7 @@ #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) #define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) #define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) @@ -628,6 +651,7 @@ #define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) /* Common SCTLR_ELx flags. 
*/ +#define SCTLR_ELx_ENTP2 (BIT(60)) #define SCTLR_ELx_DSSBS (BIT(44)) #define SCTLR_ELx_ATA (BIT(43)) @@ -836,6 +860,7 @@ #define ID_AA64PFR0_ELx_32BIT_64BIT 0x2 /* id_aa64pfr1 */ +#define ID_AA64PFR1_SME_SHIFT 24 #define ID_AA64PFR1_MPAMFRAC_SHIFT 16 #define ID_AA64PFR1_RASFRAC_SHIFT 12 #define ID_AA64PFR1_MTE_SHIFT 8 @@ -846,6 +871,7 @@ #define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 #define ID_AA64PFR1_BT_BTI 0x1 +#define ID_AA64PFR1_SME 1 #define ID_AA64PFR1_MTE_NI 0x0 #define ID_AA64PFR1_MTE_EL0 0x1 @@ -874,6 +900,23 @@ #define ID_AA64ZFR0_AES_PMULL 0x2 #define ID_AA64ZFR0_SVEVER_SVE2 0x1 +/* id_aa64smfr0 */ +#define ID_AA64SMFR0_FA64_SHIFT 63 +#define ID_AA64SMFR0_I16I64_SHIFT 52 +#define ID_AA64SMFR0_F64F64_SHIFT 48 +#define ID_AA64SMFR0_I8I32_SHIFT 36 +#define ID_AA64SMFR0_F16F32_SHIFT 35 +#define ID_AA64SMFR0_B16F32_SHIFT 34 +#define ID_AA64SMFR0_F32F32_SHIFT 32 + +#define ID_AA64SMFR0_FA64 0x1 +#define ID_AA64SMFR0_I16I64 0x4 +#define ID_AA64SMFR0_F64F64 0x1 +#define ID_AA64SMFR0_I8I32 0x4 +#define ID_AA64SMFR0_F16F32 0x1 +#define ID_AA64SMFR0_B16F32 0x1 +#define ID_AA64SMFR0_F32F32 0x1 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_ECV_SHIFT 60 #define ID_AA64MMFR0_FGT_SHIFT 56 @@ -926,6 +969,7 @@ /* id_aa64mmfr1 */ #define ID_AA64MMFR1_ECBHB_SHIFT 60 +#define ID_AA64MMFR1_HCX_SHIFT 40 #define ID_AA64MMFR1_AFP_SHIFT 44 #define ID_AA64MMFR1_ETS_SHIFT 36 #define ID_AA64MMFR1_TWED_SHIFT 32 @@ -1119,9 +1163,24 @@ #define ZCR_ELx_LEN_SIZE 9 #define ZCR_ELx_LEN_MASK 0x1ff +#define SMCR_ELx_FA64_SHIFT 31 +#define SMCR_ELx_FA64_MASK (1 << SMCR_ELx_FA64_SHIFT) + +/* + * The SMCR_ELx_LEN_* definitions intentionally include bits [8:4] which + * are reserved by the SME architecture for future expansion of the LEN + * field, with compatible semantics. + */ +#define SMCR_ELx_LEN_SHIFT 0 +#define SMCR_ELx_LEN_SIZE 9 +#define SMCR_ELx_LEN_MASK 0x1ff + #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ +#define CPACR_EL1_SMEN_EL1EN (BIT(24)) /* enable EL1 access */ +#define CPACR_EL1_SMEN_EL0EN (BIT(25)) /* enable EL0 access, if EL1EN set */ + #define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ @@ -1170,6 +1229,8 @@ #define TRFCR_ELx_ExTRE BIT(1) #define TRFCR_ELx_E0TRE BIT(0) +/* HCRX_EL2 definitions */ +#define HCRX_EL2_SMPME_MASK (1 << 5) /* GIC Hypervisor interface registers */ /* ICH_MISR_EL2 bit definitions */ @@ -1233,6 +1294,12 @@ #define ICH_VTR_TDS_SHIFT 19 #define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) +/* HFG[WR]TR_EL2 bit definitions */ +#define HFGxTR_EL2_nTPIDR2_EL0_SHIFT 55 +#define HFGxTR_EL2_nTPIDR2_EL0_MASK BIT_MASK(HFGxTR_EL2_nTPIDR2_EL0_SHIFT) +#define HFGxTR_EL2_nSMPRI_EL1_SHIFT 54 +#define HFGxTR_EL2_nSMPRI_EL1_MASK BIT_MASK(HFGxTR_EL2_nSMPRI_EL1_SHIFT) + #define ARM64_FEATURE_FIELD_BITS 4 /* Create a mask for the feature bits of the specified feature. 
*/ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index e1317b7c4525..848739c15de8 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -82,6 +82,8 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ +#define TIF_SME 27 /* SME in use */ +#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 99cb5d383048..4bb2cc8ac446 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -79,5 +79,14 @@ #define HWCAP2_AFP (1 << 20) #define HWCAP2_RPRES (1 << 21) #define HWCAP2_MTE3 (1 << 22) +#define HWCAP2_SME (1 << 23) +#define HWCAP2_SME_I16I64 (1 << 24) +#define HWCAP2_SME_F64F64 (1 << 25) +#define HWCAP2_SME_I8I32 (1 << 26) +#define HWCAP2_SME_F16F32 (1 << 27) +#define HWCAP2_SME_B16F32 (1 << 28) +#define HWCAP2_SME_F32F32 (1 << 29) +#define HWCAP2_SME_FA64 (1 << 30) +#define HWCAP2_WFXT (1UL << 31) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index c1b6ddc02d2f..d4722b08f0d4 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -332,6 +332,40 @@ struct kvm_arm_copy_mte_tags { #define KVM_ARM64_SVE_VLS_WORDS \ ((KVM_ARM64_SVE_VQ_MAX - KVM_ARM64_SVE_VQ_MIN) / 64 + 1) +/* Bitmap feature firmware registers */ +#define KVM_REG_ARM_FW_FEAT_BMAP (0x0016 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_FW_FEAT_BMAP_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_FW_FEAT_BMAP | \ + ((r) & 0xffff)) + +#define KVM_REG_ARM_STD_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(0) + +enum { + KVM_REG_ARM_STD_BIT_TRNG_V1_0 = 0, +#ifdef __KERNEL__ + KVM_REG_ARM_STD_BMAP_BIT_COUNT, +#endif +}; + +#define KVM_REG_ARM_STD_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(1) + +enum { + KVM_REG_ARM_STD_HYP_BIT_PV_TIME = 0, +#ifdef __KERNEL__ + KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT, +#endif +}; + +#define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2) + +enum { + KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT = 0, + KVM_REG_ARM_VENDOR_HYP_BIT_PTP = 1, +#ifdef __KERNEL__ + KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT, +#endif +}; + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 758ae984ff97..7fa2f7036aa7 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -109,7 +109,7 @@ struct user_hwdebug_state { } dbg_regs[16]; }; -/* SVE/FP/SIMD state (NT_ARM_SVE) */ +/* SVE/FP/SIMD state (NT_ARM_SVE & NT_ARM_SSVE) */ struct user_sve_header { __u32 size; /* total meaningful regset content in bytes */ @@ -220,6 +220,7 @@ struct user_sve_header { (SVE_PT_SVE_PREG_OFFSET(vq, __SVE_NUM_PREGS) - \ SVE_PT_SVE_PREGS_OFFSET(vq)) +/* For streaming mode SVE (SSVE) FFR must be read and written as zero */ #define SVE_PT_SVE_FFR_OFFSET(vq) \ (SVE_PT_REGS_OFFSET + __SVE_FFR_OFFSET(vq)) @@ -240,10 +241,12 @@ struct user_sve_header { - SVE_PT_SVE_OFFSET + (__SVE_VQ_BYTES - 1)) \ / __SVE_VQ_BYTES * __SVE_VQ_BYTES) -#define SVE_PT_SIZE(vq, flags) \ - (((flags) & 
SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \ - SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \ - : SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags)) +#define SVE_PT_SIZE(vq, flags) \ + (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \ + SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \ + : ((((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD ? \ + SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags) \ + : SVE_PT_REGS_OFFSET))) /* pointer authentication masks (NT_ARM_PAC_MASK) */ @@ -265,6 +268,62 @@ struct user_pac_generic_keys { __uint128_t apgakey; }; +/* ZA state (NT_ARM_ZA) */ + +struct user_za_header { + __u32 size; /* total meaningful regset content in bytes */ + __u32 max_size; /* maxmium possible size for this thread */ + __u16 vl; /* current vector length */ + __u16 max_vl; /* maximum possible vector length */ + __u16 flags; + __u16 __reserved; +}; + +/* + * Common ZA_PT_* flags: + * These must be kept in sync with prctl interface in <linux/prctl.h> + */ +#define ZA_PT_VL_INHERIT ((1 << 17) /* PR_SME_VL_INHERIT */ >> 16) +#define ZA_PT_VL_ONEXEC ((1 << 18) /* PR_SME_SET_VL_ONEXEC */ >> 16) + + +/* + * The remainder of the ZA state follows struct user_za_header. The + * total size of the ZA state (including header) depends on the + * metadata in the header: ZA_PT_SIZE(vq, flags) gives the total size + * of the state in bytes, including the header. + * + * Refer to <asm/sigcontext.h> for details of how to pass the correct + * "vq" argument to these macros. + */ + +/* Offset from the start of struct user_za_header to the register data */ +#define ZA_PT_ZA_OFFSET \ + ((sizeof(struct user_za_header) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) + +/* + * The payload starts at offset ZA_PT_ZA_OFFSET, and is of size + * ZA_PT_ZA_SIZE(vq, flags). + * + * The ZA array is stored as a sequence of horizontal vectors ZAV of SVL/8 + * bytes each, starting from vector 0. + * + * Additional data might be appended in the future. + * + * The ZA matrix is represented in memory in an endianness-invariant layout + * which differs from the layout used for the FPSIMD V-registers on big-endian + * systems: see sigcontext.h for more explanation. + */ + +#define ZA_PT_ZAV_OFFSET(vq, n) \ + (ZA_PT_ZA_OFFSET + ((vq * __SVE_VQ_BYTES) * n)) + +#define ZA_PT_ZA_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) + +#define ZA_PT_SIZE(vq) \ + (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq)) + #endif /* __ASSEMBLY__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 0c796c795dbe..4aaf31e3bf16 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -134,6 +134,17 @@ struct extra_context { struct sve_context { struct _aarch64_ctx head; __u16 vl; + __u16 flags; + __u16 __reserved[2]; +}; + +#define SVE_SIG_FLAG_SM 0x1 /* Context describes streaming mode */ + +#define ZA_MAGIC 0x54366345 + +struct za_context { + struct _aarch64_ctx head; + __u16 vl; __u16 __reserved[3]; }; @@ -186,9 +197,16 @@ struct sve_context { * sve_context.vl must equal the thread's current vector length when * doing a sigreturn. * + * On systems with support for SME the SVE register state may reflect either + * streaming or non-streaming mode. In streaming mode the streaming mode + * vector length will be used and the flag SVE_SIG_FLAG_SM will be set in + * the flags field. 
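Note: one consumer of the new sve_context flags word is a signal handler that needs to know whether the SVE block in its frame was captured in streaming mode. A hedged userspace sketch, assuming an arm64 <asm/sigcontext.h> that already carries the definitions added here and omitting bounds checking of the reserved area:

        #include <asm/sigcontext.h>

        /* Return non-zero if the SVE record in the frame describes streaming state. */
        static int sve_block_is_streaming(struct sigcontext *sc)
        {
                struct _aarch64_ctx *head = (struct _aarch64_ctx *)sc->__reserved;

                /* The context list is terminated by a record with magic == 0. */
                while (head->magic && head->size) {
                        if (head->magic == SVE_MAGIC)
                                return !!(((struct sve_context *)head)->flags &
                                          SVE_SIG_FLAG_SM);
                        head = (struct _aarch64_ctx *)((char *)head + head->size);
                }

                return 0;       /* no SVE record present */
        }
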
It is permitted to enter or leave streaming mode in + * a signal return, applications should take care to ensure that any difference + * in vector length between the two modes is handled, including any resizing + * and movement of context blocks. * - * Note: for all these macros, the "vq" argument denotes the SVE - * vector length in quadwords (i.e., units of 128 bits). + * Note: for all these macros, the "vq" argument denotes the vector length + * in quadwords (i.e., units of 128 bits). * * The correct way to obtain vq is to use sve_vq_from_vl(vl). The * result is valid if and only if sve_vl_valid(vl) is true. This is @@ -249,4 +267,37 @@ struct sve_context { #define SVE_SIG_CONTEXT_SIZE(vq) \ (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq)) +/* + * If the ZA register is enabled for the thread at signal delivery then, + * za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl)) + * and the register data may be accessed using the ZA_SIG_*() macros. + * + * If za_context.head.size < ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl)) + * then ZA was not enabled and no register data was included in which case + * ZA register was not enabled for the thread and no register data + * the ZA_SIG_*() macros should not be used except for this check. + * + * The same convention applies when returning from a signal: a caller + * will need to remove or resize the za_context block if it wants to + * enable the ZA register when it was previously non-live or vice-versa. + * This may require the caller to allocate fresh memory and/or move other + * context blocks in the signal frame. + * + * Changing the vector length during signal return is not permitted: + * za_context.vl must equal the thread's current SME vector length when + * doing a sigreturn. + */ + +#define ZA_SIG_REGS_OFFSET \ + ((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) + +#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) + +#define ZA_SIG_ZAV_OFFSET(vq, n) (ZA_SIG_REGS_OFFSET + \ + (SVE_SIG_ZREG_SIZE(vq) * n)) + +#define ZA_SIG_CONTEXT_SIZE(vq) \ + (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq)) + #endif /* _UAPI__ASM_SIGCONTEXT_H */ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 3fb79b76e9d9..7bbf5104b7b7 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -42,7 +42,7 @@ bool alternative_is_applied(u16 cpufeature) /* * Check if the target PC is within an alternative block. 
*/ -static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) +static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) { unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt); return !(pc >= replptr && pc <= (replptr + alt->alt_len)); @@ -50,7 +50,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) -static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) +static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) { u32 insn; @@ -95,7 +95,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp return insn; } -static void patch_alternative(struct alt_instr *alt, +static noinstr void patch_alternative(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { __le32 *replptr; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d72c4b4d389c..78ed5fc762eb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -237,6 +237,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_WFXT_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -261,6 +262,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), @@ -293,6 +296,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), @@ -561,6 +582,12 @@ static const struct arm64_ftr_bits ftr_zcr[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_smcr[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, + SMCR_ELx_LEN_SHIFT, 
SMCR_ELx_LEN_SIZE, 0), /* LEN */ + ARM64_FTR_END, +}; + /* * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of @@ -645,6 +672,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1, &id_aa64pfr1_override), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0), + ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0), /* Op1 = 0, CRn = 0, CRm = 5 */ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), @@ -666,6 +694,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 1, CRm = 2 */ ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), + ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr), /* Op1 = 1, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), @@ -960,6 +989,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); + init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0); if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) init_32bit_cpu_features(&info->aarch32); @@ -969,6 +999,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) vec_init_vq_map(ARM64_VEC_SVE); } + if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr); + if (IS_ENABLED(CONFIG_ARM64_SME)) + vec_init_vq_map(ARM64_VEC_SME); + } + if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); @@ -1195,6 +1231,9 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu, info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu, + info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0); + if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu, info->reg_zcr, boot->reg_zcr); @@ -1205,6 +1244,16 @@ void update_cpu_features(int cpu, vec_update_vq_map(ARM64_VEC_SVE); } + if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu, + info->reg_smcr, boot->reg_smcr); + + /* Probe vector lengths, unless we already gave up on SME */ + if (id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)) && + !system_capabilities_finalized()) + vec_update_vq_map(ARM64_VEC_SME); + } + /* * The kernel uses the LDGM/STGM instructions and the number of tags * they read/write depends on the GMID_EL1.BS field. 
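Note: once these feature registers are sanitised across all CPUs, support is advertised to userspace through the HWCAP2_SME* bits added to hwcap.h earlier in this patch. A minimal detection sketch; the fallback define is only for toolchain headers that predate this series:

        #include <sys/auxv.h>
        #include <stdio.h>

        #ifndef HWCAP2_SME
        #define HWCAP2_SME      (1 << 23)       /* value from the hwcap.h hunk above */
        #endif

        int main(void)
        {
                unsigned long hwcap2 = getauxval(AT_HWCAP2);

                printf("SME %ssupported\n", (hwcap2 & HWCAP2_SME) ? "" : "not ");
                return 0;
        }
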
Check that the @@ -1288,6 +1337,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_AA64PFR0_EL1); read_sysreg_case(SYS_ID_AA64PFR1_EL1); read_sysreg_case(SYS_ID_AA64ZFR0_EL1); + read_sysreg_case(SYS_ID_AA64SMFR0_EL1); read_sysreg_case(SYS_ID_AA64DFR0_EL1); read_sysreg_case(SYS_ID_AA64DFR1_EL1); read_sysreg_case(SYS_ID_AA64MMFR0_EL1); @@ -2442,6 +2492,44 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .min_field_value = 1, }, +#ifdef CONFIG_ARM64_SME + { + .desc = "Scalable Matrix Extension", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_SME, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_SME_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64PFR1_SME, + .matches = has_cpuid_feature, + .cpu_enable = sme_kernel_enable, + }, + /* FA64 should be sorted after the base SME capability */ + { + .desc = "FA64", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_SME_FA64, + .sys_reg = SYS_ID_AA64SMFR0_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64SMFR0_FA64_SHIFT, + .field_width = 1, + .min_field_value = ID_AA64SMFR0_FA64, + .matches = has_cpuid_feature, + .cpu_enable = fa64_kernel_enable, + }, +#endif /* CONFIG_ARM64_SME */ + { + .desc = "WFx with timeout", + .capability = ARM64_HAS_WFXT, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64ISAR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR2_WFXT_SHIFT, + .field_width = 4, + .matches = has_cpuid_feature, + .min_field_value = ID_AA64ISAR2_WFXT_SUPPORTED, + }, {}, }; @@ -2575,6 +2663,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_WFXT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_WFXT_SUPPORTED, CAP_HWCAP, KERNEL_HWCAP_WFXT), +#ifdef CONFIG_ARM64_SME + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), +#endif /* CONFIG_ARM64_SME */ {}, }; @@ -2872,6 +2971,23 @@ static void verify_sve_features(void) /* Add checks on other ZCR bits here if necessary */ } +static void verify_sme_features(void) +{ + u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1); + u64 smcr = read_smcr_features(); + + unsigned int safe_len = safe_smcr & 
SMCR_ELx_LEN_MASK; + unsigned int len = smcr & SMCR_ELx_LEN_MASK; + + if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) { + pr_crit("CPU%d: SME: vector length support mismatch\n", + smp_processor_id()); + cpu_die_early(); + } + + /* Add checks on other SMCR bits here if necessary */ +} + static void verify_hyp_capabilities(void) { u64 safe_mmfr1, mmfr0, mmfr1; @@ -2924,6 +3040,9 @@ static void verify_local_cpu_capabilities(void) if (system_supports_sve()) verify_sve_features(); + if (system_supports_sme()) + verify_sme_features(); + if (is_hyp_mode_available()) verify_hyp_capabilities(); } @@ -3041,6 +3160,7 @@ void __init setup_cpu_features(void) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); sve_setup(); + sme_setup(); minsigstksz_setup(); /* Advertise that we have computed the system capabilities */ diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 330b92ea863a..8eff0a34ffd4 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -98,6 +98,15 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_AFP] = "afp", [KERNEL_HWCAP_RPRES] = "rpres", [KERNEL_HWCAP_MTE3] = "mte3", + [KERNEL_HWCAP_SME] = "sme", + [KERNEL_HWCAP_SME_I16I64] = "smei16i64", + [KERNEL_HWCAP_SME_F64F64] = "smef64f64", + [KERNEL_HWCAP_SME_I8I32] = "smei8i32", + [KERNEL_HWCAP_SME_F16F32] = "smef16f32", + [KERNEL_HWCAP_SME_B16F32] = "smeb16f32", + [KERNEL_HWCAP_SME_F32F32] = "smef32f32", + [KERNEL_HWCAP_SME_FA64] = "smefa64", + [KERNEL_HWCAP_WFXT] = "wfxt", }; #ifdef CONFIG_COMPAT @@ -401,6 +410,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1); + info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1); if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) info->reg_gmid = read_cpuid(GMID_EL1); @@ -412,6 +422,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) id_aa64pfr0_sve(info->reg_id_aa64pfr0)) info->reg_zcr = read_zcr_features(); + if (IS_ENABLED(CONFIG_ARM64_SME) && + id_aa64pfr1_sme(info->reg_id_aa64pfr1)) + info->reg_smcr = read_smcr_features(); + cpuinfo_detect_icache_policy(info); } diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 3ed39c61a510..98d67444a5b6 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,16 +8,9 @@ #include <asm/cpufeature.h> #include <asm/mte.h> -#ifndef VMA_ITERATOR -#define VMA_ITERATOR(name, mm, addr) \ - struct mm_struct *name = mm -#define for_each_vma(vmi, vma) \ - for (vma = vmi->mmap; vma; vma = vma->vm_next) -#endif - -#define for_each_mte_vma(vmi, vma) \ +#define for_each_mte_vma(tsk, vma) \ if (system_supports_mte()) \ - for_each_vma(vmi, vma) \ + for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \ if (vma->vm_flags & VM_MTE) static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) @@ -32,10 +25,11 @@ static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) static int mte_dump_tag_range(struct coredump_params *cprm, unsigned long start, unsigned long end) { + int ret = 1; unsigned long addr; + void *tags = NULL; for (addr = start; addr < end; addr += PAGE_SIZE) { - char tags[MTE_PAGE_TAG_STORAGE]; struct page *page = get_dump_page(addr); /* @@ -59,22 +53,36 @@ static int mte_dump_tag_range(struct coredump_params *cprm, continue; } + if (!tags) { + tags = mte_allocate_tag_storage(); + if (!tags) { + put_page(page); + ret = 0; + break; 
+ } + } + mte_save_page_tags(page_address(page), tags); put_page(page); - if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) - return 0; + if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) { + mte_free_tag_storage(tags); + ret = 0; + break; + } } - return 1; + if (tags) + mte_free_tag_storage(tags); + + return ret; } Elf_Half elf_core_extra_phdrs(void) { struct vm_area_struct *vma; int vma_count = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(current, vma) vma_count++; return vma_count; @@ -83,12 +91,11 @@ Elf_Half elf_core_extra_phdrs(void) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(current, vma) { struct elf_phdr phdr; - phdr.p_type = PT_ARM_MEMTAG_MTE; + phdr.p_type = PT_AARCH64_MEMTAG_MTE; phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; @@ -109,9 +116,8 @@ size_t elf_core_extra_data_size(void) { struct vm_area_struct *vma; size_t data_size = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(current, vma) data_size += mte_vma_tag_dump_size(vma); return data_size; @@ -120,9 +126,8 @@ size_t elf_core_extra_data_size(void) int elf_core_write_extra_data(struct coredump_params *cprm) { struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(current, vma) { if (vma->vm_flags & VM_DONTDUMP) continue; diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 878c65aa7206..29139e9a1517 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -537,6 +537,14 @@ static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } +static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); + local_daif_restore(DAIF_PROCCTX); + do_sme_acc(esr, regs); + exit_to_user_mode(regs); +} + static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { enter_from_user_mode(regs); @@ -645,6 +653,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_SVE: el0_sve_acc(regs, esr); break; + case ESR_ELx_EC_SME: + el0_sme_acc(regs, esr); + break; case ESR_ELx_EC_FP_EXC64: el0_fpsimd_exc(regs, esr); break; diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index dc242e269f9a..229436f33df5 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -86,3 +86,39 @@ SYM_FUNC_START(sve_flush_live) SYM_FUNC_END(sve_flush_live) #endif /* CONFIG_ARM64_SVE */ + +#ifdef CONFIG_ARM64_SME + +SYM_FUNC_START(sme_get_vl) + _sme_rdsvl 0, 1 + ret +SYM_FUNC_END(sme_get_vl) + +SYM_FUNC_START(sme_set_vq) + sme_load_vq x0, x1, x2 + ret +SYM_FUNC_END(sme_set_vq) + +/* + * Save the SME state + * + * x0 - pointer to buffer for state + */ +SYM_FUNC_START(za_save_state) + _sme_rdsvl 1, 1 // x1 = VL/8 + sme_save_za 0, x1, 12 + ret +SYM_FUNC_END(za_save_state) + +/* + * Load the SME state + * + * x0 - pointer to buffer for state + */ +SYM_FUNC_START(za_load_state) + _sme_rdsvl 1, 1 // x1 = VL/8 + sme_load_za 0, x1, 12 + ret +SYM_FUNC_END(za_load_state) + +#endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 47af76e53221..64431bc62472 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -121,7 +121,10 @@ struct 
fpsimd_last_state_struct { struct user_fpsimd_state *st; void *sve_state; + void *za_state; + u64 *svcr; unsigned int sve_vl; + unsigned int sme_vl; }; static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); @@ -136,6 +139,12 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = { .max_virtualisable_vl = SVE_VL_MIN, }, #endif +#ifdef CONFIG_ARM64_SME + [ARM64_VEC_SME] = { + .type = ARM64_VEC_SME, + .name = "SME", + }, +#endif }; static unsigned int vec_vl_inherit_flag(enum vec_type type) @@ -143,6 +152,8 @@ static unsigned int vec_vl_inherit_flag(enum vec_type type) switch (type) { case ARM64_VEC_SVE: return TIF_SVE_VL_INHERIT; + case ARM64_VEC_SME: + return TIF_SME_VL_INHERIT; default: WARN_ON_ONCE(1); return 0; @@ -186,6 +197,26 @@ extern void __percpu *efi_sve_state; #endif /* ! CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static int get_sme_default_vl(void) +{ + return get_default_vl(ARM64_VEC_SME); +} + +static void set_sme_default_vl(int val) +{ + set_default_vl(ARM64_VEC_SME, val); +} + +static void sme_free(struct task_struct *); + +#else + +static inline void sme_free(struct task_struct *t) { } + +#endif + DEFINE_PER_CPU(bool, fpsimd_context_busy); EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); @@ -279,16 +310,27 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, } /* + * TIF_SME controls whether a task can use SME without trapping while + * in userspace, when TIF_SME is set then we must have storage + * alocated in sve_state and za_state to store the contents of both ZA + * and the SVE registers for both streaming and non-streaming modes. + * + * If both SVCR.ZA and SVCR.SM are disabled then at any point we + * may disable TIF_SME and reenable traps. + */ + + +/* * TIF_SVE controls whether a task can use SVE without trapping while - * in userspace, and also the way a task's FPSIMD/SVE state is stored - * in thread_struct. + * in userspace, and also (together with TIF_SME) the way a task's + * FPSIMD/SVE state is stored in thread_struct. * * The kernel uses this flag to track whether a user task is actively * using SVE, and therefore whether full SVE register state needs to * be tracked. If not, the cheaper FPSIMD context handling code can * be used instead of the more costly SVE equivalents. * - * * TIF_SVE set: + * * TIF_SVE or SVCR.SM set: * * The task can execute SVE instructions while in userspace without * trapping to the kernel. @@ -296,7 +338,8 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the * corresponding Zn), P0-P15 and FFR are encoded in in * task->thread.sve_state, formatted appropriately for vector - * length task->thread.sve_vl. + * length task->thread.sve_vl or, if SVCR.SM is set, + * task->thread.sme_vl. * * task->thread.sve_state must point to a valid buffer at least * sve_state_size(task) bytes in size. 
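Note: the storage requirement described above has a simple size implication: ZA is an SVL-by-SVL byte tile, so za_state grows quadratically with the streaming vector length. A rough restatement of the arithmetic behind ZA_PT_ZA_SIZE() in the uapi header, not an in-kernel helper:

        /* Bytes needed to hold ZA for a streaming vector length of sme_vl bytes. */
        static unsigned long za_bytes(unsigned long sme_vl)
        {
                return sme_vl * sme_vl;
        }

        /*
         * At the architectural maximum of 2048-bit (256-byte) streaming vectors
         * this comes to 64KiB, which is why sme_alloc() later in this patch notes
         * that the allocation "could potentially be up to 64K".
         */
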
@@ -334,16 +377,44 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, */ static void task_fpsimd_load(void) { + bool restore_sve_regs = false; + bool restore_ffr; + WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); + /* Check if we should restore SVE first */ if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); + restore_sve_regs = true; + restore_ffr = true; + } + + /* Restore SME, override SVE register configuration if needed */ + if (system_supports_sme()) { + unsigned long sme_vl = task_get_sme_vl(current); + + /* Ensure VL is set up for restoring data */ + if (test_thread_flag(TIF_SME)) + sme_set_vq(sve_vq_from_vl(sme_vl) - 1); + + write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0); + + if (thread_za_enabled(¤t->thread)) + za_load_state(current->thread.za_state); + + if (thread_sm_enabled(¤t->thread)) { + restore_sve_regs = true; + restore_ffr = system_supports_fa64(); + } + } + + if (restore_sve_regs) sve_load_state(sve_pffr(¤t->thread), - ¤t->thread.uw.fpsimd_state.fpsr, true); - } else { + ¤t->thread.uw.fpsimd_state.fpsr, + restore_ffr); + else fpsimd_load_state(¤t->thread.uw.fpsimd_state); - } } /* @@ -361,6 +432,9 @@ static void fpsimd_save(void) struct fpsimd_last_state_struct const *last = this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + bool save_sve_regs = false; + bool save_ffr; + unsigned int vl; WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); @@ -368,9 +442,32 @@ static void fpsimd_save(void) if (test_thread_flag(TIF_FOREIGN_FPSTATE)) return; - if (IS_ENABLED(CONFIG_ARM64_SVE) && - test_thread_flag(TIF_SVE)) { - if (WARN_ON(sve_get_vl() != last->sve_vl)) { + if (test_thread_flag(TIF_SVE)) { + save_sve_regs = true; + save_ffr = true; + vl = last->sve_vl; + } + + if (system_supports_sme()) { + u64 *svcr = last->svcr; + *svcr = read_sysreg_s(SYS_SVCR_EL0); + + *svcr = read_sysreg_s(SYS_SVCR_EL0); + + if (*svcr & SYS_SVCR_EL0_ZA_MASK) + za_save_state(last->za_state); + + /* If we are in streaming mode override regular SVE. */ + if (*svcr & SYS_SVCR_EL0_SM_MASK) { + save_sve_regs = true; + save_ffr = system_supports_fa64(); + vl = last->sme_vl; + } + } + + if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) { + /* Get the configured VL from RDVL, will account for SM */ + if (WARN_ON(sve_get_vl() != vl)) { /* * Can't save the user regs, so current would * re-enter user with corrupt state. @@ -381,8 +478,8 @@ static void fpsimd_save(void) } sve_save_state((char *)last->sve_state + - sve_ffr_offset(last->sve_vl), - &last->st->fpsr, true); + sve_ffr_offset(vl), + &last->st->fpsr, save_ffr); } else { fpsimd_save_state(last->st); } @@ -409,6 +506,8 @@ static unsigned int find_supported_vector_length(enum vec_type type, if (vl > max_vl) vl = max_vl; + if (vl < info->min_vl) + vl = info->min_vl; bit = find_next_bit(info->vq_map, SVE_VQ_MAX, __vq_to_bit(sve_vq_from_vl(vl))); @@ -467,6 +566,30 @@ static int __init sve_sysctl_init(void) static int __init sve_sysctl_init(void) { return 0; } #endif /* ! 
(CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ +#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL) +static struct ctl_table sme_default_vl_table[] = { + { + .procname = "sme_default_vector_length", + .mode = 0644, + .proc_handler = vec_proc_do_default_vl, + .extra1 = &vl_info[ARM64_VEC_SME], + }, + { } +}; + +static int __init sme_sysctl_init(void) +{ + if (system_supports_sme()) + if (!register_sysctl("abi", sme_default_vl_table)) + return -EINVAL; + + return 0; +} + +#else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */ +static int __init sme_sysctl_init(void) { return 0; } +#endif /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */ + #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) @@ -520,7 +643,7 @@ static void fpsimd_to_sve(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); __fpsimd_to_sve(sst, fst, vq); } @@ -537,7 +660,7 @@ static void fpsimd_to_sve(struct task_struct *task) */ static void sve_to_fpsimd(struct task_struct *task) { - unsigned int vq; + unsigned int vq, vl; void const *sst = task->thread.sve_state; struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; unsigned int i; @@ -546,7 +669,8 @@ static void sve_to_fpsimd(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vl = thread_get_cur_vl(&task->thread); + vq = sve_vq_from_vl(vl); for (i = 0; i < SVE_NUM_ZREGS; ++i) { p = (__uint128_t const *)ZREG(sst, vq, i); fst->vregs[i] = arm64_le128_to_cpu(*p); @@ -559,9 +683,16 @@ static void sve_to_fpsimd(struct task_struct *task) * Return how many bytes of memory are required to store the full SVE * state for task, given task's currently configured vector length. */ -static size_t sve_state_size(struct task_struct const *task) +size_t sve_state_size(struct task_struct const *task) { - return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task))); + unsigned int vl = 0; + + if (system_supports_sve()) + vl = task_get_sve_vl(task); + if (system_supports_sme()) + vl = max(vl, task_get_sme_vl(task)); + + return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)); } /* @@ -588,6 +719,19 @@ void sve_alloc(struct task_struct *task) /* + * Force the FPSIMD state shared with SVE to be updated in the SVE state + * even if the SVE state is the current active state. + * + * This should only be called by ptrace. task must be non-runnable. + * task->thread.sve_state must point to at least sve_state_size(task) + * bytes of allocated kernel memory. + */ +void fpsimd_force_sync_to_sve(struct task_struct *task) +{ + fpsimd_to_sve(task); +} + +/* * Ensure that task->thread.sve_state is up to date with respect to * the user task, irrespective of when SVE is in use or not. 
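Note: the control registered by sme_sysctl_init() above sits next to the existing SVE one under /proc/sys/abi. A small administrative sketch, assuming CONFIG_SYSCTL and a kernel with this patch applied; the value is a vector length in bytes, as for the SVE control:

        #include <stdio.h>

        int main(void)
        {
                FILE *f = fopen("/proc/sys/abi/sme_default_vector_length", "w");

                if (!f)
                        return 1;

                fprintf(f, "%d\n", 32);         /* default to 256-bit streaming vectors */
                return fclose(f) ? 1 : 0;
        }
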
* @@ -597,7 +741,8 @@ void sve_alloc(struct task_struct *task) */ void fpsimd_sync_to_sve(struct task_struct *task) { - if (!test_tsk_thread_flag(task, TIF_SVE)) + if (!test_tsk_thread_flag(task, TIF_SVE) && + !thread_sm_enabled(&task->thread)) fpsimd_to_sve(task); } @@ -611,7 +756,8 @@ void fpsimd_sync_to_sve(struct task_struct *task) */ void sve_sync_to_fpsimd(struct task_struct *task) { - if (test_tsk_thread_flag(task, TIF_SVE)) + if (test_tsk_thread_flag(task, TIF_SVE) || + thread_sm_enabled(&task->thread)) sve_to_fpsimd(task); } @@ -636,7 +782,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) if (!test_tsk_thread_flag(task, TIF_SVE)) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); __fpsimd_to_sve(sst, fst, vq); @@ -680,8 +826,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, /* * To ensure the FPSIMD bits of the SVE vector registers are preserved, * write any live register state back to task_struct, and convert to a - * regular FPSIMD thread. Since the vector length can only be changed - * with a syscall we can't be in streaming mode while reconfiguring. + * regular FPSIMD thread. */ if (task == current) { get_cpu_fpsimd_context(); @@ -690,17 +835,26 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, } fpsimd_flush_task_state(task); - if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) + if (test_and_clear_tsk_thread_flag(task, TIF_SVE) || + thread_sm_enabled(&task->thread)) sve_to_fpsimd(task); + if (system_supports_sme() && type == ARM64_VEC_SME) { + task->thread.svcr &= ~(SYS_SVCR_EL0_SM_MASK | + SYS_SVCR_EL0_ZA_MASK); + clear_thread_flag(TIF_SME); + } + if (task == current) put_cpu_fpsimd_context(); /* - * Force reallocation of task SVE state to the correct size - * on next use: + * Force reallocation of task SVE and SME state to the correct + * size on next use: */ sve_free(task); + if (system_supports_sme() && type == ARM64_VEC_SME) + sme_free(task); task_set_vl(task, type, vl); @@ -761,6 +915,36 @@ int sve_get_current_vl(void) return vec_prctl_status(ARM64_VEC_SVE, 0); } +#ifdef CONFIG_ARM64_SME +/* PR_SME_SET_VL */ +int sme_set_current_vl(unsigned long arg) +{ + unsigned long vl, flags; + int ret; + + vl = arg & PR_SME_VL_LEN_MASK; + flags = arg & ~vl; + + if (!system_supports_sme() || is_compat_task()) + return -EINVAL; + + ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags); + if (ret) + return ret; + + return vec_prctl_status(ARM64_VEC_SME, flags); +} + +/* PR_SME_GET_VL */ +int sme_get_current_vl(void) +{ + if (!system_supports_sme() || is_compat_task()) + return -EINVAL; + + return vec_prctl_status(ARM64_VEC_SME, 0); +} +#endif /* CONFIG_ARM64_SME */ + static void vec_probe_vqs(struct vl_info *info, DECLARE_BITMAP(map, SVE_VQ_MAX)) { @@ -770,7 +954,23 @@ static void vec_probe_vqs(struct vl_info *info, for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) { write_vl(info->type, vq - 1); /* self-syncing */ - vl = sve_get_vl(); + + switch (info->type) { + case ARM64_VEC_SVE: + vl = sve_get_vl(); + break; + case ARM64_VEC_SME: + vl = sme_get_vl(); + break; + default: + vl = 0; + break; + } + + /* Minimum VL identified? 
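Note: the PR_SME_SET_VL and PR_SME_GET_VL handlers above mirror the existing SVE prctls: the return value packs the granted vector length with the flags, and the kernel may grant less than was asked for. A hedged userspace sketch; the numeric fallbacks are assumptions for a <linux/prctl.h> that predates this series:

        #include <sys/prctl.h>
        #include <stdio.h>

        #ifndef PR_SME_SET_VL
        #define PR_SME_SET_VL           63
        #define PR_SME_GET_VL           64
        #define PR_SME_VL_LEN_MASK      0xffff
        #endif

        int main(void)
        {
                int vl;

                if (prctl(PR_SME_SET_VL, 32) < 0)       /* ask for 256-bit streaming vectors */
                        return 1;

                vl = prctl(PR_SME_GET_VL);
                if (vl < 0)
                        return 1;

                printf("granted SME vector length: %d bytes\n", vl & PR_SME_VL_LEN_MASK);
                return 0;
        }
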
*/ + if (sve_vq_from_vl(vl) > vq) + break; + vq = sve_vq_from_vl(vl); /* skip intervening lengths */ set_bit(__vq_to_bit(vq), map); } @@ -856,21 +1056,25 @@ int vec_verify_vq_map(enum vec_type type) static void __init sve_efi_setup(void) { - struct vl_info *info = &vl_info[ARM64_VEC_SVE]; + int max_vl = 0; + int i; if (!IS_ENABLED(CONFIG_EFI)) return; + for (i = 0; i < ARRAY_SIZE(vl_info); i++) + max_vl = max(vl_info[i].max_vl, max_vl); + /* * alloc_percpu() warns and prints a backtrace if this goes wrong. * This is evidence of a crippled system and we are returning void, * so no attempt is made to handle this situation here. */ - if (!sve_vl_valid(info->max_vl)) + if (!sve_vl_valid(max_vl)) goto fail; efi_sve_state = __alloc_percpu( - SVE_SIG_REGS_SIZE(sve_vq_from_vl(info->max_vl)), SVE_VQ_BYTES); + SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES); if (!efi_sve_state) goto fail; @@ -989,10 +1193,172 @@ void __init sve_setup(void) void fpsimd_release_task(struct task_struct *dead_task) { __sve_free(dead_task); + sme_free(dead_task); } #endif /* CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +/* + * Ensure that task->thread.za_state is allocated and sufficiently large. + * + * This function should be used only in preparation for replacing + * task->thread.za_state with new data. The memory is always zeroed + * here to prevent stale data from showing through: this is done in + * the interest of testability and predictability, the architecture + * guarantees that when ZA is enabled it will be zeroed. + */ +void sme_alloc(struct task_struct *task) +{ + if (task->thread.za_state) { + memset(task->thread.za_state, 0, za_state_size(task)); + return; + } + + /* This could potentially be up to 64K. */ + task->thread.za_state = + kzalloc(za_state_size(task), GFP_KERNEL); +} + +static void sme_free(struct task_struct *task) +{ + kfree(task->thread.za_state); + task->thread.za_state = NULL; +} + +void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +{ + /* Set priority for all PEs to architecturally defined minimum */ + write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK, + SYS_SMPRI_EL1); + + /* Allow SME in kernel */ + write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1); + isb(); + + /* Allow EL0 to access TPIDR2 */ + write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1); + isb(); +} + +/* + * This must be called after sme_kernel_enable(), we rely on the + * feature table being sorted to ensure this. + */ +void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +{ + /* Allow use of FA64 */ + write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK, + SYS_SMCR_EL1); +} + +/* + * Read the pseudo-SMCR used by cpufeatures to identify the supported + * vector length. + * + * Use only if SME is present. + * This function clobbers the SME vector length. + */ +u64 read_smcr_features(void) +{ + u64 smcr; + unsigned int vq_max; + + sme_kernel_enable(NULL); + sme_smstart_sm(); + + /* + * Set the maximum possible VL. 
+ */ + write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK, + SYS_SMCR_EL1); + + smcr = read_sysreg_s(SYS_SMCR_EL1); + smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */ + vq_max = sve_vq_from_vl(sve_get_vl()); + smcr |= vq_max - 1; /* set LEN field to maximum effective value */ + + sme_smstop_sm(); + + return smcr; +} + +void __init sme_setup(void) +{ + struct vl_info *info = &vl_info[ARM64_VEC_SME]; + u64 smcr; + int min_bit; + + if (!system_supports_sme()) + return; + + /* + * SME doesn't require any particular vector length be + * supported but it does require at least one. We should have + * disabled the feature entirely while bringing up CPUs but + * let's double check here. + */ + WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX)); + + min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX); + info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit)); + + smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1); + info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1); + + /* + * Sanity-check that the max VL we determined through CPU features + * corresponds properly to sme_vq_map. If not, do our best: + */ + if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME, + info->max_vl))) + info->max_vl = find_supported_vector_length(ARM64_VEC_SME, + info->max_vl); + + WARN_ON(info->min_vl > info->max_vl); + + /* + * For the default VL, pick the maximum supported value <= 32 + * (256 bits) if there is one since this is guaranteed not to + * grow the signal frame when in streaming mode, otherwise the + * minimum available VL will be used. + */ + set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32)); + + pr_info("SME: minimum available vector length %u bytes per vector\n", + info->min_vl); + pr_info("SME: maximum available vector length %u bytes per vector\n", + info->max_vl); + pr_info("SME: default vector length %u bytes per vector\n", + get_sme_default_vl()); +} + +#endif /* CONFIG_ARM64_SME */ + +static void sve_init_regs(void) +{ + /* + * Convert the FPSIMD state to SVE, zeroing all the state that + * is not shared with FPSIMD. If (as is likely) the current + * state is live in the registers then do this there and + * update our metadata for the current task including + * disabling the trap, otherwise update our in-memory copy. + * We are guaranteed to not be in streaming mode, we can only + * take a SVE trap when not in streaming mode and we can't be + * in streaming mode when taking a SME trap. + */ + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { + unsigned long vq_minus_one = + sve_vq_from_vl(task_get_sve_vl(current)) - 1; + sve_set_vq(vq_minus_one); + sve_flush_live(true, vq_minus_one); + fpsimd_bind_task_to_cpu(); + } else { + fpsimd_to_sve(current); + } +} + /* * Trapped SVE access * @@ -1024,22 +1390,77 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) WARN_ON(1); /* SVE access shouldn't have trapped */ /* - * Convert the FPSIMD state to SVE, zeroing all the state that - * is not shared with FPSIMD. If (as is likely) the current - * state is live in the registers then do this there and - * update our metadata for the current task including - * disabling the trap, otherwise update our in-memory copy. + * Even if the task can have used streaming mode we can only + * generate SVE access traps in normal SVE mode and + * transitioning out of streaming mode may discard any + * streaming mode state. Always clear the high bits to avoid + * any potential errors tracking what is properly initialised. 
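Note: sme_setup() above derives the maximum vector length from the sanitised SMCR LEN field via sve_vl_from_vq(); spelled out, that is simply (LEN + 1) quadwords of 16 bytes. A stand-alone restatement, not kernel code:

        #define SMCR_LEN_MASK   0x1ff   /* SMCR_ELx_LEN_MASK from the sysreg.h hunk */

        /* Maximum streaming vector length in bytes implied by an SMCR value. */
        static unsigned int smcr_to_max_vl(unsigned long smcr)
        {
                return ((smcr & SMCR_LEN_MASK) + 1) * 16;
        }
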
*/ + sve_init_regs(); + + put_cpu_fpsimd_context(); +} + +/* + * Trapped SME access + * + * Storage is allocated for the full SVE and SME state, the current + * FPSIMD register contents are migrated to SVE if SVE is not already + * active, and the access trap is disabled. + * + * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state() + * would have disabled the SME access trap for userspace during + * ret_to_user, making an SVE access trap impossible in that case. + */ +void do_sme_acc(unsigned int esr, struct pt_regs *regs) +{ + /* Even if we chose not to use SME, the hardware could still trap: */ + if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) { + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); + return; + } + + /* + * If this not a trap due to SME being disabled then something + * is being used in the wrong mode, report as SIGILL. + */ + if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) { + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); + return; + } + + sve_alloc(current); + sme_alloc(current); + if (!current->thread.sve_state || !current->thread.za_state) { + force_sig(SIGKILL); + return; + } + + get_cpu_fpsimd_context(); + + /* With TIF_SME userspace shouldn't generate any traps */ + if (test_and_set_thread_flag(TIF_SME)) + WARN_ON(1); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { unsigned long vq_minus_one = - sve_vq_from_vl(task_get_sve_vl(current)) - 1; - sve_set_vq(vq_minus_one); - sve_flush_live(true, vq_minus_one); + sve_vq_from_vl(task_get_sme_vl(current)) - 1; + sme_set_vq(vq_minus_one); + fpsimd_bind_task_to_cpu(); - } else { - fpsimd_to_sve(current); } + /* + * If SVE was not already active initialise the SVE registers, + * any non-shared state between the streaming and regular SVE + * registers is architecturally guaranteed to be zeroed when + * we enter streaming mode. We do not need to initialize ZA + * since ZA must be disabled at this point and enabling ZA is + * architecturally defined to zero ZA. + */ + if (system_supports_sve() && !test_thread_flag(TIF_SVE)) + sve_init_regs(); + put_cpu_fpsimd_context(); } @@ -1156,6 +1577,13 @@ void fpsimd_flush_thread(void) fpsimd_flush_thread_vl(ARM64_VEC_SVE); } + if (system_supports_sme()) { + clear_thread_flag(TIF_SME); + sme_free(current); + fpsimd_flush_thread_vl(ARM64_VEC_SME); + current->thread.svcr = 0; + } + put_cpu_fpsimd_context(); } @@ -1198,22 +1626,34 @@ static void fpsimd_bind_task_to_cpu(void) WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; + last->za_state = current->thread.za_state; last->sve_vl = task_get_sve_vl(current); + last->sme_vl = task_get_sme_vl(current); + last->svcr = ¤t->thread.svcr; current->thread.fpsimd_cpu = smp_processor_id(); + /* + * Toggle SVE and SME trapping for userspace if needed, these + * are serialsied by ret_to_user(). 
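Note: do_sme_acc() above gives SME the same lazy setup as the SVE trap handler: nothing is allocated until a task actually executes an SME instruction. A hypothetical userspace illustration; it needs an assembler that accepts SME mnemonics, and the wrapper name is made up:

        /*
         * The very first SME instruction a task issues - here SMSTART ZA - takes
         * the EL0 access trap, the kernel allocates sve_state/za_state and clears
         * the trap, and the instruction is then re-executed transparently.
         */
        static inline void first_sme_use(void)
        {
                asm volatile("smstart za" ::: "memory");
        }
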
+ */ + if (system_supports_sme()) { + if (test_thread_flag(TIF_SME)) + sme_user_enable(); + else + sme_user_disable(); + } + if (system_supports_sve()) { - /* Toggle SVE trapping for userspace if needed */ if (test_thread_flag(TIF_SVE)) sve_user_enable(); else sve_user_disable(); - - /* Serialised by exception return to user */ } } void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, - unsigned int sve_vl) + unsigned int sve_vl, void *za_state, + unsigned int sme_vl, u64 *svcr) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); @@ -1222,8 +1662,11 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, WARN_ON(!in_softirq() && !irqs_disabled()); last->st = st; + last->svcr = svcr; last->sve_state = sve_state; + last->za_state = za_state; last->sve_vl = sve_vl; + last->sme_vl = sme_vl; } /* @@ -1320,6 +1763,15 @@ static void fpsimd_flush_cpu_state(void) { WARN_ON(!system_supports_fpsimd()); __this_cpu_write(fpsimd_last_state.st, NULL); + + /* + * Leaving streaming mode enabled will cause issues for any kernel + * NEON and leaving streaming mode or ZA enabled may increase power + * consumption. + */ + if (system_supports_sme()) + sme_smstop(); + set_thread_flag(TIF_FOREIGN_FPSTATE); } @@ -1397,6 +1849,7 @@ EXPORT_SYMBOL(kernel_neon_end); static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state); static DEFINE_PER_CPU(bool, efi_fpsimd_state_used); static DEFINE_PER_CPU(bool, efi_sve_state_used); +static DEFINE_PER_CPU(bool, efi_sm_state); /* * EFI runtime services support functions @@ -1431,12 +1884,28 @@ void __efi_fpsimd_begin(void) */ if (system_supports_sve() && likely(efi_sve_state)) { char *sve_state = this_cpu_ptr(efi_sve_state); + bool ffr = true; + u64 svcr; __this_cpu_write(efi_sve_state_used, true); + if (system_supports_sme()) { + svcr = read_sysreg_s(SYS_SVCR_EL0); + + if (!system_supports_fa64()) + ffr = svcr & SYS_SVCR_EL0_SM_MASK; + + __this_cpu_write(efi_sm_state, ffr); + } + sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()), &this_cpu_ptr(&efi_fpsimd_state)->fpsr, - true); + ffr); + + if (system_supports_sme()) + sysreg_clear_set_s(SYS_SVCR_EL0, + SYS_SVCR_EL0_SM_MASK, 0); + } else { fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); } @@ -1459,11 +1928,26 @@ void __efi_fpsimd_end(void) if (system_supports_sve() && likely(__this_cpu_read(efi_sve_state_used))) { char const *sve_state = this_cpu_ptr(efi_sve_state); + bool ffr = true; + + /* + * Restore streaming mode; EFI calls are + * normal function calls so should not return in + * streaming mode. 
+ */ + if (system_supports_sme()) { + if (__this_cpu_read(efi_sm_state)) { + sysreg_clear_set_s(SYS_SVCR_EL0, + 0, + SYS_SVCR_EL0_SM_MASK); + if (!system_supports_fa64()) + ffr = efi_sm_state; + } + } - sve_set_vq(sve_vq_from_vl(sve_get_vl()) - 1); sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()), &this_cpu_ptr(&efi_fpsimd_state)->fpsr, - true); + ffr); __this_cpu_write(efi_sve_state_used, false); } else { @@ -1538,6 +2022,13 @@ static int __init fpsimd_init(void) if (!cpu_have_named_feature(ASIMD)) pr_notice("Advanced SIMD is not implemented\n"); - return sve_sysctl_init(); + + if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE)) + pr_notice("SME is implemented but not SVE\n"); + + sve_sysctl_init(); + sme_sysctl_init(); + + return 0; } core_initcall(fpsimd_init); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 712e97c03e54..cd868084e724 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -701,7 +701,7 @@ NOKPROBE_SYMBOL(breakpoint_handler); * addresses. There is no straight-forward way, short of disassembling the * offending instruction, to map that address back to the watchpoint. This * function computes the distance of the memory access from the watchpoint as a - * heuristic for the likelyhood that a given access triggered the watchpoint. + * heuristic for the likelihood that a given access triggered the watchpoint. * * See Section D2.10.5 "Determining the memory location that caused a Watchpoint * exception" of ARMv8 Architecture Reference Manual for details. diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index e53493d8b208..a3d0494f25a9 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -220,7 +220,7 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, * increasing the section's alignment so that the * resulting address of this instruction is guaranteed * to equal the offset in that particular bit (as well - * as all less signficant bits). This ensures that the + * as all less significant bits). This ensures that the * address modulo 4 KB != 0xfff8 or 0xfffc (which would * have all ones in bits [11:3]) */ diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 771f543464e0..33e0fabc0b79 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -117,8 +117,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) int i, ret = 0; struct aarch64_insn_patch *pp = arg; - /* The first CPU becomes master */ - if (atomic_inc_return(&pp->cpu_count) == 1) { + /* The last CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) { for (i = 0; ret == 0 && i < pp->insn_cnt; i++) ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], pp->new_insns[i]); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7fa97df55e3a..9734c9fb1a32 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -250,6 +250,8 @@ void show_regs(struct pt_regs *regs) static void tls_thread_flush(void) { write_sysreg(0, tpidr_el0); + if (system_supports_tpidr2()) + write_sysreg_s(0, SYS_TPIDR2_EL0); if (is_compat_task()) { current->thread.uw.tp_value = 0; @@ -298,16 +300,42 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) /* * Detach src's sve_state (if any) from dst so that it does not - * get erroneously used or freed prematurely. 
dst's sve_state + * get erroneously used or freed prematurely. dst's copies * will be allocated on demand later on if dst uses SVE. * For consistency, also clear TIF_SVE here: this could be done * later in copy_process(), but to avoid tripping up future - * maintainers it is best not to leave TIF_SVE and sve_state in + * maintainers it is best not to leave TIF flags and buffers in * an inconsistent state, even temporarily. */ dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); + /* + * In the unlikely event that we create a new thread with ZA + * enabled we should retain the ZA state so duplicate it here. + * This may be shortly freed if we exec() or if CLONE_SETTLS + * but it's simpler to do it here. To avoid confusing the rest + * of the code ensure that we have a sve_state allocated + * whenever za_state is allocated. + */ + if (thread_za_enabled(&src->thread)) { + dst->thread.sve_state = kzalloc(sve_state_size(src), + GFP_KERNEL); + if (!dst->thread.sve_state) + return -ENOMEM; + dst->thread.za_state = kmemdup(src->thread.za_state, + za_state_size(src), + GFP_KERNEL); + if (!dst->thread.za_state) { + kfree(dst->thread.sve_state); + dst->thread.sve_state = NULL; + return -ENOMEM; + } + } else { + dst->thread.za_state = NULL; + clear_tsk_thread_flag(dst, TIF_SME); + } + /* clear any pending asynchronous tag fault raised by the parent */ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); @@ -343,6 +371,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, * out-of-sync with the saved value. */ *task_user_tls(p) = read_sysreg(tpidr_el0); + if (system_supports_tpidr2()) + p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); if (stack_start) { if (is_compat_thread(task_thread_info(p))) @@ -353,10 +383,12 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, /* * If a TLS pointer was passed to clone, use it for the new - * thread. + * thread. We also reset TPIDR2 if it's in use. 
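Note: taken together, the rules above give TPIDR2 predictable lifetime semantics for a userspace runtime: it is inherited across fork(), zeroed for a thread created with CLONE_SETTLS, and cleared again on exec. A hedged sketch for reading the register back from EL0, using the generic S3_3_C13_C0_5 encoding of TPIDR2_EL0 so that older assemblers cope; EL0 access relies on the SCTLR_EL1.EnTP2 enable set by sme_kernel_enable():

        static inline unsigned long read_tpidr2(void)
        {
                unsigned long val;

                /* mrs x<n>, TPIDR2_EL0, written via its generic encoding */
                asm volatile("mrs %0, S3_3_C13_C0_5" : "=r" (val));
                return val;
        }
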
*/ - if (clone_flags & CLONE_SETTLS) + if (clone_flags & CLONE_SETTLS) { p->thread.uw.tp_value = tls; + p->thread.tpidr2_el0 = 0; + } } else { /* * A kthread has no context to ERET to, so ensure any buggy @@ -387,6 +419,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, void tls_preserve_current_state(void) { *task_user_tls(current) = read_sysreg(tpidr_el0); + if (system_supports_tpidr2() && !is_compat_task()) + current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); } static void tls_thread_switch(struct task_struct *next) @@ -399,6 +433,8 @@ static void tls_thread_switch(struct task_struct *next) write_sysreg(0, tpidrro_el0); write_sysreg(*task_user_tls(next), tpidr_el0); + if (system_supports_tpidr2()) + write_sysreg_s(next->thread.tpidr2_el0, SYS_TPIDR2_EL0); } /* diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 5777929d35bf..40be3a7c2c53 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -853,6 +853,7 @@ u8 spectre_bhb_loop_affected(int scope) if (scope == SCOPE_LOCAL_CPU) { static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 230a47b9189e..47d8a7472171 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -713,21 +713,51 @@ static int system_call_set(struct task_struct *target, #ifdef CONFIG_ARM64_SVE static void sve_init_header_from_task(struct user_sve_header *header, - struct task_struct *target) + struct task_struct *target, + enum vec_type type) { unsigned int vq; + bool active; + bool fpsimd_only; + enum vec_type task_type; memset(header, 0, sizeof(*header)); - header->flags = test_tsk_thread_flag(target, TIF_SVE) ? 
- SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD; - if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) - header->flags |= SVE_PT_VL_INHERIT; + /* Check if the requested registers are active for the task */ + if (thread_sm_enabled(&target->thread)) + task_type = ARM64_VEC_SME; + else + task_type = ARM64_VEC_SVE; + active = (task_type == type); + + switch (type) { + case ARM64_VEC_SVE: + if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; + fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE); + break; + case ARM64_VEC_SME: + if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; + fpsimd_only = false; + break; + default: + WARN_ON_ONCE(1); + return; + } - header->vl = task_get_sve_vl(target); + if (active) { + if (fpsimd_only) { + header->flags |= SVE_PT_REGS_FPSIMD; + } else { + header->flags |= SVE_PT_REGS_SVE; + } + } + + header->vl = task_get_vl(target, type); vq = sve_vq_from_vl(header->vl); - header->max_vl = sve_max_vl(); + header->max_vl = vec_max_vl(type); header->size = SVE_PT_SIZE(vq, header->flags); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); @@ -738,19 +768,17 @@ static unsigned int sve_size_from_header(struct user_sve_header const *header) return ALIGN(header->size, SVE_VQ_BYTES); } -static int sve_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) +static int sve_get_common(struct task_struct *target, + const struct user_regset *regset, + struct membuf to, + enum vec_type type) { struct user_sve_header header; unsigned int vq; unsigned long start, end; - if (!system_supports_sve()) - return -EINVAL; - /* Header */ - sve_init_header_from_task(&header, target); + sve_init_header_from_task(&header, target, type); vq = sve_vq_from_vl(header.vl); membuf_write(&to, &header, sizeof(header)); @@ -758,49 +786,61 @@ static int sve_get(struct task_struct *target, if (target == current) fpsimd_preserve_current_state(); - /* Registers: FPSIMD-only case */ - BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); - if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) + BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); + + switch ((header.flags & SVE_PT_REGS_MASK)) { + case SVE_PT_REGS_FPSIMD: return __fpr_get(target, regset, to); - /* Otherwise: full SVE case */ + case SVE_PT_REGS_SVE: + start = SVE_PT_SVE_OFFSET; + end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); + membuf_write(&to, target->thread.sve_state, end - start); - BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); - start = SVE_PT_SVE_OFFSET; - end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); - membuf_write(&to, target->thread.sve_state, end - start); + start = end; + end = SVE_PT_SVE_FPSR_OFFSET(vq); + membuf_zero(&to, end - start); - start = end; - end = SVE_PT_SVE_FPSR_OFFSET(vq); - membuf_zero(&to, end - start); + /* + * Copy fpsr, and fpcr which must follow contiguously in + * struct fpsimd_state: + */ + start = end; + end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; + membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, + end - start); - /* - * Copy fpsr, and fpcr which must follow contiguously in - * struct fpsimd_state: - */ - start = end; - end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; - membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, end - start); + start = end; + end = sve_size_from_header(&header); + return membuf_zero(&to, end - start); - start = end; - end = sve_size_from_header(&header); - return membuf_zero(&to, end 
- start); + default: + return 0; + } } -static int sve_set(struct task_struct *target, +static int sve_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) + struct membuf to) +{ + if (!system_supports_sve()) + return -EINVAL; + + return sve_get_common(target, regset, to, ARM64_VEC_SVE); +} + +static int sve_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + enum vec_type type) { int ret; struct user_sve_header header; unsigned int vq; unsigned long start, end; - if (!system_supports_sve()) - return -EINVAL; - /* Header */ if (count < sizeof(header)) return -EINVAL; @@ -813,13 +853,37 @@ static int sve_set(struct task_struct *target, * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by * vec_set_vector_length(), which will also validate them for us: */ - ret = vec_set_vector_length(target, ARM64_VEC_SVE, header.vl, + ret = vec_set_vector_length(target, type, header.vl, ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); if (ret) goto out; /* Actual VL set may be less than the user asked for: */ - vq = sve_vq_from_vl(task_get_sve_vl(target)); + vq = sve_vq_from_vl(task_get_vl(target, type)); + + /* Enter/exit streaming mode */ + if (system_supports_sme()) { + u64 old_svcr = target->thread.svcr; + + switch (type) { + case ARM64_VEC_SVE: + target->thread.svcr &= ~SYS_SVCR_EL0_SM_MASK; + break; + case ARM64_VEC_SME: + target->thread.svcr |= SYS_SVCR_EL0_SM_MASK; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + /* + * If we switched then invalidate any existing SVE + * state and ensure there's storage. + */ + if (target->thread.svcr != old_svcr) + sve_alloc(target); + } /* Registers: FPSIMD-only case */ @@ -828,10 +892,15 @@ static int sve_set(struct task_struct *target, ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, SVE_PT_FPSIMD_OFFSET); clear_tsk_thread_flag(target, TIF_SVE); + if (type == ARM64_VEC_SME) + fpsimd_force_sync_to_sve(target); goto out; } - /* Otherwise: full SVE case */ + /* + * Otherwise: no registers or full SVE case. For backwards + * compatibility reasons we treat empty flags as SVE registers. + */ /* * If setting a different VL from the requested VL and there is @@ -852,8 +921,9 @@ static int sve_set(struct task_struct *target, /* * Ensure target->thread.sve_state is up to date with target's - * FPSIMD regs, so that a short copyin leaves trailing registers - * unmodified. + * FPSIMD regs, so that a short copyin leaves trailing + * registers unmodified. Always enable SVE even if going into + * streaming mode. 
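To see how a tracer drives the new streaming-mode regset, here is a minimal sketch. It assumes a kernel with this series applied plus matching UAPI headers (NT_ARM_SSVE and struct user_sve_header come from <linux/elf.h> and <asm/ptrace.h>); writing NT_ARM_SSVE is routed through sve_set_common() with ARM64_VEC_SME and therefore sets SVCR.SM, while a later write of NT_ARM_SVE clears it again.

	#include <sys/types.h>
	#include <sys/ptrace.h>
	#include <sys/uio.h>
	#include <linux/elf.h>		/* NT_ARM_SSVE (new in this series) */
	#include <asm/ptrace.h>		/* struct user_sve_header, SVE_PT_* */

	/* Put a stopped tracee into streaming mode with vector length @vl. */
	static long set_streaming_vl(pid_t pid, unsigned int vl)
	{
		struct user_sve_header hdr = {
			.size  = sizeof(hdr),
			.vl    = vl,			/* validated by vec_set_vector_length() */
			.flags = SVE_PT_REGS_SVE,	/* header-only write, no register payload */
		};
		struct iovec iov = { .iov_base = &hdr, .iov_len = sizeof(hdr) };

		return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SSVE, &iov);
	}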
*/ fpsimd_sync_to_sve(target); set_tsk_thread_flag(target, TIF_SVE); @@ -889,8 +959,181 @@ out: return ret; } +static int sve_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (!system_supports_sve()) + return -EINVAL; + + return sve_set_common(target, regset, pos, count, kbuf, ubuf, + ARM64_VEC_SVE); +} + #endif /* CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static int ssve_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + if (!system_supports_sme()) + return -EINVAL; + + return sve_get_common(target, regset, to, ARM64_VEC_SME); +} + +static int ssve_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (!system_supports_sme()) + return -EINVAL; + + return sve_set_common(target, regset, pos, count, kbuf, ubuf, + ARM64_VEC_SME); +} + +static int za_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct user_za_header header; + unsigned int vq; + unsigned long start, end; + + if (!system_supports_sme()) + return -EINVAL; + + /* Header */ + memset(&header, 0, sizeof(header)); + + if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) + header.flags |= ZA_PT_VL_INHERIT; + + header.vl = task_get_sme_vl(target); + vq = sve_vq_from_vl(header.vl); + header.max_vl = sme_max_vl(); + header.max_size = ZA_PT_SIZE(vq); + + /* If ZA is not active there is only the header */ + if (thread_za_enabled(&target->thread)) + header.size = ZA_PT_SIZE(vq); + else + header.size = ZA_PT_ZA_OFFSET; + + membuf_write(&to, &header, sizeof(header)); + + BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header)); + end = ZA_PT_ZA_OFFSET; + + if (target == current) + fpsimd_preserve_current_state(); + + /* Any register data to include? 
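Because za_get() reports only the header when ZA is off, userspace gets a cheap liveness probe. A hedged sketch, again assuming the new UAPI definitions (NT_ARM_ZA, struct user_za_header, ZA_PT_ZA_OFFSET):

	#include <sys/types.h>
	#include <sys/ptrace.h>
	#include <sys/uio.h>
	#include <linux/elf.h>		/* NT_ARM_ZA (new in this series) */
	#include <asm/ptrace.h>		/* struct user_za_header, ZA_PT_ZA_OFFSET */

	/* Returns 1 if the tracee has live ZA state, 0 if not, -1 on error. */
	static int za_is_live(pid_t pid)
	{
		struct user_za_header hdr;
		struct iovec iov = { .iov_base = &hdr, .iov_len = sizeof(hdr) };

		if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov) < 0)
			return -1;

		/* za_get() reports ZA_PT_ZA_OFFSET when ZA is disabled and the
		 * full ZA_PT_SIZE(vq) when the ZA array follows the header. */
		return hdr.size > ZA_PT_ZA_OFFSET;
	}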
*/ + if (thread_za_enabled(&target->thread)) { + start = end; + end = ZA_PT_SIZE(vq); + membuf_write(&to, target->thread.za_state, end - start); + } + + /* Zero any trailing padding */ + start = end; + end = ALIGN(header.size, SVE_VQ_BYTES); + return membuf_zero(&to, end - start); +} + +static int za_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct user_za_header header; + unsigned int vq; + unsigned long start, end; + + if (!system_supports_sme()) + return -EINVAL; + + /* Header */ + if (count < sizeof(header)) + return -EINVAL; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header, + 0, sizeof(header)); + if (ret) + goto out; + + /* + * All current ZA_PT_* flags are consumed by + * vec_set_vector_length(), which will also validate them for + * us: + */ + ret = vec_set_vector_length(target, ARM64_VEC_SME, header.vl, + ((unsigned long)header.flags) << 16); + if (ret) + goto out; + + /* Actual VL set may be less than the user asked for: */ + vq = sve_vq_from_vl(task_get_sme_vl(target)); + + /* Ensure there is some SVE storage for streaming mode */ + if (!target->thread.sve_state) { + sve_alloc(target); + if (!target->thread.sve_state) { + clear_thread_flag(TIF_SME); + ret = -ENOMEM; + goto out; + } + } + + /* Allocate/reinit ZA storage */ + sme_alloc(target); + if (!target->thread.za_state) { + ret = -ENOMEM; + clear_tsk_thread_flag(target, TIF_SME); + goto out; + } + + /* If there is no data then disable ZA */ + if (!count) { + target->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + goto out; + } + + /* + * If setting a different VL from the requested VL and there is + * register data, the data layout will be wrong: don't even + * try to set the registers in this case. 
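The "no data" branch earlier in za_set() also gives a tracer a simple way to drop a task's ZA state: read the header back so the current vector length is echoed unchanged, then write it back with no register payload. A sketch reusing the includes from the NT_ARM_ZA probe above; the exact flag handling is an assumption based on the copyin path shown here.

	/* Disable ZA for a stopped tracee by writing a bare NT_ARM_ZA header. */
	static int za_disable(pid_t pid)
	{
		struct user_za_header hdr;
		struct iovec iov = { .iov_base = &hdr, .iov_len = sizeof(hdr) };

		if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov) < 0)
			return -1;

		/* No ZA data follows the header, so za_set() takes the
		 * "If there is no data then disable ZA" path and clears SVCR.ZA. */
		iov.iov_len = sizeof(hdr);
		return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov) < 0 ? -1 : 0;
	}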
+ */ + if (vq != sve_vq_from_vl(header.vl)) { + ret = -EIO; + goto out; + } + + BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header)); + start = ZA_PT_ZA_OFFSET; + end = ZA_PT_SIZE(vq); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.za_state, + start, end); + if (ret) + goto out; + + /* Mark ZA as active and let userspace use it */ + set_tsk_thread_flag(target, TIF_SME); + target->thread.svcr |= SYS_SVCR_EL0_ZA_MASK; + +out: + fpsimd_flush_task_state(target); + return ret; +} + +#endif /* CONFIG_ARM64_SME */ + #ifdef CONFIG_ARM64_PTR_AUTH static int pac_mask_get(struct task_struct *target, const struct user_regset *regset, @@ -1108,6 +1351,10 @@ enum aarch64_regset { #ifdef CONFIG_ARM64_SVE REGSET_SVE, #endif +#ifdef CONFIG_ARM64_SVE + REGSET_SSVE, + REGSET_ZA, +#endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, REGSET_PAC_ENABLED_KEYS, @@ -1188,6 +1435,25 @@ static const struct user_regset aarch64_regsets[] = { .set = sve_set, }, #endif +#ifdef CONFIG_ARM64_SME + [REGSET_SSVE] = { /* Streaming mode SVE */ + .core_note_type = NT_ARM_SSVE, + .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE), + SVE_VQ_BYTES), + .size = SVE_VQ_BYTES, + .align = SVE_VQ_BYTES, + .regset_get = ssve_get, + .set = ssve_set, + }, + [REGSET_ZA] = { /* SME ZA */ + .core_note_type = NT_ARM_ZA, + .n = DIV_ROUND_UP(ZA_PT_ZA_SIZE(SVE_VQ_MAX), SVE_VQ_BYTES), + .size = SVE_VQ_BYTES, + .align = SVE_VQ_BYTES, + .regset_get = za_get, + .set = za_set, + }, +#endif #ifdef CONFIG_ARM64_PTR_AUTH [REGSET_PAC_MASK] = { .core_note_type = NT_ARM_PAC_MASK, diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 4a4122ef6f39..2295948d97fd 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -56,6 +56,7 @@ struct rt_sigframe_user_layout { unsigned long fpsimd_offset; unsigned long esr_offset; unsigned long sve_offset; + unsigned long za_offset; unsigned long extra_offset; unsigned long end_offset; }; @@ -218,6 +219,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) struct user_ctxs { struct fpsimd_context __user *fpsimd; struct sve_context __user *sve; + struct za_context __user *za; }; #ifdef CONFIG_ARM64_SVE @@ -226,11 +228,17 @@ static int preserve_sve_context(struct sve_context __user *ctx) { int err = 0; u16 reserved[ARRAY_SIZE(ctx->__reserved)]; + u16 flags = 0; unsigned int vl = task_get_sve_vl(current); unsigned int vq = 0; - if (test_thread_flag(TIF_SVE)) + if (thread_sm_enabled(¤t->thread)) { + vl = task_get_sme_vl(current); vq = sve_vq_from_vl(vl); + flags |= SVE_SIG_FLAG_SM; + } else if (test_thread_flag(TIF_SVE)) { + vq = sve_vq_from_vl(vl); + } memset(reserved, 0, sizeof(reserved)); @@ -238,6 +246,7 @@ static int preserve_sve_context(struct sve_context __user *ctx) __put_user_error(round_up(SVE_SIG_CONTEXT_SIZE(vq), 16), &ctx->head.size, err); __put_user_error(vl, &ctx->vl, err); + __put_user_error(flags, &ctx->flags, err); BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved)); err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); @@ -258,18 +267,28 @@ static int preserve_sve_context(struct sve_context __user *ctx) static int restore_sve_fpsimd_context(struct user_ctxs *user) { int err; - unsigned int vq; + unsigned int vl, vq; struct user_fpsimd_state fpsimd; struct sve_context sve; if (__copy_from_user(&sve, user->sve, sizeof(sve))) return -EFAULT; - if (sve.vl != task_get_sve_vl(current)) + if (sve.flags & SVE_SIG_FLAG_SM) { + if (!system_supports_sme()) + return -EINVAL; + + vl = task_get_sme_vl(current); + 
} else { + vl = task_get_sve_vl(current); + } + + if (sve.vl != vl) return -EINVAL; if (sve.head.size <= sizeof(*user->sve)) { clear_thread_flag(TIF_SVE); + current->thread.svcr &= ~SYS_SVCR_EL0_SM_MASK; goto fpsimd_only; } @@ -301,7 +320,10 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (err) return -EFAULT; - set_thread_flag(TIF_SVE); + if (sve.flags & SVE_SIG_FLAG_SM) + current->thread.svcr |= SYS_SVCR_EL0_SM_MASK; + else + set_thread_flag(TIF_SVE); fpsimd_only: /* copy the FP and status/control registers */ @@ -326,6 +348,101 @@ extern int restore_sve_fpsimd_context(struct user_ctxs *user); #endif /* ! CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static int preserve_za_context(struct za_context __user *ctx) +{ + int err = 0; + u16 reserved[ARRAY_SIZE(ctx->__reserved)]; + unsigned int vl = task_get_sme_vl(current); + unsigned int vq; + + if (thread_za_enabled(¤t->thread)) + vq = sve_vq_from_vl(vl); + else + vq = 0; + + memset(reserved, 0, sizeof(reserved)); + + __put_user_error(ZA_MAGIC, &ctx->head.magic, err); + __put_user_error(round_up(ZA_SIG_CONTEXT_SIZE(vq), 16), + &ctx->head.size, err); + __put_user_error(vl, &ctx->vl, err); + BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved)); + err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); + + if (vq) { + /* + * This assumes that the ZA state has already been saved to + * the task struct by calling the function + * fpsimd_signal_preserve_current_state(). + */ + err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET, + current->thread.za_state, + ZA_SIG_REGS_SIZE(vq)); + } + + return err ? -EFAULT : 0; +} + +static int restore_za_context(struct user_ctxs __user *user) +{ + int err; + unsigned int vq; + struct za_context za; + + if (__copy_from_user(&za, user->za, sizeof(za))) + return -EFAULT; + + if (za.vl != task_get_sme_vl(current)) + return -EINVAL; + + if (za.head.size <= sizeof(*user->za)) { + current->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + return 0; + } + + vq = sve_vq_from_vl(za.vl); + + if (za.head.size < ZA_SIG_CONTEXT_SIZE(vq)) + return -EINVAL; + + /* + * Careful: we are about __copy_from_user() directly into + * thread.za_state with preemption enabled, so protection is + * needed to prevent a racing context switch from writing stale + * registers back over the new data. + */ + + fpsimd_flush_task_state(current); + /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ + + sme_alloc(current); + if (!current->thread.za_state) { + current->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + clear_thread_flag(TIF_SME); + return -ENOMEM; + } + + err = __copy_from_user(current->thread.za_state, + (char __user const *)user->za + + ZA_SIG_REGS_OFFSET, + ZA_SIG_REGS_SIZE(vq)); + if (err) + return -EFAULT; + + set_thread_flag(TIF_SME); + current->thread.svcr |= SYS_SVCR_EL0_ZA_MASK; + + return 0; +} +#else /* ! CONFIG_ARM64_SME */ + +/* Turn any non-optimised out attempts to use these into a link error: */ +extern int preserve_za_context(void __user *ctx); +extern int restore_za_context(struct user_ctxs *user); + +#endif /* ! 
CONFIG_ARM64_SME */ static int parse_user_sigframe(struct user_ctxs *user, struct rt_sigframe __user *sf) @@ -340,6 +457,7 @@ static int parse_user_sigframe(struct user_ctxs *user, user->fpsimd = NULL; user->sve = NULL; + user->za = NULL; if (!IS_ALIGNED((unsigned long)base, 16)) goto invalid; @@ -393,7 +511,7 @@ static int parse_user_sigframe(struct user_ctxs *user, break; case SVE_MAGIC: - if (!system_supports_sve()) + if (!system_supports_sve() && !system_supports_sme()) goto invalid; if (user->sve) @@ -405,6 +523,19 @@ static int parse_user_sigframe(struct user_ctxs *user, user->sve = (struct sve_context __user *)head; break; + case ZA_MAGIC: + if (!system_supports_sme()) + goto invalid; + + if (user->za) + goto invalid; + + if (size < sizeof(*user->za)) + goto invalid; + + user->za = (struct za_context __user *)head; + break; + case EXTRA_MAGIC: if (have_extra_context) goto invalid; @@ -528,6 +659,9 @@ static int restore_sigframe(struct pt_regs *regs, } } + if (err == 0 && system_supports_sme() && user.za) + err = restore_za_context(&user); + return err; } @@ -594,11 +728,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, if (system_supports_sve()) { unsigned int vq = 0; - if (add_all || test_thread_flag(TIF_SVE)) { - int vl = sve_max_vl(); + if (add_all || test_thread_flag(TIF_SVE) || + thread_sm_enabled(¤t->thread)) { + int vl = max(sve_max_vl(), sme_max_vl()); if (!add_all) - vl = task_get_sve_vl(current); + vl = thread_get_cur_vl(¤t->thread); vq = sve_vq_from_vl(vl); } @@ -609,6 +744,24 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } + if (system_supports_sme()) { + unsigned int vl; + unsigned int vq = 0; + + if (add_all) + vl = sme_max_vl(); + else + vl = task_get_sme_vl(current); + + if (thread_za_enabled(¤t->thread)) + vq = sve_vq_from_vl(vl); + + err = sigframe_alloc(user, &user->za_offset, + ZA_SIG_CONTEXT_SIZE(vq)); + if (err) + return err; + } + return sigframe_alloc_end(user); } @@ -649,13 +802,21 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); } - /* Scalable Vector Extension state, if present */ - if (system_supports_sve() && err == 0 && user->sve_offset) { + /* Scalable Vector Extension state (including streaming), if present */ + if ((system_supports_sve() || system_supports_sme()) && + err == 0 && user->sve_offset) { struct sve_context __user *sve_ctx = apply_user_offset(user, user->sve_offset); err |= preserve_sve_context(sve_ctx); } + /* ZA state if present */ + if (system_supports_sme() && err == 0 && user->za_offset) { + struct za_context __user *za_ctx = + apply_user_offset(user, user->za_offset); + err |= preserve_za_context(za_ctx); + } + if (err == 0 && user->extra_offset) { char __user *sfp = (char __user *)user->sigframe; char __user *userp = @@ -759,6 +920,13 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* TCO (Tag Check Override) always cleared for signal handlers */ regs->pstate &= ~PSR_TCO_BIT; + /* Signal handlers are invoked with ZA and streaming mode disabled */ + if (system_supports_sme()) { + current->thread.svcr &= ~(SYS_SVCR_EL0_ZA_MASK | + SYS_SVCR_EL0_SM_MASK); + sme_smstop(); + } + if (ka->sa.sa_flags & SA_RESTORER) sigtramp = ka->sa.sa_restorer; else diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 27df5c1e6baa..3b46041f2b97 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -234,6 +234,7 @@ asmlinkage notrace void 
secondary_start_kernel(void) * Log the CPU info before it is marked online and might get read. */ cpuinfo_store_cpu(); + store_cpu_topology(cpu); /* * Enable GIC and timers. @@ -242,7 +243,6 @@ asmlinkage notrace void secondary_start_kernel(void) ipi_setup(cpu); - store_cpu_topology(cpu); numa_add_cpu(cpu); /* diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 19ee7c33769d..2b0887e58a7c 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -140,7 +140,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) /* * Restore pstate flags. OS lock and mdscr have been already * restored, so from this point onwards, debugging is fully - * renabled if it was enabled when core started shutdown. + * reenabled if it was enabled when core started shutdown. */ local_daif_restore(flags); diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index c938603b3ba0..92c69e5ac269 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -158,11 +158,36 @@ trace_exit: syscall_trace_exit(regs); } -static inline void sve_user_discard(void) +/* + * As per the ABI exit SME streaming mode and clear the SVE state not + * shared with FPSIMD on syscall entry. + */ +static inline void fp_user_discard(void) { + /* + * If SME is active then exit streaming mode. If ZA is active + * then flush the SVE registers but leave userspace access to + * both SVE and SME enabled, otherwise disable SME for the + * task and fall through to disabling SVE too. This means + * that after a syscall we never have any streaming mode + * register state to track, if this changes the KVM code will + * need updating. + */ + if (system_supports_sme() && test_thread_flag(TIF_SME)) { + u64 svcr = read_sysreg_s(SYS_SVCR_EL0); + + if (svcr & SYS_SVCR_EL0_SM_MASK) + sme_smstop_sm(); + } + if (!system_supports_sve()) return; + /* + * If SME is not active then disable SVE, the registers will + * be cleared when userspace next attempts to access them and + * we do not need to track the SVE register state until then. 
+ */ clear_thread_flag(TIF_SVE); /* @@ -177,7 +202,7 @@ static inline void sve_user_discard(void) void do_el0_svc(struct pt_regs *regs) { - sve_user_discard(); + fp_user_discard(); el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0529fd57567e..6751621e5bea 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -821,6 +821,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_SVE] = "SVE", [ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB", [ESR_ELx_EC_FPAC] = "FPAC", + [ESR_ELx_EC_SME] = "SME", [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 261644b1a6bb..aa127ae9f675 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ - vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \ + vgic-sys-reg-v3.o fpsimd.o pkvm.o \ arch_timer.o trng.o vmid.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ @@ -22,7 +22,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ vgic/vgic-its.o vgic/vgic-debug.o -kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o +kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o always-y := hyp_constants.h hyp-constants.s diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 6e542e2eae32..4e39ace073af 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -208,18 +208,16 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) return IRQ_HANDLED; } -static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) +static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx, + u64 val) { - u64 cval, now; - - cval = timer_get_cval(timer_ctx); - now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); + u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); - if (now < cval) { + if (now < val) { u64 ns; ns = cyclecounter_cyc2ns(timecounter->cc, - cval - now, + val - now, timecounter->mask, &timecounter->frac); return ns; @@ -228,6 +226,11 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) return 0; } +static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) +{ + return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx)); +} + static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) { WARN_ON(timer_ctx && timer_ctx->loaded); @@ -236,6 +239,20 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE); } +static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu) +{ + return (cpus_have_final_cap(ARM64_HAS_WFXT) && + (vcpu->arch.flags & KVM_ARM64_WFIT)); +} + +static u64 wfit_delay_ns(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *ctx = vcpu_vtimer(vcpu); + u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); + + return kvm_counter_compute_delta(ctx, val); +} + /* * Returns the earliest expiration time in ns among guest timers. * Note that it will return 0 if none of timers can fire. 
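kvm_counter_compute_delta() turns the remaining counter ticks into nanoseconds via the arch timer's timecounter. A back-of-the-envelope equivalent, assuming a fixed counter frequency instead of the kernel's cyclecounter_cyc2ns() machinery:

	/* Nanoseconds until an absolute counter value is reached (0 if passed). */
	static unsigned long long delta_ns(unsigned long long now,
					   unsigned long long cval,
					   unsigned long long counter_hz)
	{
		if (now >= cval)
			return 0;	/* already expired: the timer or a WFIT can fire immediately */

		return (cval - now) * 1000000000ULL / counter_hz;
	}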
@@ -253,6 +270,9 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) min_delta = min(min_delta, kvm_timer_compute_delta(ctx)); } + if (vcpu_has_wfit_active(vcpu)) + min_delta = min(min_delta, wfit_delay_ns(vcpu)); + /* If none of timers can fire, then return 0 */ if (min_delta == ULLONG_MAX) return 0; @@ -350,15 +370,9 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) return cval <= now; } -bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - struct timer_map map; - - get_timer_map(vcpu, &map); - - return kvm_timer_should_fire(map.direct_vtimer) || - kvm_timer_should_fire(map.direct_ptimer) || - kvm_timer_should_fire(map.emul_ptimer); + return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0; } /* @@ -484,7 +498,8 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu) */ if (!kvm_timer_irq_can_fire(map.direct_vtimer) && !kvm_timer_irq_can_fire(map.direct_ptimer) && - !kvm_timer_irq_can_fire(map.emul_ptimer)) + !kvm_timer_irq_can_fire(map.emul_ptimer) && + !vcpu_has_wfit_active(vcpu)) return; /* diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 7fceb855fa71..807b2853b02a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -97,6 +97,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, } mutex_unlock(&kvm->lock); break; + case KVM_CAP_ARM_SYSTEM_SUSPEND: + r = 0; + set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags); + break; default: r = -EINVAL; break; @@ -156,6 +160,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->max_vcpus = kvm_arm_default_max_vcpus(); set_default_spectre(kvm); + kvm_arm_init_hypercalls(kvm); return ret; out_free_stage2_pgd: @@ -210,6 +215,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_PTP_KVM: + case KVM_CAP_ARM_SYSTEM_SUSPEND: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -356,11 +362,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_arm_vcpu_destroy(vcpu); } -int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) -{ - return kvm_timer_is_pending(vcpu); -} - void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { @@ -432,20 +433,34 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1; } -static void vcpu_power_off(struct kvm_vcpu *vcpu) +void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = true; + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } +bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED; +} + +static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED; + kvm_make_request(KVM_REQ_SUSPEND, vcpu); + kvm_vcpu_kick(vcpu); +} + +static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - if (vcpu->arch.power_off) - mp_state->mp_state = KVM_MP_STATE_STOPPED; - else - mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + *mp_state = vcpu->arch.mp_state; return 0; } @@ -457,10 +472,13 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.power_off = false; + vcpu->arch.mp_state = *mp_state; break; case KVM_MP_STATE_STOPPED: - vcpu_power_off(vcpu); + kvm_arm_vcpu_power_off(vcpu); 
+ break; + case KVM_MP_STATE_SUSPENDED: + kvm_arm_vcpu_suspend(vcpu); break; default: ret = -EINVAL; @@ -480,7 +498,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) - && !v->arch.power_off && !v->arch.pause); + && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); } bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) @@ -592,15 +610,15 @@ void kvm_arm_resume_guest(struct kvm *kvm) } } -static void vcpu_req_sleep(struct kvm_vcpu *vcpu) +static void kvm_vcpu_sleep(struct kvm_vcpu *vcpu) { struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); rcuwait_wait_event(wait, - (!vcpu->arch.power_off) &&(!vcpu->arch.pause), + (!kvm_arm_vcpu_stopped(vcpu)) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); - if (vcpu->arch.power_off || vcpu->arch.pause) { + if (kvm_arm_vcpu_stopped(vcpu) || vcpu->arch.pause) { /* Awaken to handle a signal, request we sleep again later. */ kvm_make_request(KVM_REQ_SLEEP, vcpu); } @@ -639,6 +657,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) preempt_enable(); kvm_vcpu_halt(vcpu); + vcpu->arch.flags &= ~KVM_ARM64_WFIT; kvm_clear_request(KVM_REQ_UNHALT, vcpu); preempt_disable(); @@ -646,11 +665,53 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) preempt_enable(); } -static void check_vcpu_requests(struct kvm_vcpu *vcpu) +static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + if (!kvm_arm_vcpu_suspended(vcpu)) + return 1; + + kvm_vcpu_wfi(vcpu); + + /* + * The suspend state is sticky; we do not leave it until userspace + * explicitly marks the vCPU as runnable. Request that we suspend again + * later. + */ + kvm_make_request(KVM_REQ_SUSPEND, vcpu); + + /* + * Check to make sure the vCPU is actually runnable. If so, exit to + * userspace informing it of the wakeup condition. + */ + if (kvm_arch_vcpu_runnable(vcpu)) { + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + return 0; + } + + /* + * Otherwise, we were unblocked to process a different event, such as a + * pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to + * process the event. 
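From the VMM's point of view the flow is: opt in to the capability, then treat the KVM_SYSTEM_EVENT_WAKEUP exit as a request to make the vCPU runnable again. A minimal sketch, assuming UAPI headers that carry the new KVM_CAP_ARM_SYSTEM_SUSPEND and KVM_SYSTEM_EVENT_WAKEUP definitions, with error handling omitted:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static void enable_system_suspend(int vm_fd)
	{
		struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_SYSTEM_SUSPEND };

		ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}

	static void handle_wakeup(int vcpu_fd, struct kvm_run *run)
	{
		if (run->exit_reason == KVM_EXIT_SYSTEM_EVENT &&
		    run->system_event.type == KVM_SYSTEM_EVENT_WAKEUP) {
			/* The suspend state is sticky: the vCPU keeps requesting
			 * suspend until userspace marks it runnable again. */
			struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_RUNNABLE };

			ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
		}
	}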
+ */ + return 1; +} + +/** + * check_vcpu_requests - check and handle pending vCPU requests + * @vcpu: the VCPU pointer + * + * Return: 1 if we should enter the guest + * 0 if we should exit to userspace + * < 0 if we should exit to userspace, where the return value indicates + * an error + */ +static int check_vcpu_requests(struct kvm_vcpu *vcpu) { if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) - vcpu_req_sleep(vcpu); + kvm_vcpu_sleep(vcpu); if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) kvm_reset_vcpu(vcpu); @@ -675,7 +736,12 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu)) kvm_pmu_handle_pmcr(vcpu, __vcpu_sys_reg(vcpu, PMCR_EL0)); + + if (kvm_check_request(KVM_REQ_SUSPEND, vcpu)) + return kvm_vcpu_suspend(vcpu); } + + return 1; } static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) @@ -791,7 +857,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (!ret) ret = 1; - check_vcpu_requests(vcpu); + if (ret > 0) + ret = check_vcpu_requests(vcpu); /* * Preparing the interrupts to be injected also @@ -815,6 +882,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_vgic_flush_hwstate(vcpu); + kvm_pmu_update_vcpu_events(vcpu); + /* * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. @@ -1124,9 +1193,9 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, * Handle the "start in power-off" case. */ if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) - vcpu_power_off(vcpu); + kvm_arm_vcpu_power_off(vcpu); else - vcpu->arch.power_off = false; + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; return 0; } @@ -1483,7 +1552,6 @@ static void cpu_prepare_hyp_mode(int cpu) tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; params->tcr_el2 = tcr; - params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE); params->pgd_pa = kvm_mmu_get_httbr(); if (is_protected_kvm_enabled()) params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS; @@ -1761,8 +1829,6 @@ static int init_subsystems(void) kvm_register_perf_callbacks(NULL); - kvm_sys_reg_table_init(); - out: if (err || !is_protected_kvm_enabled()) on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); @@ -1933,14 +1999,46 @@ static int init_hyp_mode(void) * Map the Hyp stack pages */ for_each_possible_cpu(cpu) { + struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu); - err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE, - PAGE_HYP); + unsigned long hyp_addr; + + /* + * Allocate a contiguous HYP private VA range for the stack + * and guard page. The allocation is also aligned based on + * the order of its size. + */ + err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); + if (err) { + kvm_err("Cannot allocate hyp stack guard page\n"); + goto out_err; + } + /* + * Since the stack grows downwards, map the stack to the page + * at the higher address and leave the lower guard page + * unbacked. + * + * Any valid stack address now has the PAGE_SHIFT bit as 1 + * and addresses corresponding to the guard page have the + * PAGE_SHIFT bit as 0 - this is used for overflow detection. + */ + err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE, + __pa(stack_page), PAGE_HYP); if (err) { kvm_err("Cannot map hyp stack\n"); goto out_err; } + + /* + * Save the stack PA in nvhe_init_params. 
This will be needed + * to recreate the stack mapping in protected nVHE mode. + * __hyp_pa() won't do the right thing there, since the stack + * has been mapped in the flexible private VA space. + */ + params->stack_pa = __pa(stack_page); + + params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); } for_each_possible_cpu(cpu) { @@ -2089,6 +2187,12 @@ int kvm_arch_init(void *opaque) return -ENODEV; } + err = kvm_sys_reg_table_init(); + if (err) { + kvm_info("Error initializing system register tables"); + return err; + } + in_hyp_mode = is_kernel_in_hyp_mode(); if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 397fdac75cb1..441edb9c398c 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -82,6 +82,26 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; + + /* + * We don't currently support SME guests but if we leave + * things in streaming mode then when the guest starts running + * FPSIMD or SVE code it may generate SME traps so as a + * special case if we are in streaming mode we force the host + * state to be saved now and exit streaming mode so that we + * don't have to handle any SME traps for valid guest + * operations. Do this for ZA as well for now for simplicity. + */ + if (system_supports_sme()) { + if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) + vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED; + + if (read_sysreg_s(SYS_SVCR_EL0) & + (SYS_SVCR_EL0_SM_MASK | SYS_SVCR_EL0_ZA_MASK)) { + vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; + fpsimd_save_and_flush_cpu_state(); + } + } } /* @@ -109,9 +129,14 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { + /* + * Currently we do not support SME guests so SVCR is + * always 0 and we just need a variable to point to. + */ fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, - vcpu->arch.sve_max_vl); + vcpu->arch.sve_max_vl, + NULL, 0, &vcpu->arch.svcr); clear_thread_flag(TIF_FOREIGN_FPSTATE); update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); @@ -130,6 +155,22 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); + /* + * If we have VHE then the Hyp code will reset CPACR_EL1 to + * CPACR_EL1_DEFAULT and we need to reenable SME. 
+ */ + if (has_vhe() && system_supports_sme()) { + /* Also restore EL0 state seen on entry */ + if (vcpu->arch.flags & KVM_ARM64_HOST_SME_ENABLED) + sysreg_clear_set(CPACR_EL1, 0, + CPACR_EL1_SMEN_EL0EN | + CPACR_EL1_SMEN_EL1EN); + else + sysreg_clear_set(CPACR_EL1, + CPACR_EL1_SMEN_EL0EN, + CPACR_EL1_SMEN_EL1EN); + } + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 7e15b03fbdf8..8c607199cad1 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -18,7 +18,7 @@ #include <linux/string.h> #include <linux/vmalloc.h> #include <linux/fs.h> -#include <kvm/arm_psci.h> +#include <kvm/arm_hypercalls.h> #include <asm/cputype.h> #include <linux/uaccess.h> #include <asm/fpsimd.h> @@ -756,7 +756,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) switch (reg->id & KVM_REG_ARM_COPROC_MASK) { case KVM_REG_ARM_CORE: return get_core_reg(vcpu, reg); - case KVM_REG_ARM_FW: return kvm_arm_get_fw_reg(vcpu, reg); + case KVM_REG_ARM_FW: + case KVM_REG_ARM_FW_FEAT_BMAP: + return kvm_arm_get_fw_reg(vcpu, reg); case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg); } @@ -774,7 +776,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) switch (reg->id & KVM_REG_ARM_COPROC_MASK) { case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg); - case KVM_REG_ARM_FW: return kvm_arm_set_fw_reg(vcpu, reg); + case KVM_REG_ARM_FW: + case KVM_REG_ARM_FW_FEAT_BMAP: + return kvm_arm_set_fw_reg(vcpu, reg); case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg); } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 97fe14aab1a3..2ebebd3efaee 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -80,24 +80,51 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu) * * @vcpu: the vcpu pointer * - * WFE: Yield the CPU and come back to this vcpu when the scheduler + * WFE[T]: Yield the CPU and come back to this vcpu when the scheduler * decides to. * WFI: Simply call kvm_vcpu_halt(), which will halt execution of * world-switches and schedule other host processes until there is an * incoming IRQ or FIQ to the VM. + * WFIT: Same as WFI, with a timed wakeup implemented as a background timer + * + * WF{I,E}T can immediately return if the deadline has already expired. 
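For reference, the guest side of a WFIT is simply "wait for an interrupt, or until the virtual counter passes an absolute deadline". A sketch, assuming a toolchain that accepts the WFxT mnemonics (for example with -march=armv8.7-a):

	static inline unsigned long long read_cntvct(void)
	{
		unsigned long long now;

		asm volatile("mrs %0, cntvct_el0" : "=r" (now));
		return now;
	}

	/* Sleep until an interrupt arrives or CNTVCT_EL0 >= deadline (in ticks). */
	static inline void wfit_until(unsigned long long deadline)
	{
		/* Returns immediately when the deadline has already passed,
		 * matching the "now >= val" check in kvm_handle_wfx(). */
		asm volatile("wfit %0" :: "r" (deadline));
	}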
*/ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) { + u64 esr = kvm_vcpu_get_esr(vcpu); + + if (esr & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; - kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; - kvm_vcpu_wfi(vcpu); } + if (esr & ESR_ELx_WFx_ISS_WFxT) { + if (esr & ESR_ELx_WFx_ISS_RV) { + u64 val, now; + + now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT); + val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); + + if (now >= val) + goto out; + } else { + /* Treat WFxT as WFx if RN is invalid */ + esr &= ~ESR_ELx_WFx_ISS_WFxT; + } + } + + if (esr & ESR_ELx_WFx_ISS_WFE) { + kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); + } else { + if (esr & ESR_ELx_WFx_ISS_WFxT) + vcpu->arch.flags |= KVM_ARM64_WFIT; + + kvm_vcpu_wfi(vcpu); + } +out: kvm_incr_pc(vcpu); return 1; @@ -167,6 +194,7 @@ static exit_handle_fn arm_exit_handlers[] = { [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, [ESR_ELx_EC_CP14_MR] = kvm_handle_cp14_32, [ESR_ELx_EC_CP14_LS] = kvm_handle_cp14_load_store, + [ESR_ELx_EC_CP10_ID] = kvm_handle_cp10_id, [ESR_ELx_EC_CP14_64] = kvm_handle_cp14_64, [ESR_ELx_EC_HVC32] = handle_hvc, [ESR_ELx_EC_SMC32] = handle_smc, @@ -295,13 +323,8 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_in_kimg = __phys_to_kimg(elr_phys); u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt; u64 mode = spsr & PSR_MODE_MASK; + u64 panic_addr = elr_virt + hyp_offset; - /* - * The nVHE hyp symbols are not included by kallsyms to avoid issues - * with aliasing. That means that the symbols cannot be printed with the - * "%pS" format specifier, so fall back to the vmlinux address if - * there's no better option. 
- */ if (mode != PSR_MODE_EL2t && mode != PSR_MODE_EL2h) { kvm_err("Invalid host exception to nVHE hyp!\n"); } else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && @@ -321,9 +344,11 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, if (file) kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line); else - kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset); + kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr, + (void *)panic_addr); } else { - kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset); + kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr, + (void *)panic_addr); } /* diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 2d08510c6cc1..42d8eb9bfe72 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -19,8 +19,10 @@ int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); -unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, - enum kvm_pgtable_prot prot); +int __pkvm_create_private_mapping(phys_addr_t phys, size_t size, + enum kvm_pgtable_prot prot, + unsigned long *haddr); +int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr); static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size, unsigned long *start, unsigned long *end) diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 3d613e721a75..ea6a397b64a6 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -80,7 +80,7 @@ SYM_FUNC_START(__hyp_do_panic) mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) msr spsr_el2, lr - ldr lr, =nvhe_hyp_panic_handler + adr_l lr, nvhe_hyp_panic_handler hyp_kimg_va lr, x6 msr elr_el2, lr @@ -125,13 +125,11 @@ alternative_else_nop_endif add sp, sp, #16 /* * Compute the idmap address of __kvm_handle_stub_hvc and - * jump there. Since we use kimage_voffset, do not use the - * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead - * (by loading it from the constant pool). + * jump there. * * Preserve x0-x4, which may contain stub parameters. */ - ldr x5, =__kvm_handle_stub_hvc + adr_l x5, __kvm_handle_stub_hvc hyp_pa x5, x6 br x5 SYM_FUNC_END(__host_hvc) @@ -153,6 +151,18 @@ SYM_FUNC_END(__host_hvc) .macro invalid_host_el2_vect .align 7 + + /* + * Test whether the SP has overflowed, without corrupting a GPR. + * nVHE hypervisor stacks are aligned so that the PAGE_SHIFT bit + * of SP should always be 1. + */ + add sp, sp, x0 // sp' = sp + x0 + sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp + tbz x0, #PAGE_SHIFT, .L__hyp_sp_overflow\@ + sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 + sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp + /* If a guest is loaded, panic out of it. */ stp x0, x1, [sp, #-16]! get_loaded_vcpu x0, x1 @@ -165,6 +175,18 @@ SYM_FUNC_END(__host_hvc) * been partially clobbered by __host_enter. */ b hyp_panic + +.L__hyp_sp_overflow\@: + /* + * Reset SP to the top of the stack, to allow handling the hyp_panic. + * This corrupts the stack but is ok, since we won't be attempting + * any unwinding here. 
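The add/sub/tbz sequence above relies on the stack layout set up elsewhere in this series: each hyp stack lives in a two-page, two-page-aligned private VA range whose low page is left unmapped as a guard. A C rendition of the invariant being tested, assuming 4K pages:

	#include <stdbool.h>

	#define PAGE_SHIFT	12	/* assumption: 4K pages */

	/*
	 * The stack page occupies [base + PAGE_SIZE, base + 2 * PAGE_SIZE) and
	 * the guard page [base, base + PAGE_SIZE), with base aligned to two
	 * pages, so bit PAGE_SHIFT of SP is 1 while SP is on the stack and 0
	 * once SP has overflowed into the guard page.
	 */
	static bool hyp_sp_on_stack(unsigned long sp)
	{
		return sp & (1UL << PAGE_SHIFT);
	}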
+ */ + ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1 + mov sp, x0 + + b hyp_panic_bad_stack + ASM_BUG() .endm .macro invalid_host_el1_vect @@ -198,15 +220,15 @@ SYM_CODE_START(__kvm_hyp_host_vector) invalid_host_el2_vect // FIQ EL2h invalid_host_el2_vect // Error EL2h - host_el1_sync_vect // Synchronous 64-bit EL1 - invalid_host_el1_vect // IRQ 64-bit EL1 - invalid_host_el1_vect // FIQ 64-bit EL1 - invalid_host_el1_vect // Error 64-bit EL1 + host_el1_sync_vect // Synchronous 64-bit EL1/EL0 + invalid_host_el1_vect // IRQ 64-bit EL1/EL0 + invalid_host_el1_vect // FIQ 64-bit EL1/EL0 + invalid_host_el1_vect // Error 64-bit EL1/EL0 - invalid_host_el1_vect // Synchronous 32-bit EL1 - invalid_host_el1_vect // IRQ 32-bit EL1 - invalid_host_el1_vect // FIQ 32-bit EL1 - invalid_host_el1_vect // Error 32-bit EL1 + host_el1_sync_vect // Synchronous 32-bit EL1/EL0 + invalid_host_el1_vect // IRQ 32-bit EL1/EL0 + invalid_host_el1_vect // FIQ 32-bit EL1/EL0 + invalid_host_el1_vect // Error 32-bit EL1/EL0 SYM_CODE_END(__kvm_hyp_host_vector) /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 5e2197db0d32..3cea4b6ac23e 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -160,7 +160,23 @@ static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ct DECLARE_REG(size_t, size, host_ctxt, 2); DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3); - cpu_reg(host_ctxt, 1) = __pkvm_create_private_mapping(phys, size, prot); + /* + * __pkvm_create_private_mapping() populates a pointer with the + * hypervisor start address of the allocation. + * + * However, handle___pkvm_create_private_mapping() hypercall crosses the + * EL1/EL2 boundary so the pointer would not be valid in this context. + * + * Instead pass the allocation address as the return value (or return + * ERR_PTR() on failure). + */ + unsigned long haddr; + int err = __pkvm_create_private_mapping(phys, size, prot, &haddr); + + if (err) + haddr = (unsigned long)ERR_PTR(err); + + cpu_reg(host_ctxt, 1) = haddr; } static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index cdbe8e246418..96193cb31a39 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -37,36 +37,60 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, return err; } -unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, - enum kvm_pgtable_prot prot) +/** + * pkvm_alloc_private_va_range - Allocates a private VA range. + * @size: The size of the VA range to reserve. + * @haddr: The hypervisor virtual start address of the allocation. + * + * The private virtual address (VA) range is allocated above __io_map_base + * and aligned based on the order of @size. + * + * Return: 0 on success or negative error code on failure. + */ +int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr) { - unsigned long addr; - int err; + unsigned long base, addr; + int ret = 0; hyp_spin_lock(&pkvm_pgd_lock); - size = PAGE_ALIGN(size + offset_in_page(phys)); - addr = __io_map_base; - __io_map_base += size; + /* Align the allocation based on the order of its size */ + addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size)); - /* Are we overflowing on the vmemmap ? 
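The alignment rule in pkvm_alloc_private_va_range() (and in the EL1-side hyp_alloc_private_va_range()) is what makes the guard-page trick above work: the base is rounded up according to the order of the requested size. A standalone worked example, assuming 4K pages and re-implementing ALIGN()/get_order() purely for illustration:

	#include <stdio.h>

	#define PAGE_SHIFT	12			/* assumption: 4K pages */
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)
	#define ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

	static int get_order(unsigned long size)	/* log2 of pages, rounded up */
	{
		int order = 0;

		size = (size - 1) >> PAGE_SHIFT;
		while (size) {
			order++;
			size >>= 1;
		}
		return order;
	}

	int main(void)
	{
		unsigned long io_map_base = 0x7000;	/* arbitrary example value */
		unsigned long size = 2 * PAGE_SIZE;	/* stack + guard page */
		unsigned long addr = ALIGN(io_map_base, PAGE_SIZE << get_order(size));

		/* Prints 0x8000: a two-page request lands on a two-page boundary,
		 * which keeps the PAGE_SHIFT-bit overflow check valid. */
		printf("0x%lx\n", addr);
		return 0;
	}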
*/ - if (__io_map_base > __hyp_vmemmap) { - __io_map_base -= size; - addr = (unsigned long)ERR_PTR(-ENOMEM); - goto out; - } + /* The allocated size is always a multiple of PAGE_SIZE */ + base = addr + PAGE_ALIGN(size); - err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot); - if (err) { - addr = (unsigned long)ERR_PTR(err); - goto out; + /* Are we overflowing on the vmemmap ? */ + if (!addr || base > __hyp_vmemmap) + ret = -ENOMEM; + else { + __io_map_base = base; + *haddr = addr; } - addr = addr + offset_in_page(phys); -out: hyp_spin_unlock(&pkvm_pgd_lock); - return addr; + return ret; +} + +int __pkvm_create_private_mapping(phys_addr_t phys, size_t size, + enum kvm_pgtable_prot prot, + unsigned long *haddr) +{ + unsigned long addr; + int err; + + size = PAGE_ALIGN(size + offset_in_page(phys)); + err = pkvm_alloc_private_va_range(size, &addr); + if (err) + return err; + + err = __pkvm_create_mappings(addr, size, phys, prot); + if (err) + return err; + + *haddr = addr + offset_in_page(phys); + return err; } int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot) @@ -146,7 +170,8 @@ int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot) int hyp_map_vectors(void) { phys_addr_t phys; - void *bp_base; + unsigned long bp_base; + int ret; if (!kvm_system_needs_idmapped_vectors()) { __hyp_bp_vect_base = __bp_harden_hyp_vecs; @@ -154,13 +179,12 @@ int hyp_map_vectors(void) } phys = __hyp_pa(__bp_harden_hyp_vecs); - bp_base = (void *)__pkvm_create_private_mapping(phys, - __BP_HARDEN_HYP_VECS_SZ, - PAGE_HYP_EXEC); - if (IS_ERR_OR_NULL(bp_base)) - return PTR_ERR(bp_base); + ret = __pkvm_create_private_mapping(phys, __BP_HARDEN_HYP_VECS_SZ, + PAGE_HYP_EXEC, &bp_base); + if (ret) + return ret; - __hyp_bp_vect_base = bp_base; + __hyp_bp_vect_base = (void *)bp_base; return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 27af337f9fea..e8d4ea2fcfa0 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -99,17 +99,42 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, return ret; for (i = 0; i < hyp_nr_cpus; i++) { + struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i); + unsigned long hyp_addr; + start = (void *)kern_hyp_va(per_cpu_base[i]); end = start + PAGE_ALIGN(hyp_percpu_size); ret = pkvm_create_mappings(start, end, PAGE_HYP); if (ret) return ret; - end = (void *)per_cpu_ptr(&kvm_init_params, i)->stack_hyp_va; - start = end - PAGE_SIZE; - ret = pkvm_create_mappings(start, end, PAGE_HYP); + /* + * Allocate a contiguous HYP private VA range for the stack + * and guard page. The allocation is also aligned based on + * the order of its size. + */ + ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); + if (ret) + return ret; + + /* + * Since the stack grows downwards, map the stack to the page + * at the higher address and leave the lower guard page + * unbacked. + * + * Any valid stack address now has the PAGE_SHIFT bit as 1 + * and addresses corresponding to the guard page have the + * PAGE_SHIFT bit as 0 - this is used for overflow detection. 
+ */ + hyp_spin_lock(&pkvm_pgd_lock); + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE, + PAGE_SIZE, params->stack_pa, PAGE_HYP); + hyp_spin_unlock(&pkvm_pgd_lock); if (ret) return ret; + + /* Update stack_hyp_va to end of the stack's private VA range */ + params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); } /* diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6410d21d8695..6db801db8f27 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -47,10 +47,24 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TFP | CPTR_EL2_TZ; __activate_traps_fpsimd32(vcpu); } + if (cpus_have_final_cap(ARM64_SME)) + val |= CPTR_EL2_TSM; write_sysreg(val, cptr_el2); write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2); + if (cpus_have_final_cap(ARM64_SME)) { + val = read_sysreg_s(SYS_HFGRTR_EL2); + val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK | + HFGxTR_EL2_nSMPRI_EL1_MASK); + write_sysreg_s(val, SYS_HFGRTR_EL2); + + val = read_sysreg_s(SYS_HFGWTR_EL2); + val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK | + HFGxTR_EL2_nSMPRI_EL1_MASK); + write_sysreg_s(val, SYS_HFGWTR_EL2); + } + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; @@ -94,9 +108,25 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); + if (cpus_have_final_cap(ARM64_SME)) { + u64 val; + + val = read_sysreg_s(SYS_HFGRTR_EL2); + val |= HFGxTR_EL2_nTPIDR2_EL0_MASK | + HFGxTR_EL2_nSMPRI_EL1_MASK; + write_sysreg_s(val, SYS_HFGRTR_EL2); + + val = read_sysreg_s(SYS_HFGWTR_EL2); + val |= HFGxTR_EL2_nTPIDR2_EL0_MASK | + HFGxTR_EL2_nSMPRI_EL1_MASK; + write_sysreg_s(val, SYS_HFGWTR_EL2); + } + cptr = CPTR_EL2_DEFAULT; if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)) cptr |= CPTR_EL2_TZ; + if (cpus_have_final_cap(ARM64_SME)) + cptr &= ~CPTR_EL2_TSM; write_sysreg(cptr, cptr_el2); write_sysreg(__kvm_hyp_host_vector, vbar_el2); @@ -120,16 +150,13 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) } } -/** +/* * Disable host events, enable guest events */ -static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) +#ifdef CONFIG_HW_PERF_EVENTS +static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu) { - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; + struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events; if (pmu->events_host) write_sysreg(pmu->events_host, pmcntenclr_el0); @@ -140,16 +167,12 @@ static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) return (pmu->events_host || pmu->events_guest); } -/** +/* * Disable guest events, enable host events */ -static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) +static void __pmu_switch_to_host(struct kvm_vcpu *vcpu) { - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; + struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events; if (pmu->events_guest) write_sysreg(pmu->events_guest, pmcntenclr_el0); @@ -157,8 +180,12 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) if (pmu->events_host) write_sysreg(pmu->events_host, pmcntenset_el0); } +#else +#define __pmu_switch_to_guest(v) ({ false; }) +#define __pmu_switch_to_host(v) do {} while (0) +#endif -/** +/* * Handler for protected VM MSR, MRS or System instruction 
execution in AArch64. * * Returns true if the hypervisor has handled the exit, and control should go @@ -175,23 +202,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) kvm_handle_pvm_sysreg(vcpu, exit_code)); } -/** - * Handler for protected floating-point and Advanced SIMD accesses. - * - * Returns true if the hypervisor has handled the exit, and control should go - * back to the guest, or false if it hasn't. - */ -static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) -{ - /* Linux guests assume support for floating-point and Advanced SIMD. */ - BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP), - PVM_ID_AA64PFR0_ALLOW)); - BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD), - PVM_ID_AA64PFR0_ALLOW)); - - return kvm_hyp_handle_fpsimd(vcpu, exit_code); -} - static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, @@ -207,7 +217,7 @@ static const exit_handler_fn pvm_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64, [ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted, - [ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd, + [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, @@ -274,7 +284,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; - pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); + pmu_switch_needed = __pmu_switch_to_guest(vcpu); __sysreg_save_state_nvhe(host_ctxt); /* @@ -336,7 +346,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __debug_restore_host_buffers_nvhe(vcpu); if (pmu_switch_needed) - __pmu_switch_to_host(host_ctxt); + __pmu_switch_to_host(vcpu); /* Returning to host will clear PSR.I, remask PMR if needed */ if (system_uses_irq_prio_masking()) @@ -347,7 +357,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) return exit_code; } -void __noreturn hyp_panic(void) +asmlinkage void __noreturn hyp_panic(void) { u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); @@ -369,6 +379,11 @@ void __noreturn hyp_panic(void) unreachable(); } +asmlinkage void __noreturn hyp_panic_bad_stack(void) +{ + hyp_panic(); +} + asmlinkage void kvm_unexpected_el2_exception(void) { return __kvm_unexpected_el2_exception(); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 33f5181af330..3f5d7bd171c5 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -90,9 +90,6 @@ static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) u64 set_mask = 0; u64 allow_mask = PVM_ID_AA64PFR0_ALLOW; - if (!vcpu_has_sve(vcpu)) - allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE); - set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val, PVM_ID_AA64PFR0_RESTRICT_UNSIGNED); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 262dfe03134d..969f20daf97a 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -41,7 +41,8 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val = read_sysreg(cpacr_el1); val |= CPACR_EL1_TTA; - val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN); + val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN | + CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN); /* * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to @@ -62,6 +63,10 @@ static void 
__activate_traps(struct kvm_vcpu *vcpu) __activate_traps_fpsimd32(vcpu); } + if (cpus_have_final_cap(ARM64_SME)) + write_sysreg(read_sysreg(sctlr_el2) & ~SCTLR_ELx_ENTP2, + sctlr_el2); + write_sysreg(val, cpacr_el1); write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1); @@ -83,6 +88,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) */ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); + if (cpus_have_final_cap(ARM64_SME)) + write_sysreg(read_sysreg(sctlr_el2) | SCTLR_ELx_ENTP2, + sctlr_el2); + write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); if (!arm64_kernel_unmapped_at_el0()) diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 202b8c455724..c9f401fa01a9 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -9,6 +9,13 @@ #include <kvm/arm_hypercalls.h> #include <kvm/arm_psci.h> +#define KVM_ARM_SMCCC_STD_FEATURES \ + GENMASK(KVM_REG_ARM_STD_BMAP_BIT_COUNT - 1, 0) +#define KVM_ARM_SMCCC_STD_HYP_FEATURES \ + GENMASK(KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT - 1, 0) +#define KVM_ARM_SMCCC_VENDOR_HYP_FEATURES \ + GENMASK(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT - 1, 0) + static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) { struct system_time_snapshot systime_snapshot; @@ -58,13 +65,73 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) val[3] = lower_32_bits(cycles); } +static bool kvm_hvc_call_default_allowed(u32 func_id) +{ + switch (func_id) { + /* + * List of function-ids that are not gated with the bitmapped + * feature firmware registers, and are to be allowed for + * servicing the call by default. + */ + case ARM_SMCCC_VERSION_FUNC_ID: + case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: + return true; + default: + /* PSCI 0.2 and up is in the 0:0x1f range */ + if (ARM_SMCCC_OWNER_NUM(func_id) == ARM_SMCCC_OWNER_STANDARD && + ARM_SMCCC_FUNC_NUM(func_id) <= 0x1f) + return true; + + /* + * KVM's PSCI 0.1 doesn't comply with SMCCC, and has + * its own function-id base and range + */ + if (func_id >= KVM_PSCI_FN(0) && func_id <= KVM_PSCI_FN(3)) + return true; + + return false; + } +} + +static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) +{ + struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; + + switch (func_id) { + case ARM_SMCCC_TRNG_VERSION: + case ARM_SMCCC_TRNG_FEATURES: + case ARM_SMCCC_TRNG_GET_UUID: + case ARM_SMCCC_TRNG_RND32: + case ARM_SMCCC_TRNG_RND64: + return test_bit(KVM_REG_ARM_STD_BIT_TRNG_V1_0, + &smccc_feat->std_bmap); + case ARM_SMCCC_HV_PV_TIME_FEATURES: + case ARM_SMCCC_HV_PV_TIME_ST: + return test_bit(KVM_REG_ARM_STD_HYP_BIT_PV_TIME, + &smccc_feat->std_hyp_bmap); + case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID: + case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID: + return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT, + &smccc_feat->vendor_hyp_bmap); + case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: + return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP, + &smccc_feat->vendor_hyp_bmap); + default: + return kvm_hvc_call_default_allowed(func_id); + } +} + int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) { + struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; u32 func_id = smccc_get_function(vcpu); u64 val[4] = {SMCCC_RET_NOT_SUPPORTED}; u32 feature; gpa_t gpa; + if (!kvm_hvc_call_allowed(vcpu, func_id)) + goto out; + switch (func_id) { case ARM_SMCCC_VERSION_FUNC_ID: val[0] = ARM_SMCCC_VERSION_1_1; @@ -120,7 +187,9 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) } break; case ARM_SMCCC_HV_PV_TIME_FEATURES: - val[0] = SMCCC_RET_SUCCESS; + if 
(test_bit(KVM_REG_ARM_STD_HYP_BIT_PV_TIME, + &smccc_feat->std_hyp_bmap)) + val[0] = SMCCC_RET_SUCCESS; break; } break; @@ -139,8 +208,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3; break; case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID: - val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES); - val[0] |= BIT(ARM_SMCCC_KVM_FUNC_PTP); + val[0] = smccc_feat->vendor_hyp_bmap; break; case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: kvm_ptp_get_time(vcpu, val); @@ -155,6 +223,259 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) return kvm_psci_call(vcpu); } +out: smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]); return 1; } + +static const u64 kvm_arm_fw_reg_ids[] = { + KVM_REG_ARM_PSCI_VERSION, + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, + KVM_REG_ARM_STD_BMAP, + KVM_REG_ARM_STD_HYP_BMAP, + KVM_REG_ARM_VENDOR_HYP_BMAP, +}; + +void kvm_arm_init_hypercalls(struct kvm *kvm) +{ + struct kvm_smccc_features *smccc_feat = &kvm->arch.smccc_feat; + + smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES; + smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES; + smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES; +} + +int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) +{ + return ARRAY_SIZE(kvm_arm_fw_reg_ids); +} + +int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kvm_arm_fw_reg_ids); i++) { + if (put_user(kvm_arm_fw_reg_ids[i], uindices++)) + return -EFAULT; + } + + return 0; +} + +#define KVM_REG_FEATURE_LEVEL_MASK GENMASK(3, 0) + +/* + * Convert the workaround level into an easy-to-compare number, where higher + * values mean better protection. + */ +static int get_kernel_wa_level(u64 regid) +{ + switch (regid) { + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + switch (arm64_get_spectre_v2_state()) { + case SPECTRE_VULNERABLE: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; + case SPECTRE_MITIGATED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL; + case SPECTRE_UNAFFECTED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED; + } + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + switch (arm64_get_spectre_v4_state()) { + case SPECTRE_MITIGATED: + /* + * As for the hypercall discovery, we pretend we + * don't have any FW mitigation if SSBS is there at + * all times. 
+ */ + if (cpus_have_final_cap(ARM64_SSBS)) + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; + fallthrough; + case SPECTRE_UNAFFECTED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; + case SPECTRE_VULNERABLE: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; + } + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: + switch (arm64_get_spectre_bhb_state()) { + case SPECTRE_VULNERABLE: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; + case SPECTRE_MITIGATED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL; + case SPECTRE_UNAFFECTED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED; + } + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; + } + + return -EINVAL; +} + +int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + switch (reg->id) { + case KVM_REG_ARM_PSCI_VERSION: + val = kvm_psci_version(vcpu); + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: + val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; + break; + case KVM_REG_ARM_STD_BMAP: + val = READ_ONCE(smccc_feat->std_bmap); + break; + case KVM_REG_ARM_STD_HYP_BMAP: + val = READ_ONCE(smccc_feat->std_hyp_bmap); + break; + case KVM_REG_ARM_VENDOR_HYP_BMAP: + val = READ_ONCE(smccc_feat->vendor_hyp_bmap); + break; + default: + return -ENOENT; + } + + if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) +{ + int ret = 0; + struct kvm *kvm = vcpu->kvm; + struct kvm_smccc_features *smccc_feat = &kvm->arch.smccc_feat; + unsigned long *fw_reg_bmap, fw_reg_features; + + switch (reg_id) { + case KVM_REG_ARM_STD_BMAP: + fw_reg_bmap = &smccc_feat->std_bmap; + fw_reg_features = KVM_ARM_SMCCC_STD_FEATURES; + break; + case KVM_REG_ARM_STD_HYP_BMAP: + fw_reg_bmap = &smccc_feat->std_hyp_bmap; + fw_reg_features = KVM_ARM_SMCCC_STD_HYP_FEATURES; + break; + case KVM_REG_ARM_VENDOR_HYP_BMAP: + fw_reg_bmap = &smccc_feat->vendor_hyp_bmap; + fw_reg_features = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES; + break; + default: + return -ENOENT; + } + + /* Check for unsupported bit */ + if (val & ~fw_reg_features) + return -EINVAL; + + mutex_lock(&kvm->lock); + + if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) && + val != *fw_reg_bmap) { + ret = -EBUSY; + goto out; + } + + WRITE_ONCE(*fw_reg_bmap, val); +out: + mutex_unlock(&kvm->lock); + return ret; +} + +int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int wa_level; + + if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + switch (reg->id) { + case KVM_REG_ARM_PSCI_VERSION: + { + bool wants_02; + + wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features); + + switch (val) { + case KVM_ARM_PSCI_0_1: + if (wants_02) + return -EINVAL; + vcpu->kvm->arch.psci_version = val; + return 0; + case KVM_ARM_PSCI_0_2: + case KVM_ARM_PSCI_1_0: + case KVM_ARM_PSCI_1_1: + if (!wants_02) + return -EINVAL; + vcpu->kvm->arch.psci_version = val; + return 0; + } + break; + } + + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: + if (val & ~KVM_REG_FEATURE_LEVEL_MASK) + return -EINVAL; + + if (get_kernel_wa_level(reg->id) < val) + return 
-EINVAL; + + return 0; + + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + if (val & ~(KVM_REG_FEATURE_LEVEL_MASK | + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED)) + return -EINVAL; + + /* The enabled bit must not be set unless the level is AVAIL. */ + if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) && + (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL) + return -EINVAL; + + /* + * Map all the possible incoming states to the only two we + * really want to deal with. + */ + switch (val & KVM_REG_FEATURE_LEVEL_MASK) { + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: + wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: + wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; + break; + default: + return -EINVAL; + } + + /* + * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the + * other way around. + */ + if (get_kernel_wa_level(reg->id) < wa_level) + return -EINVAL; + + return 0; + case KVM_REG_ARM_STD_BMAP: + case KVM_REG_ARM_STD_HYP_BMAP: + case KVM_REG_ARM_VENDOR_HYP_BMAP: + return kvm_arm_set_fw_reg_bmap(vcpu, reg->id, val); + default: + return -ENOENT; + } + + return -EINVAL; +} diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index b47df73e98d7..ba20405d2dc2 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -145,6 +145,34 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) inject_abt64(vcpu, true, addr); } +void kvm_inject_size_fault(struct kvm_vcpu *vcpu) +{ + unsigned long addr, esr; + + addr = kvm_vcpu_get_fault_ipa(vcpu); + addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + + if (kvm_vcpu_trap_is_iabt(vcpu)) + kvm_inject_pabt(vcpu, addr); + else + kvm_inject_dabt(vcpu, addr); + + /* + * If AArch64 or LPAE, set FSC to 0 to indicate an Address + * Size Fault at level 0, as if exceeding PARange. + * + * Non-LPAE guests will only get the external abort, as there + * is no way to describe the ASF. + */ + if (vcpu_el1_is_32bit(vcpu) && + !(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE)) + return; + + esr = vcpu_read_sys_reg(vcpu, ESR_EL1); + esr &= ~GENMASK_ULL(5, 0); + vcpu_write_sys_reg(vcpu, esr, ESR_EL1); +} + /** * kvm_inject_undefined - inject an undefined instruction into the guest * @vcpu: The vCPU in which to inject the exception diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 53ae2c0640bc..f5651a05b6a8 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -258,8 +258,8 @@ static bool kvm_host_owns_hyp_mappings(void) return true; } -static int __create_hyp_mappings(unsigned long start, unsigned long size, - unsigned long phys, enum kvm_pgtable_prot prot) +int __create_hyp_mappings(unsigned long start, unsigned long size, + unsigned long phys, enum kvm_pgtable_prot prot) { int err; @@ -457,23 +457,22 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot) return 0; } -static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, - unsigned long *haddr, - enum kvm_pgtable_prot prot) + +/** + * hyp_alloc_private_va_range - Allocates a private VA range. + * @size: The size of the VA range to reserve. + * @haddr: The hypervisor virtual start address of the allocation. + * + * The private virtual address (VA) range is allocated below io_map_base + * and aligned based on the order of @size. 
+ * + * Return: 0 on success or negative error code on failure. + */ +int hyp_alloc_private_va_range(size_t size, unsigned long *haddr) { unsigned long base; int ret = 0; - if (!kvm_host_owns_hyp_mappings()) { - base = kvm_call_hyp_nvhe(__pkvm_create_private_mapping, - phys_addr, size, prot); - if (IS_ERR_OR_NULL((void *)base)) - return PTR_ERR((void *)base); - *haddr = base; - - return 0; - } - mutex_lock(&kvm_hyp_pgd_mutex); /* @@ -484,8 +483,10 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, * * The allocated size is always a multiple of PAGE_SIZE. */ - size = PAGE_ALIGN(size + offset_in_page(phys_addr)); - base = io_map_base - size; + base = io_map_base - PAGE_ALIGN(size); + + /* Align the allocation based on the order of its size */ + base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size)); /* * Verify that BIT(VA_BITS - 1) hasn't been flipped by @@ -495,19 +496,40 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, if ((base ^ io_map_base) & BIT(VA_BITS - 1)) ret = -ENOMEM; else - io_map_base = base; + *haddr = io_map_base = base; mutex_unlock(&kvm_hyp_pgd_mutex); + return ret; +} + +static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, + unsigned long *haddr, + enum kvm_pgtable_prot prot) +{ + unsigned long addr; + int ret = 0; + + if (!kvm_host_owns_hyp_mappings()) { + addr = kvm_call_hyp_nvhe(__pkvm_create_private_mapping, + phys_addr, size, prot); + if (IS_ERR_VALUE(addr)) + return addr; + *haddr = addr; + + return 0; + } + + size = PAGE_ALIGN(size + offset_in_page(phys_addr)); + ret = hyp_alloc_private_va_range(size, &addr); if (ret) - goto out; + return ret; - ret = __create_hyp_mappings(base, size, phys_addr, prot); + ret = __create_hyp_mappings(addr, size, phys_addr, prot); if (ret) - goto out; + return ret; - *haddr = base + offset_in_page(phys_addr); -out: + *haddr = addr + offset_in_page(phys_addr); return ret; } @@ -1337,6 +1359,25 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); + if (fault_status == FSC_FAULT) { + /* Beyond sanitised PARange (which is the IPA limit) */ + if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { + kvm_inject_size_fault(vcpu); + return 1; + } + + /* Falls between the IPA range and the PARange? */ + if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) { + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + + if (is_iabt) + kvm_inject_pabt(vcpu, fault_ipa); + else + kvm_inject_dabt(vcpu, fault_ipa); + return 1; + } + } + /* Synchronous External Abort? */ if (kvm_vcpu_abt_issea(vcpu)) { /* diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 78fdc443adc7..11c43bed5f97 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -177,6 +177,9 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); if (kvm_pmu_pmc_is_chained(pmc) && @@ -198,6 +201,9 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) { u64 reg; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + reg = (select_idx == ARMV8_PMU_CYCLE_IDX) ? 
PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); @@ -322,6 +328,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) return; @@ -357,7 +366,7 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; - if (!val) + if (!kvm_vcpu_has_pmu(vcpu) || !val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { @@ -527,6 +536,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; int i; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) return; @@ -576,6 +588,9 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) { int i; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); @@ -739,6 +754,9 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, { u64 reg, mask; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + mask = ARMV8_PMU_EVTYPE_MASK; mask &= ~ARMV8_PMU_EVTYPE_EVENT; mask |= kvm_pmu_event_mask(vcpu->kvm); @@ -756,8 +774,7 @@ void kvm_host_pmu_init(struct arm_pmu *pmu) { struct arm_pmu_entry *entry; - if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF || - is_protected_kvm_enabled()) + if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF) return; mutex_lock(&arm_pmus_lock); @@ -827,6 +844,9 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) u64 val, mask = 0; int base, i, nr_events; + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; + if (!pmceid1) { val = read_sysreg(pmceid0_el0); base = 0; diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 03a6c1f4a09a..7887133d15f0 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -5,7 +5,8 @@ */ #include <linux/kvm_host.h> #include <linux/perf_event.h> -#include <asm/kvm_hyp.h> + +static DEFINE_PER_CPU(struct kvm_pmu_events, kvm_pmu_events); /* * Given the perf event attributes and system type, determine @@ -25,21 +26,26 @@ static bool kvm_pmu_switch_needed(struct perf_event_attr *attr) return (attr->exclude_host != attr->exclude_guest); } +struct kvm_pmu_events *kvm_get_pmu_events(void) +{ + return this_cpu_ptr(&kvm_pmu_events); +} + /* * Add events to track that we may want to switch at guest entry/exit * time. 
*/ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) { - struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data); + struct kvm_pmu_events *pmu = kvm_get_pmu_events(); - if (!kvm_arm_support_pmu_v3() || !ctx || !kvm_pmu_switch_needed(attr)) + if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr)) return; if (!attr->exclude_host) - ctx->pmu_events.events_host |= set; + pmu->events_host |= set; if (!attr->exclude_guest) - ctx->pmu_events.events_guest |= set; + pmu->events_guest |= set; } /* @@ -47,13 +53,13 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) */ void kvm_clr_pmu_events(u32 clr) { - struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data); + struct kvm_pmu_events *pmu = kvm_get_pmu_events(); - if (!kvm_arm_support_pmu_v3() || !ctx) + if (!kvm_arm_support_pmu_v3() || !pmu) return; - ctx->pmu_events.events_host &= ~clr; - ctx->pmu_events.events_guest &= ~clr; + pmu->events_host &= ~clr; + pmu->events_guest &= ~clr; } #define PMEVTYPER_READ_CASE(idx) \ @@ -169,16 +175,16 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events) */ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) { - struct kvm_host_data *host; + struct kvm_pmu_events *pmu; u32 events_guest, events_host; if (!kvm_arm_support_pmu_v3() || !has_vhe()) return; preempt_disable(); - host = this_cpu_ptr_hyp_sym(kvm_host_data); - events_guest = host->pmu_events.events_guest; - events_host = host->pmu_events.events_host; + pmu = kvm_get_pmu_events(); + events_guest = pmu->events_guest; + events_host = pmu->events_host; kvm_vcpu_pmu_enable_el0(events_guest); kvm_vcpu_pmu_disable_el0(events_host); @@ -190,15 +196,15 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) */ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) { - struct kvm_host_data *host; + struct kvm_pmu_events *pmu; u32 events_guest, events_host; if (!kvm_arm_support_pmu_v3() || !has_vhe()) return; - host = this_cpu_ptr_hyp_sym(kvm_host_data); - events_guest = host->pmu_events.events_guest; - events_host = host->pmu_events.events_host; + pmu = kvm_get_pmu_events(); + events_guest = pmu->events_guest; + events_host = pmu->events_host; kvm_vcpu_pmu_enable_el0(events_host); kvm_vcpu_pmu_disable_el0(events_guest); diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 708d80e8e60d..7fbc4c1b9df0 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -51,13 +51,6 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) return PSCI_RET_SUCCESS; } -static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) -{ - vcpu->arch.power_off = true; - kvm_make_request(KVM_REQ_SLEEP, vcpu); - kvm_vcpu_kick(vcpu); -} - static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu, unsigned long affinity) { @@ -83,7 +76,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ if (!vcpu) return PSCI_RET_INVALID_PARAMS; - if (!vcpu->arch.power_off) { + if (!kvm_arm_vcpu_stopped(vcpu)) { if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) return PSCI_RET_ALREADY_ON; else @@ -107,12 +100,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); /* - * Make sure the reset request is observed if the change to - * power_off is observed. + * Make sure the reset request is observed if the RUNNABLE mp_state is + * observed. 
*/ smp_wmb(); - vcpu->arch.power_off = false; + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; kvm_vcpu_wake_up(vcpu); return PSCI_RET_SUCCESS; @@ -150,7 +143,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) mpidr = kvm_vcpu_get_mpidr_aff(tmp); if ((mpidr & target_affinity_mask) == target_affinity) { matching_cpus++; - if (!tmp->arch.power_off) + if (!kvm_arm_vcpu_stopped(tmp)) return PSCI_0_2_AFFINITY_LEVEL_ON; } } @@ -176,7 +169,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags) * re-initialized. */ kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.power_off = true; + tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); @@ -202,6 +195,15 @@ static void kvm_psci_system_reset2(struct kvm_vcpu *vcpu) KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2); } +static void kvm_psci_system_suspend(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + memset(&run->system_event, 0, sizeof(vcpu->run->system_event)); + run->system_event.type = KVM_SYSTEM_EVENT_SUSPEND; + run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) { int i; @@ -245,7 +247,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) val = kvm_psci_vcpu_suspend(vcpu); break; case PSCI_0_2_FN_CPU_OFF: - kvm_psci_vcpu_off(vcpu); + kvm_arm_vcpu_power_off(vcpu); val = PSCI_RET_SUCCESS; break; case PSCI_0_2_FN_CPU_ON: @@ -305,9 +307,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) { + unsigned long val = PSCI_RET_NOT_SUPPORTED; u32 psci_fn = smccc_get_function(vcpu); + struct kvm *kvm = vcpu->kvm; u32 arg; - unsigned long val; int ret = 1; switch(psci_fn) { @@ -320,6 +323,8 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) if (val) break; + val = PSCI_RET_NOT_SUPPORTED; + switch(arg) { case PSCI_0_2_FN_PSCI_VERSION: case PSCI_0_2_FN_CPU_SUSPEND: @@ -336,18 +341,32 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) case ARM_SMCCC_VERSION_FUNC_ID: val = 0; break; + case PSCI_1_0_FN_SYSTEM_SUSPEND: + case PSCI_1_0_FN64_SYSTEM_SUSPEND: + if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags)) + val = 0; + break; case PSCI_1_1_FN_SYSTEM_RESET2: case PSCI_1_1_FN64_SYSTEM_RESET2: - if (minor >= 1) { + if (minor >= 1) val = 0; - break; - } - fallthrough; - default: - val = PSCI_RET_NOT_SUPPORTED; break; } break; + case PSCI_1_0_FN_SYSTEM_SUSPEND: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; + case PSCI_1_0_FN64_SYSTEM_SUSPEND: + /* + * Return directly to userspace without changing the vCPU's + * registers. Userspace depends on reading the SMCCC parameters + * to implement SYSTEM_SUSPEND. 
+ */ + if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags)) { + kvm_psci_system_suspend(vcpu); + return 0; + } + break; case PSCI_1_1_FN_SYSTEM_RESET2: kvm_psci_narrow_to_32bit(vcpu); fallthrough; @@ -365,7 +384,7 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) val = PSCI_RET_INVALID_PARAMS; break; } - fallthrough; + break; default: return kvm_psci_0_2_call(vcpu); } @@ -382,7 +401,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) switch (psci_fn) { case KVM_PSCI_FN_CPU_OFF: - kvm_psci_vcpu_off(vcpu); + kvm_arm_vcpu_power_off(vcpu); val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: @@ -437,186 +456,3 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) return -EINVAL; } } - -int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) -{ - return 4; /* PSCI version and three workaround registers */ -} - -int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++)) - return -EFAULT; - - if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++)) - return -EFAULT; - - if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) - return -EFAULT; - - if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++)) - return -EFAULT; - - return 0; -} - -#define KVM_REG_FEATURE_LEVEL_WIDTH 4 -#define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1) - -/* - * Convert the workaround level into an easy-to-compare number, where higher - * values mean better protection. - */ -static int get_kernel_wa_level(u64 regid) -{ - switch (regid) { - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: - switch (arm64_get_spectre_v2_state()) { - case SPECTRE_VULNERABLE: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; - case SPECTRE_MITIGATED: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL; - case SPECTRE_UNAFFECTED: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED; - } - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: - switch (arm64_get_spectre_v4_state()) { - case SPECTRE_MITIGATED: - /* - * As for the hypercall discovery, we pretend we - * don't have any FW mitigation if SSBS is there at - * all times. 
- */ - if (cpus_have_final_cap(ARM64_SSBS)) - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; - fallthrough; - case SPECTRE_UNAFFECTED: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; - case SPECTRE_VULNERABLE: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; - } - break; - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: - switch (arm64_get_spectre_bhb_state()) { - case SPECTRE_VULNERABLE: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; - case SPECTRE_MITIGATED: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL; - case SPECTRE_UNAFFECTED: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED; - } - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; - } - - return -EINVAL; -} - -int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - - switch (reg->id) { - case KVM_REG_ARM_PSCI_VERSION: - val = kvm_psci_version(vcpu); - break; - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: - val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; - break; - default: - return -ENOENT; - } - - if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id))) - return -EFAULT; - - return 0; -} - -int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - int wa_level; - - if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) - return -EFAULT; - - switch (reg->id) { - case KVM_REG_ARM_PSCI_VERSION: - { - bool wants_02; - - wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features); - - switch (val) { - case KVM_ARM_PSCI_0_1: - if (wants_02) - return -EINVAL; - vcpu->kvm->arch.psci_version = val; - return 0; - case KVM_ARM_PSCI_0_2: - case KVM_ARM_PSCI_1_0: - case KVM_ARM_PSCI_1_1: - if (!wants_02) - return -EINVAL; - vcpu->kvm->arch.psci_version = val; - return 0; - } - break; - } - - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: - if (val & ~KVM_REG_FEATURE_LEVEL_MASK) - return -EINVAL; - - if (get_kernel_wa_level(reg->id) < val) - return -EINVAL; - - return 0; - - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: - if (val & ~(KVM_REG_FEATURE_LEVEL_MASK | - KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED)) - return -EINVAL; - - /* The enabled bit must not be set unless the level is AVAIL. */ - if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) && - (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL) - return -EINVAL; - - /* - * Map all the possible incoming states to the only two we - * really want to deal with. - */ - switch (val & KVM_REG_FEATURE_LEVEL_MASK) { - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: - wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; - break; - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: - wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; - break; - default: - return -EINVAL; - } - - /* - * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the - * other way around. 
- */ - if (get_kernel_wa_level(reg->id) < wa_level) - return -EINVAL; - - return 0; - default: - return -ENOENT; - } - - return -EINVAL; -} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7b45c040cc27..d77be152cbd5 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1132,6 +1132,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, case SYS_ID_AA64PFR1_EL1: if (!kvm_has_mte(vcpu->kvm)) val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); + + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_SME); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) @@ -1144,6 +1146,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, if (!vcpu_has_ptrauth(vcpu)) val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + if (!cpus_have_final_cap(ARM64_HAS_WFXT)) + val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_WFXT); break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ @@ -1553,7 +1557,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(4,2), ID_UNALLOCATED(4,3), ID_SANITISED(ID_AA64ZFR0_EL1), - ID_UNALLOCATED(4,5), + ID_HIDDEN(ID_AA64SMFR0_EL1), ID_UNALLOCATED(4,6), ID_UNALLOCATED(4,7), @@ -1596,6 +1600,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TRFCR_EL1), undef_access }, + { SYS_DESC(SYS_SMPRI_EL1), undef_access }, + { SYS_DESC(SYS_SMCR_EL1), undef_access }, { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 }, @@ -1678,8 +1684,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, { SYS_DESC(SYS_CLIDR_EL1), access_clidr }, + { SYS_DESC(SYS_SMIDR_EL1), undef_access }, { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, { SYS_DESC(SYS_CTR_EL0), access_ctr }, + { SYS_DESC(SYS_SVCR_EL0), undef_access }, { PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr, .reg = PMCR_EL0 }, @@ -1719,6 +1727,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, + { SYS_DESC(SYS_TPIDR2_EL0), undef_access }, { SYS_DESC(SYS_SCXTNUM_EL0), undef_access }, @@ -2014,20 +2023,22 @@ static const struct sys_reg_desc cp14_64_regs[] = { { Op1( 0), CRm( 2), .access = trap_raz_wi }, }; +#define CP15_PMU_SYS_REG(_map, _Op1, _CRn, _CRm, _Op2) \ + AA32(_map), \ + Op1(_Op1), CRn(_CRn), CRm(_CRm), Op2(_Op2), \ + .visibility = pmu_visibility + /* Macro to expand the PMEVCNTRn register */ #define PMU_PMEVCNTR(n) \ - /* PMEVCNTRn */ \ - { Op1(0), CRn(0b1110), \ - CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ - access_pmu_evcntr } + { CP15_PMU_SYS_REG(DIRECT, 0, 0b1110, \ + (0b1000 | (((n) >> 3) & 0x3)), ((n) & 0x7)), \ + .access = access_pmu_evcntr } /* Macro to expand the PMEVTYPERn register */ #define PMU_PMEVTYPER(n) \ - /* PMEVTYPERn */ \ - { Op1(0), CRn(0b1110), \ - CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ - access_pmu_evtyper } - + { CP15_PMU_SYS_REG(DIRECT, 0, 0b1110, \ + (0b1100 | (((n) >> 3) & 0x3)), ((n) & 0x7)), \ + .access = access_pmu_evtyper } /* * Trapped cp15 registers. 
TTBR0/TTBR1 get a double encoding, * depending on the way they are accessed (as a 32bit or a 64bit @@ -2067,25 +2078,25 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, /* PMU */ - { Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmcr }, - { Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmcnten }, - { Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmcnten }, - { Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs }, - { Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc }, - { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr }, - { AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid }, - { AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid }, - { Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr }, - { Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper }, - { Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr }, - { Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmuserenr }, - { Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten }, - { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten }, - { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs }, - { AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 4), access_pmceid }, - { AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 5), access_pmceid }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 0), .access = access_pmcr }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 1), .access = access_pmcnten }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 2), .access = access_pmcnten }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 3), .access = access_pmovs }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 4), .access = access_pmswinc }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 5), .access = access_pmselr }, + { CP15_PMU_SYS_REG(LO, 0, 9, 12, 6), .access = access_pmceid }, + { CP15_PMU_SYS_REG(LO, 0, 9, 12, 7), .access = access_pmceid }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 0), .access = access_pmu_evcntr }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 1), .access = access_pmu_evtyper }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 2), .access = access_pmu_evcntr }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 0), .access = access_pmuserenr }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 1), .access = access_pminten }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 2), .access = access_pminten }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 3), .access = access_pmovs }, + { CP15_PMU_SYS_REG(HI, 0, 9, 14, 4), .access = access_pmceid }, + { CP15_PMU_SYS_REG(HI, 0, 9, 14, 5), .access = access_pmceid }, /* PMMIR */ - { Op1( 0), CRn( 9), CRm(14), Op2( 6), trap_raz_wi }, + { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 6), .access = trap_raz_wi }, /* PRRR/MAIR0 */ { AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 }, @@ -2170,7 +2181,7 @@ static const struct sys_reg_desc cp15_regs[] = { PMU_PMEVTYPER(29), PMU_PMEVTYPER(30), /* PMCCFILTR */ - { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper }, + { CP15_PMU_SYS_REG(DIRECT, 0, 14, 15, 7), .access = access_pmu_evtyper }, { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr }, { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr }, @@ -2179,7 +2190,7 @@ static const struct sys_reg_desc cp15_regs[] = { static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, - { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, + { CP15_PMU_SYS_REG(DIRECT, 0, 0, 9, 0), .access = access_pmu_evcntr }, { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */ { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 }, { Op1( 1), CRn( 0), CRm(12), 
Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ @@ -2187,25 +2198,24 @@ static const struct sys_reg_desc cp15_64_regs[] = { { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, }; -static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, - bool is_32) +static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, + bool is_32) { unsigned int i; for (i = 0; i < n; i++) { if (!is_32 && table[i].reg && !table[i].reset) { - kvm_err("sys_reg table %p entry %d has lacks reset\n", - table, i); - return 1; + kvm_err("sys_reg table %pS entry %d lacks reset\n", &table[i], i); + return false; } if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) { - kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); - return 1; + kvm_err("sys_reg table %pS entry %d out of order\n", &table[i - 1], i - 1); + return false; } } - return 0; + return true; } int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu) @@ -2246,27 +2256,27 @@ static void perform_access(struct kvm_vcpu *vcpu, * @table: array of trap descriptors * @num: size of the trap descriptor array * - * Return 0 if the access has been handled, and -1 if not. + * Return true if the access has been handled, false if not. */ -static int emulate_cp(struct kvm_vcpu *vcpu, - struct sys_reg_params *params, - const struct sys_reg_desc *table, - size_t num) +static bool emulate_cp(struct kvm_vcpu *vcpu, + struct sys_reg_params *params, + const struct sys_reg_desc *table, + size_t num) { const struct sys_reg_desc *r; if (!table) - return -1; /* Not handled */ + return false; /* Not handled */ r = find_reg(params, table, num); if (r) { perform_access(vcpu, params, r); - return 0; + return true; } /* Not handled */ - return -1; + return false; } static void unhandled_cp_access(struct kvm_vcpu *vcpu, @@ -2304,7 +2314,7 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, size_t nr_global) { struct sys_reg_params params; - u32 esr = kvm_vcpu_get_esr(vcpu); + u64 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); int Rt2 = (esr >> 10) & 0x1f; @@ -2330,7 +2340,7 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, * potential register operation in the case of a read and return * with success. */ - if (!emulate_cp(vcpu, ¶ms, global, nr_global)) { + if (emulate_cp(vcpu, ¶ms, global, nr_global)) { /* Split up the value between registers for the read side */ if (!params.is_write) { vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval)); @@ -2344,34 +2354,144 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, return 1; } +static bool emulate_sys_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params); + +/* + * The CP10 ID registers are architecturally mapped to AArch64 feature + * registers. Abuse that fact so we can rely on the AArch64 handler for accesses + * from AArch32. + */ +static bool kvm_esr_cp10_id_to_sys64(u64 esr, struct sys_reg_params *params) +{ + u8 reg_id = (esr >> 10) & 0xf; + bool valid; + + params->is_write = ((esr & 1) == 0); + params->Op0 = 3; + params->Op1 = 0; + params->CRn = 0; + params->CRm = 3; + + /* CP10 ID registers are read-only */ + valid = !params->is_write; + + switch (reg_id) { + /* MVFR0 */ + case 0b0111: + params->Op2 = 0; + break; + /* MVFR1 */ + case 0b0110: + params->Op2 = 1; + break; + /* MVFR2 */ + case 0b0101: + params->Op2 = 2; + break; + default: + valid = false; + } + + if (valid) + return true; + + kvm_pr_unimpl("Unhandled cp10 register %s: %u\n", + params->is_write ? 
"write" : "read", reg_id); + return false; +} + +/** + * kvm_handle_cp10_id() - Handles a VMRS trap on guest access to a 'Media and + * VFP Register' from AArch32. + * @vcpu: The vCPU pointer + * + * MVFR{0-2} are architecturally mapped to the AArch64 MVFR{0-2}_EL1 registers. + * Work out the correct AArch64 system register encoding and reroute to the + * AArch64 system register emulation. + */ +int kvm_handle_cp10_id(struct kvm_vcpu *vcpu) +{ + int Rt = kvm_vcpu_sys_get_rt(vcpu); + u64 esr = kvm_vcpu_get_esr(vcpu); + struct sys_reg_params params; + + /* UNDEF on any unhandled register access */ + if (!kvm_esr_cp10_id_to_sys64(esr, ¶ms)) { + kvm_inject_undefined(vcpu); + return 1; + } + + if (emulate_sys_reg(vcpu, ¶ms)) + vcpu_set_reg(vcpu, Rt, params.regval); + + return 1; +} + +/** + * kvm_emulate_cp15_id_reg() - Handles an MRC trap on a guest CP15 access where + * CRn=0, which corresponds to the AArch32 feature + * registers. + * @vcpu: the vCPU pointer + * @params: the system register access parameters. + * + * Our cp15 system register tables do not enumerate the AArch32 feature + * registers. Conveniently, our AArch64 table does, and the AArch32 system + * register encoding can be trivially remapped into the AArch64 for the feature + * registers: Append op0=3, leaving op1, CRn, CRm, and op2 the same. + * + * According to DDI0487G.b G7.3.1, paragraph "Behavior of VMSAv8-32 32-bit + * System registers with (coproc=0b1111, CRn==c0)", read accesses from this + * range are either UNKNOWN or RES0. Rerouting remains architectural as we + * treat undefined registers in this range as RAZ. + */ +static int kvm_emulate_cp15_id_reg(struct kvm_vcpu *vcpu, + struct sys_reg_params *params) +{ + int Rt = kvm_vcpu_sys_get_rt(vcpu); + + /* Treat impossible writes to RO registers as UNDEFINED */ + if (params->is_write) { + unhandled_cp_access(vcpu, params); + return 1; + } + + params->Op0 = 3; + + /* + * All registers where CRm > 3 are known to be UNKNOWN/RAZ from AArch32. + * Avoid conflicting with future expansion of AArch64 feature registers + * and simply treat them as RAZ here. 
+ */ + if (params->CRm > 3) + params->regval = 0; + else if (!emulate_sys_reg(vcpu, params)) + return 1; + + vcpu_set_reg(vcpu, Rt, params->regval); + return 1; +} + /** * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access * @vcpu: The VCPU pointer * @run: The kvm_run struct */ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, + struct sys_reg_params *params, const struct sys_reg_desc *global, size_t nr_global) { - struct sys_reg_params params; - u32 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); - params.CRm = (esr >> 1) & 0xf; - params.regval = vcpu_get_reg(vcpu, Rt); - params.is_write = ((esr & 1) == 0); - params.CRn = (esr >> 10) & 0xf; - params.Op0 = 0; - params.Op1 = (esr >> 14) & 0x7; - params.Op2 = (esr >> 17) & 0x7; + params->regval = vcpu_get_reg(vcpu, Rt); - if (!emulate_cp(vcpu, ¶ms, global, nr_global)) { - if (!params.is_write) - vcpu_set_reg(vcpu, Rt, params.regval); + if (emulate_cp(vcpu, params, global, nr_global)) { + if (!params->is_write) + vcpu_set_reg(vcpu, Rt, params->regval); return 1; } - unhandled_cp_access(vcpu, ¶ms); + unhandled_cp_access(vcpu, params); return 1; } @@ -2382,7 +2502,20 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu) int kvm_handle_cp15_32(struct kvm_vcpu *vcpu) { - return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); + struct sys_reg_params params; + + params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu)); + + /* + * Certain AArch32 ID registers are handled by rerouting to the AArch64 + * system register table. Registers in the ID range where CRm=0 are + * excluded from this scheme as they do not trivially map into AArch64 + * system register encodings. + */ + if (params.Op1 == 0 && params.CRn == 0 && params.CRm) + return kvm_emulate_cp15_id_reg(vcpu, ¶ms); + + return kvm_handle_cp_32(vcpu, ¶ms, cp15_regs, ARRAY_SIZE(cp15_regs)); } int kvm_handle_cp14_64(struct kvm_vcpu *vcpu) @@ -2392,7 +2525,11 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu) int kvm_handle_cp14_32(struct kvm_vcpu *vcpu) { - return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs)); + struct sys_reg_params params; + + params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu)); + + return kvm_handle_cp_32(vcpu, ¶ms, cp14_regs, ARRAY_SIZE(cp14_regs)); } static bool is_imp_def_sys_reg(struct sys_reg_params *params) @@ -2401,7 +2538,14 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params) return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011; } -static int emulate_sys_reg(struct kvm_vcpu *vcpu, +/** + * emulate_sys_reg - Emulate a guest access to an AArch64 system register + * @vcpu: The VCPU pointer + * @params: Decoded system register parameters + * + * Return: true if the system register access was successful, false otherwise. 
+ */ +static bool emulate_sys_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params) { const struct sys_reg_desc *r; @@ -2410,7 +2554,10 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, if (likely(r)) { perform_access(vcpu, params, r); - } else if (is_imp_def_sys_reg(params)) { + return true; + } + + if (is_imp_def_sys_reg(params)) { kvm_inject_undefined(vcpu); } else { print_sys_reg_msg(params, @@ -2418,7 +2565,7 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); kvm_inject_undefined(vcpu); } - return 1; + return false; } /** @@ -2446,18 +2593,18 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) struct sys_reg_params params; unsigned long esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); - int ret; trace_kvm_handle_sys_reg(esr); params = esr_sys64_to_params(esr); params.regval = vcpu_get_reg(vcpu, Rt); - ret = emulate_sys_reg(vcpu, ¶ms); + if (!emulate_sys_reg(vcpu, ¶ms)) + return 1; if (!params.is_write) vcpu_set_reg(vcpu, Rt, params.regval); - return ret; + return 1; } /****************************************************************************** @@ -2860,18 +3007,22 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(uindices); } -void kvm_sys_reg_table_init(void) +int kvm_sys_reg_table_init(void) { + bool valid = true; unsigned int i; struct sys_reg_desc clidr; /* Make sure tables are unique and in order. */ - BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false)); - BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true)); - BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true)); - BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true)); - BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true)); - BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false)); + valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false); + valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true); + valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true); + valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true); + valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true); + valid &= check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false); + + if (!valid) + return -EINVAL; /* We abuse the reset function to overwrite the table itself. */ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) @@ -2894,4 +3045,6 @@ void kvm_sys_reg_table_init(void) break; /* Clear all higher bits. 
*/ cache_levels &= (1 << (i*3))-1; + + return 0; } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index cc0cc95a0280..aee8ea054f0d 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -35,12 +35,19 @@ struct sys_reg_params { .Op2 = ((esr) >> 17) & 0x7, \ .is_write = !((esr) & 1) }) +#define esr_cp1x_32_to_params(esr) \ + ((struct sys_reg_params){ .Op1 = ((esr) >> 14) & 0x7, \ + .CRn = ((esr) >> 10) & 0xf, \ + .CRm = ((esr) >> 1) & 0xf, \ + .Op2 = ((esr) >> 17) & 0x7, \ + .is_write = !((esr) & 1) }) + struct sys_reg_desc { /* Sysreg string for debug */ const char *name; enum { - AA32_ZEROHIGH, + AA32_DIRECT, AA32_LO, AA32_HI, } aarch32_map; diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 77feafd5c0e3..f6d4f4052555 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -319,7 +319,12 @@ int vgic_init(struct kvm *kvm) vgic_debug_init(kvm); - dist->implementation_rev = 2; + /* + * If userspace didn't set the GIC implementation revision, + * default to the latest and greatest. You know you want it. + */ + if (!dist->implementation_rev) + dist->implementation_rev = KVM_VGIC_IMP_REV_LATEST; dist->initialized = true; out: diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 2e13402be3bd..9d3299a70242 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -683,7 +683,7 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, if (!vcpu) return E_ITS_INT_UNMAPPED_INTERRUPT; - if (!vcpu->arch.vgic_cpu.lpis_enabled) + if (!vgic_lpis_enabled(vcpu)) return -EBUSY; vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq); @@ -894,6 +894,18 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, return update_affinity(ite->irq, vcpu); } +static bool __is_visible_gfn_locked(struct vgic_its *its, gpa_t gpa) +{ + gfn_t gfn = gpa >> PAGE_SHIFT; + int idx; + bool ret; + + idx = srcu_read_lock(&its->dev->kvm->srcu); + ret = kvm_is_visible_gfn(its->dev->kvm, gfn); + srcu_read_unlock(&its->dev->kvm->srcu, idx); + return ret; +} + /* * Check whether an ID can be stored into the corresponding guest table. * For a direct table this is pretty easy, but gets a bit nasty for @@ -908,9 +920,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, u64 indirect_ptr, type = GITS_BASER_TYPE(baser); phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); int esz = GITS_BASER_ENTRY_SIZE(baser); - int index, idx; - gfn_t gfn; - bool ret; + int index; switch (type) { case GITS_BASER_TYPE_DEVICE: @@ -933,12 +943,11 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, return false; addr = base + id * esz; - gfn = addr >> PAGE_SHIFT; if (eaddr) *eaddr = addr; - goto out; + return __is_visible_gfn_locked(its, addr); } /* calculate and check the index into the 1st level */ @@ -964,27 +973,42 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, /* Find the address of the actual entry */ index = id % (SZ_64K / esz); indirect_ptr += index * esz; - gfn = indirect_ptr >> PAGE_SHIFT; if (eaddr) *eaddr = indirect_ptr; -out: - idx = srcu_read_lock(&its->dev->kvm->srcu); - ret = kvm_is_visible_gfn(its->dev->kvm, gfn); - srcu_read_unlock(&its->dev->kvm->srcu, idx); - return ret; + return __is_visible_gfn_locked(its, indirect_ptr); } +/* + * Check whether an event ID can be stored in the corresponding Interrupt + * Translation Table, which starts at device->itt_addr. 
+ */ +static bool vgic_its_check_event_id(struct vgic_its *its, struct its_device *device, + u32 event_id) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int ite_esz = abi->ite_esz; + gpa_t gpa; + + /* max table size is: BIT_ULL(device->num_eventid_bits) * ite_esz */ + if (event_id >= BIT_ULL(device->num_eventid_bits)) + return false; + + gpa = device->itt_addr + event_id * ite_esz; + return __is_visible_gfn_locked(its, gpa); +} + +/* + * Add a new collection into the ITS collection table. + * Returns 0 on success, and a negative error value for generic errors. + */ static int vgic_its_alloc_collection(struct vgic_its *its, struct its_collection **colp, u32 coll_id) { struct its_collection *collection; - if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL)) - return E_ITS_MAPC_COLLECTION_OOR; - collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT); if (!collection) return -ENOMEM; @@ -1061,7 +1085,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, if (!device) return E_ITS_MAPTI_UNMAPPED_DEVICE; - if (event_id >= BIT_ULL(device->num_eventid_bits)) + if (!vgic_its_check_event_id(its, device, event_id)) return E_ITS_MAPTI_ID_OOR; if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI) @@ -1078,7 +1102,12 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, collection = find_collection(its, coll_id); if (!collection) { - int ret = vgic_its_alloc_collection(its, &collection, coll_id); + int ret; + + if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL)) + return E_ITS_MAPC_COLLECTION_OOR; + + ret = vgic_its_alloc_collection(its, &collection, coll_id); if (ret) return ret; new_coll = collection; @@ -1233,6 +1262,10 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, if (!collection) { int ret; + if (!vgic_its_check_id(its, its->baser_coll_table, + coll_id, NULL)) + return E_ITS_MAPC_COLLECTION_OOR; + ret = vgic_its_alloc_collection(its, &collection, coll_id); if (ret) @@ -1272,6 +1305,11 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, return 0; } +int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq) +{ + return update_lpi_config(kvm, irq, NULL, true); +} + /* * The INV command syncs the configuration bits from the memory table. * Must be called with the its_lock mutex held. @@ -1288,7 +1326,41 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, if (!ite) return E_ITS_INV_UNMAPPED_INTERRUPT; - return update_lpi_config(kvm, ite->irq, NULL, true); + return vgic_its_inv_lpi(kvm, ite->irq); +} + +/** + * vgic_its_invall - invalidate all LPIs targeting a given vcpu + * @vcpu: the vcpu for which the RD is targeted by an invalidation + * + * Contrary to the INVALL command, this targets a RD instead of a + * collection, and we don't need to hold the its_lock, since no ITS is + * involved here. 
+ */ +int vgic_its_invall(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + int irq_count, i = 0; + u32 *intids; + + irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids); + if (irq_count < 0) + return irq_count; + + for (i = 0; i < irq_count; i++) { + struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; + update_lpi_config(kvm, irq, vcpu, false); + vgic_put_irq(kvm, irq); + } + + kfree(intids); + + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm) + its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe); + + return 0; } /* @@ -1305,32 +1377,13 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, u32 coll_id = its_cmd_get_collection(its_cmd); struct its_collection *collection; struct kvm_vcpu *vcpu; - struct vgic_irq *irq; - u32 *intids; - int irq_count, i; collection = find_collection(its, coll_id); if (!its_is_collection_mapped(collection)) return E_ITS_INVALL_UNMAPPED_COLLECTION; vcpu = kvm_get_vcpu(kvm, collection->target_addr); - - irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids); - if (irq_count < 0) - return irq_count; - - for (i = 0; i < irq_count; i++) { - irq = vgic_get_irq(kvm, NULL, intids[i]); - if (!irq) - continue; - update_lpi_config(kvm, irq, vcpu, false); - vgic_put_irq(kvm, irq); - } - - kfree(intids); - - if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm) - its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe); + vgic_its_invall(vcpu); return 0; } @@ -2175,6 +2228,9 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, if (!collection) return -EINVAL; + if (!vgic_its_check_event_id(its, dev, event_id)) + return -EINVAL; + ite = vgic_its_alloc_ite(dev, collection, event_id); if (IS_ERR(ite)) return PTR_ERR(ite); @@ -2183,8 +2239,10 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, vcpu = kvm_get_vcpu(kvm, collection->target_addr); irq = vgic_add_lpi(kvm, lpi_id, vcpu); - if (IS_ERR(irq)) + if (IS_ERR(irq)) { + its_free_ite(kvm, ite); return PTR_ERR(irq); + } ite->irq = irq; return offset; @@ -2296,6 +2354,7 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id, void *ptr, void *opaque) { struct its_device *dev; + u64 baser = its->baser_device_table; gpa_t itt_addr; u8 num_eventid_bits; u64 entry = *(u64 *)ptr; @@ -2316,6 +2375,9 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id, /* dte entry is valid */ offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT; + if (!vgic_its_check_id(its, baser, id, NULL)) + return -EINVAL; + dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits); if (IS_ERR(dev)) return PTR_ERR(dev); @@ -2445,6 +2507,9 @@ static int vgic_its_restore_device_tables(struct vgic_its *its) if (ret > 0) ret = 0; + if (ret < 0) + vgic_its_free_device_list(its->dev->kvm, its); + return ret; } @@ -2461,6 +2526,11 @@ static int vgic_its_save_cte(struct vgic_its *its, return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz); } +/* + * Restore a collection entry into the ITS collection table. + * Return +1 on success, 0 if the entry was invalid (which should be + * interpreted as end-of-table), and a negative error value for generic errors. 
+ */ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) { struct its_collection *collection; @@ -2487,6 +2557,10 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) collection = find_collection(its, coll_id); if (collection) return -EEXIST; + + if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL)) + return -EINVAL; + ret = vgic_its_alloc_collection(its, &collection, coll_id); if (ret) return ret; @@ -2566,6 +2640,9 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) if (ret > 0) return 0; + if (ret < 0) + vgic_its_free_collection_list(its->dev->kvm, its); + return ret; } @@ -2597,7 +2674,10 @@ static int vgic_its_restore_tables_v0(struct vgic_its *its) if (ret) return ret; - return vgic_its_restore_device_tables(its); + ret = vgic_its_restore_device_tables(its); + if (ret) + vgic_its_free_collection_list(its->dev->kvm, its); + return ret; } static int vgic_its_commit_v0(struct vgic_its *its) diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index 12e4c223e6b8..77a67e9d3d14 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -73,9 +73,13 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u32 reg; + switch (addr & 0x0c) { case GIC_DIST_IIDR: - if (val != vgic_mmio_read_v2_misc(vcpu, addr, len)) + reg = vgic_mmio_read_v2_misc(vcpu, addr, len); + if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK) return -EINVAL; /* @@ -87,8 +91,16 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu, * migration from old kernels to new kernels with legacy * userspace. */ - vcpu->kvm->arch.vgic.v2_groups_user_writable = true; - return 0; + reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); + switch (reg) { + case KVM_VGIC_IMP_REV_2: + case KVM_VGIC_IMP_REV_3: + vcpu->kvm->arch.vgic.v2_groups_user_writable = true; + dist->implementation_rev = reg; + return 0; + default: + return -EINVAL; + } } vgic_mmio_write_v2_misc(vcpu, addr, len, val); diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 58e40b4874f8..f7aa7bcd6fb8 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -155,13 +155,27 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu, unsigned long val) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u32 reg; switch (addr & 0x0c) { case GICD_TYPER2: - case GICD_IIDR: if (val != vgic_mmio_read_v3_misc(vcpu, addr, len)) return -EINVAL; return 0; + case GICD_IIDR: + reg = vgic_mmio_read_v3_misc(vcpu, addr, len); + if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK) + return -EINVAL; + + reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); + switch (reg) { + case KVM_VGIC_IMP_REV_2: + case KVM_VGIC_IMP_REV_3: + dist->implementation_rev = reg; + return 0; + default: + return -EINVAL; + } case GICD_CTLR: /* Not a GICv4.1? No HW SGIs */ if (!kvm_vgic_global_state.has_gicv4_1) @@ -221,34 +235,58 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, vgic_put_irq(vcpu->kvm, irq); } +bool vgic_lpis_enabled(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + return atomic_read(&vgic_cpu->ctlr) == GICR_CTLR_ENABLE_LPIS; +} + static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + unsigned long val; - return vgic_cpu->lpis_enabled ? 
GICR_CTLR_ENABLE_LPIS : 0; -} + val = atomic_read(&vgic_cpu->ctlr); + if (vgic_get_implementation_rev(vcpu) >= KVM_VGIC_IMP_REV_3) + val |= GICR_CTLR_IR | GICR_CTLR_CES; + return val; +} static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - bool was_enabled = vgic_cpu->lpis_enabled; + u32 ctlr; if (!vgic_has_its(vcpu->kvm)) return; - vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; + if (!(val & GICR_CTLR_ENABLE_LPIS)) { + /* + * Don't disable if RWP is set, as there already an + * ongoing disable. Funky guest... + */ + ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr, + GICR_CTLR_ENABLE_LPIS, + GICR_CTLR_RWP); + if (ctlr != GICR_CTLR_ENABLE_LPIS) + return; - if (was_enabled && !vgic_cpu->lpis_enabled) { vgic_flush_pending_lpis(vcpu); vgic_its_invalidate_cache(vcpu->kvm); - } + atomic_set_release(&vgic_cpu->ctlr, 0); + } else { + ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr, 0, + GICR_CTLR_ENABLE_LPIS); + if (ctlr != 0) + return; - if (!was_enabled && vgic_cpu->lpis_enabled) vgic_enable_lpis(vcpu); + } } static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu) @@ -478,11 +516,10 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu, unsigned long val) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; u64 old_propbaser, propbaser; /* Storing a value with LPIs already enabled is undefined */ - if (vgic_cpu->lpis_enabled) + if (vgic_lpis_enabled(vcpu)) return; do { @@ -513,7 +550,7 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, u64 old_pendbaser, pendbaser; /* Storing a value with LPIs already enabled is undefined */ - if (vgic_cpu->lpis_enabled) + if (vgic_lpis_enabled(vcpu)) return; do { @@ -525,6 +562,63 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, pendbaser) != old_pendbaser); } +static unsigned long vgic_mmio_read_sync(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return !!atomic_read(&vcpu->arch.vgic_cpu.syncr_busy); +} + +static void vgic_set_rdist_busy(struct kvm_vcpu *vcpu, bool busy) +{ + if (busy) { + atomic_inc(&vcpu->arch.vgic_cpu.syncr_busy); + smp_mb__after_atomic(); + } else { + smp_mb__before_atomic(); + atomic_dec(&vcpu->arch.vgic_cpu.syncr_busy); + } +} + +static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_irq *irq; + + /* + * If the guest wrote only to the upper 32bit part of the + * register, drop the write on the floor, as it is only for + * vPEs (which we don't support for obvious reasons). + * + * Also discard the access if LPIs are not enabled. 
+ */ + if ((addr & 4) || !vgic_lpis_enabled(vcpu)) + return; + + vgic_set_rdist_busy(vcpu, true); + + irq = vgic_get_irq(vcpu->kvm, NULL, lower_32_bits(val)); + if (irq) { + vgic_its_inv_lpi(vcpu->kvm, irq); + vgic_put_irq(vcpu->kvm, irq); + } + + vgic_set_rdist_busy(vcpu, false); +} + +static void vgic_mmio_write_invall(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + /* See vgic_mmio_write_invlpi() for the early return rationale */ + if ((addr & 4) || !vgic_lpis_enabled(vcpu)) + return; + + vgic_set_rdist_busy(vcpu, true); + vgic_its_invall(vcpu); + vgic_set_rdist_busy(vcpu, false); +} + /* * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the * redistributors, while SPIs are covered by registers in the distributor @@ -630,6 +724,15 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = { REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER, vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_INVLPIR, + vgic_mmio_read_raz, vgic_mmio_write_invlpi, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_INVALLR, + vgic_mmio_read_raz, vgic_mmio_write_invall, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_SYNCR, + vgic_mmio_read_sync, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, VGIC_ACCESS_32bit), diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index b549af8b1dc2..826ff6f2a4e7 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -612,6 +612,10 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); static const struct midr_range broken_seis[] = { MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX), {}, }; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 3fd6c86a7ef3..4c6bdd321faa 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -98,6 +98,11 @@ #define DEBUG_SPINLOCK_BUG_ON(p) #endif +static inline u32 vgic_get_implementation_rev(struct kvm_vcpu *vcpu) +{ + return vcpu->kvm->arch.vgic.implementation_rev; +} + /* Requires the irq_lock to be held by the caller. 
*/ static inline bool irq_is_pending(struct vgic_irq *irq) { @@ -308,6 +313,7 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size) (base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE); } +bool vgic_lpis_enabled(struct kvm_vcpu *vcpu); int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr); int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, u32 devid, u32 eventid, struct vgic_irq **irq); @@ -317,6 +323,10 @@ void vgic_lpi_translation_cache_init(struct kvm *kvm); void vgic_lpi_translation_cache_destroy(struct kvm *kvm); void vgic_its_invalidate_cache(struct kvm *kvm); +/* GICv4.1 MMIO interface */ +int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq); +int vgic_its_invall(struct kvm_vcpu *vcpu); + bool vgic_supports_direct_msis(struct kvm *kvm); int vgic_v4_init(struct kvm *kvm); void vgic_v4_teardown(struct kvm *kvm); diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c index 1688af0a4c97..5b7890139bc2 100644 --- a/arch/arm64/lib/delay.c +++ b/arch/arm64/lib/delay.c @@ -27,7 +27,17 @@ void __delay(unsigned long cycles) { cycles_t start = get_cycles(); - if (arch_timer_evtstrm_available()) { + if (cpus_have_const_cap(ARM64_HAS_WFXT)) { + u64 end = start + cycles; + + /* + * Start with WFIT. If an interrupt makes us resume + * early, use a WFET loop to complete the delay. + */ + wfit(end); + while ((get_cycles() - start) < cycles) + wfet(end); + } else if (arch_timer_evtstrm_available()) { const cycles_t timer_evt_period = USECS_TO_CYCLES(ARCH_TIMER_EVT_STREAM_PERIOD_US); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8ac25f19084e..1e7b1550e2fc 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL(memstart_addr); * In this scheme a comparatively quicker boot is observed. * * If ZONE_DMA configs are defined, crash kernel memory reservation - * is delayed until DMA zone memory range size initilazation performed in + * is delayed until DMA zone memory range size initialization performed in * zone_sizes_init(). The defer is necessary to steer clear of DMA zone * memory range to avoid overlap allocation. So crash kernel memory boundaries * are not known when mapping all bank memory ranges, which otherwise means @@ -81,7 +81,7 @@ EXPORT_SYMBOL(memstart_addr); * so page-granularity mappings are created for the entire memory range. * Hence a slightly slower boot is observed. * - * Note: Page-granularity mapppings are necessary for crash kernel memory + * Note: Page-granularity mappings are necessary for crash kernel memory * range for shrinking its size via /sys/kernel/kexec_crash_size interface. */ #if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32) diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 3ed418f70e3b..507b20373953 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -38,11 +38,14 @@ HAS_STAGE2_FWB HAS_SYSREG_GIC_CPUIF HAS_TLB_RANGE HAS_VIRT_HOST_EXTN +HAS_WFXT HW_DBM KVM_PROTECTED_MODE MISMATCHED_CACHE_TYPE MTE MTE_ASYMM +SME +SME_FA64 SPECTRE_V2 SPECTRE_V3A SPECTRE_V4 |
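
The ITS save/restore fixes above tighten the helpers that walk the guest-provided device and collection tables, and the new comment spells out their return convention: a positive value to keep scanning, 0 for an invalid entry that terminates the table, and a negative errno on failure, in which case the caller drops any partially restored state. The small standalone C sketch below illustrates that convention only; every name in it is made up for the example and none of it is kernel code.

#include <stdio.h>

/* Per-entry handler: >0 keep scanning, 0 end of table, <0 error. */
static int restore_entry(const int *table, int idx)
{
	int entry = table[idx];

	if (entry == 0)
		return 0;		/* invalid entry: treat as end of table */
	if (entry < 0)
		return -22;		/* corrupt entry: abort the scan (-EINVAL style) */
	printf("restored entry %d -> %d\n", idx, entry);
	return 1;			/* valid entry: continue with the next slot */
}

static int scan_table(const int *table, int nr_entries)
{
	int i, ret = 1;

	for (i = 0; i < nr_entries && ret > 0; i++)
		ret = restore_entry(table, i);

	if (ret < 0)
		printf("error: dropping partially restored state\n");

	return ret > 0 ? 0 : ret;	/* reaching the end of the table is success */
}

int main(void)
{
	int table[] = { 5, 7, 0, 9 };	/* third slot terminates the scan */

	return scan_table(table, 4);
}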
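
The GICD_IIDR changes in vgic-mmio-v2.c and vgic-mmio-v3.c let userspace select a supported implementation revision while rejecting writes that touch any field other than Revision. Below is a minimal userspace sketch of that check, assuming an illustrative bit layout for the Revision field; IIDR_REVISION_MASK, IMP_REV_* and iidr_uaccess_write() are stand-ins for the example, not the kernel's definitions.

#include <stdint.h>
#include <stdio.h>

#define IIDR_REVISION_MASK	0x000f0000u	/* assumed placement of the Revision field */
#define IIDR_REVISION_SHIFT	16

enum { IMP_REV_2 = 2, IMP_REV_3 = 3 };		/* stand-ins for the supported revisions */

/* Returns 0 and stores the new revision on success, -1 on an invalid write. */
static int iidr_uaccess_write(uint32_t emulated, uint32_t val, uint32_t *rev_out)
{
	uint32_t rev;

	/* Reject a write that changes anything outside the Revision field. */
	if ((emulated ^ val) & ~IIDR_REVISION_MASK)
		return -1;

	rev = (val & IIDR_REVISION_MASK) >> IIDR_REVISION_SHIFT;
	switch (rev) {
	case IMP_REV_2:
	case IMP_REV_3:
		*rev_out = rev;		/* remember which revision userspace asked for */
		return 0;
	default:
		return -1;		/* unknown revision */
	}
}

int main(void)
{
	uint32_t rev, emulated = (IMP_REV_3 << IIDR_REVISION_SHIFT) | 0x43b;

	if (!iidr_uaccess_write(emulated, (IMP_REV_2 << IIDR_REVISION_SHIFT) | 0x43b, &rev))
		printf("accepted revision %u\n", rev);
	return 0;
}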
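
The rework of vgic_mmio_write_v3r_ctlr() replaces the old lpis_enabled flag with an atomic GICR_CTLR state so that concurrent enables and disables cannot race: only the 0 -> ENABLE_LPIS transition performs the enable work, and a disable first parks the register in an RWP ("write pending") state while pending LPIs are flushed. The standalone C11 sketch below shows the same compare-and-swap state machine; the flag values and function names are illustrative, not the architecture's encodings.

#include <stdatomic.h>
#include <stdio.h>

#define CTLR_ENABLE_LPIS	(1u << 0)	/* illustrative stand-in */
#define CTLR_RWP		(1u << 3)	/* illustrative "disable in progress" marker */

static _Atomic unsigned int ctlr;

static void enable_lpis(void)
{
	unsigned int expected = 0;

	/* Only the transition 0 -> ENABLE_LPIS does the setup work. */
	if (!atomic_compare_exchange_strong(&ctlr, &expected, CTLR_ENABLE_LPIS))
		return;			/* already enabled, or a disable is in flight */
	puts("LPIs enabled");
}

static void disable_lpis(void)
{
	unsigned int expected = CTLR_ENABLE_LPIS;

	/* Park the register in the RWP state while tearing things down. */
	if (!atomic_compare_exchange_strong(&ctlr, &expected, CTLR_RWP))
		return;			/* not enabled, or another disable owns RWP */
	puts("flushing pending LPIs");
	atomic_store(&ctlr, 0);		/* teardown done: clear RWP */
}

int main(void)
{
	enable_lpis();
	disable_lpis();
	disable_lpis();			/* a repeated disable is silently dropped */
	return 0;
}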
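
The new GICR_INVLPIR/GICR_INVALLR/GICR_SYNCR handlers bracket each invalidation with a per-vcpu busy counter, and a read of GICR_SYNCR simply reports whether any invalidation is still outstanding so the guest can poll for completion. A userspace sketch of that pattern follows; the names are illustrative, and the kernel versions additionally order the counter updates against the invalidation work with memory barriers.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int syncr_busy;

static void set_rdist_busy(int busy)
{
	if (busy)
		atomic_fetch_add(&syncr_busy, 1);
	else
		atomic_fetch_sub(&syncr_busy, 1);
}

static unsigned int read_syncr(void)
{
	return !!atomic_load(&syncr_busy);	/* 1 while an invalidation is running */
}

static void write_invall(void)
{
	set_rdist_busy(1);
	puts("invalidating all LPI configuration");	/* stand-in for the real work */
	set_rdist_busy(0);
}

int main(void)
{
	printf("SYNCR before: %u\n", read_syncr());
	write_invall();
	printf("SYNCR after: %u\n", read_syncr());
	return 0;
}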
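
The __delay() change uses the WFxT instructions: arm a single WFIT against an absolute deadline and, if an interrupt wakes the CPU early, fall back to a WFET loop against the same deadline until it has genuinely expired. The portable userspace sketch below mimics the shape of that loop, with clock_nanosleep(TIMER_ABSTIME) standing in for WFIT/WFET; it is an analogy under that assumption, not the kernel implementation.

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <time.h>

static int deadline_passed(const struct timespec *end)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return now.tv_sec > end->tv_sec ||
	       (now.tv_sec == end->tv_sec && now.tv_nsec >= end->tv_nsec);
}

static void delay_ns(long ns)
{
	struct timespec end;

	clock_gettime(CLOCK_MONOTONIC, &end);
	end.tv_nsec += ns;
	end.tv_sec += end.tv_nsec / 1000000000L;
	end.tv_nsec %= 1000000000L;

	/*
	 * This loop plays the role of the initial wfit() and the wfet()
	 * re-checks: an early wakeup (a signal here, an interrupt in the
	 * kernel) just re-arms a wait against the same absolute deadline.
	 */
	while (!deadline_passed(&end))
		clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &end, NULL);
}

int main(void)
{
	delay_ns(5 * 1000 * 1000);	/* roughly 5 ms */
	puts("delay complete");
	return 0;
}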