diff options
Diffstat (limited to 'arch')
420 files changed, 10562 insertions, 8683 deletions
diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h index 5816a31c1b38..2c89c1c55712 100644 --- a/arch/alpha/kernel/proto.h +++ b/arch/alpha/kernel/proto.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/interrupt.h> +#include <linux/screen_info.h> #include <linux/io.h> /* Prototypes of functions used across modules here in this directory. */ @@ -113,6 +114,7 @@ extern int boot_cpuid; #ifdef CONFIG_VERBOSE_MCHECK extern unsigned long alpha_verbose_mcheck; #endif +extern struct screen_info vgacon_screen_info; /* srmcons.c */ #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM) diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index c80258ec332f..0738f9396f95 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -131,13 +131,14 @@ static void determine_cpu_caches (unsigned int); static char __initdata command_line[COMMAND_LINE_SIZE]; +#ifdef CONFIG_VGA_CONSOLE /* * The format of "screen_info" is strange, and due to early * i386-setup code. This is just enough to make the console * code think we're on a VGA color display. */ -struct screen_info screen_info = { +struct screen_info vgacon_screen_info = { .orig_x = 0, .orig_y = 25, .orig_video_cols = 80, @@ -145,8 +146,7 @@ struct screen_info screen_info = { .orig_video_isVGA = 1, .orig_video_points = 16 }; - -EXPORT_SYMBOL(screen_info); +#endif /* * The direct map I/O window, if any. This should be the same @@ -652,7 +652,7 @@ setup_arch(char **cmdline_p) #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; + vgacon_register_screen(&vgacon_screen_info); #endif #endif diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c index 7c420d8dac53..086488ed83a7 100644 --- a/arch/alpha/kernel/sys_sio.c +++ b/arch/alpha/kernel/sys_sio.c @@ -57,11 +57,13 @@ sio_init_irq(void) static inline void __init alphabook1_init_arch(void) { +#ifdef CONFIG_VGA_CONSOLE /* The AlphaBook1 has LCD video fixed at 800x600, 37 rows and 100 cols. */ - screen_info.orig_y = 37; - screen_info.orig_video_cols = 100; - screen_info.orig_video_lines = 37; + vgacon_screen_info.orig_y = 37; + vgacon_screen_info.orig_video_cols = 100; + vgacon_screen_info.orig_video_lines = 37; +#endif lca_init_arch(); } diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 547e5856eaa0..5ba42f69f8ce 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -304,11 +304,7 @@ $(INSTALL_TARGETS): KBUILD_IMAGE = $(boot)/$(patsubst %install,%Image,$@) $(INSTALL_TARGETS): $(call cmd,install) -PHONY += vdso_install -vdso_install: -ifeq ($(CONFIG_VDSO),y) - $(Q)$(MAKE) $(build)=arch/arm/vdso $@ -endif +vdso-install-$(CONFIG_VDSO) += arch/arm/vdso/vdso.so.dbg # My testing targets (bypasses dependencies) bp:; $(Q)$(MAKE) $(build)=$(boot) $(boot)/bootpImage @@ -331,7 +327,6 @@ define archhelp echo ' Install using (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' echo ' install to $$(INSTALL_PATH) and run lilo' - echo ' vdso_install - Install unstripped vdso.so to $$(INSTALL_MOD_PATH)/vdso' echo echo ' multi_v7_lpae_defconfig - multi_v7_defconfig with CONFIG_ARM_LPAE enabled' endef diff --git a/arch/arm/boot/dts/qcom/qcom-msm8226.dtsi b/arch/arm/boot/dts/qcom/qcom-msm8226.dtsi index 98cc5ea637e1..97a377b5a0ec 100644 --- a/arch/arm/boot/dts/qcom/qcom-msm8226.dtsi +++ b/arch/arm/boot/dts/qcom/qcom-msm8226.dtsi @@ -242,6 +242,17 @@ status = "disabled"; }; + blsp1_uart2: serial@f991e000 { + compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm"; + reg = <0xf991e000 0x1000>; + interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>, + <&gcc GCC_BLSP1_AHB_CLK>; + clock-names = "core", + "iface"; + status = "disabled"; + }; + blsp1_uart3: serial@f991f000 { compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm"; reg = <0xf991f000 0x1000>; @@ -325,6 +336,21 @@ #size-cells = <0>; }; + blsp1_i2c6: i2c@f9928000 { + compatible = "qcom,i2c-qup-v2.1.1"; + reg = <0xf9928000 0x1000>; + interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&gcc GCC_BLSP1_QUP6_I2C_APPS_CLK>, + <&gcc GCC_BLSP1_AHB_CLK>; + clock-names = "core", + "iface"; + pinctrl-0 = <&blsp1_i2c6_pins>; + pinctrl-names = "default"; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + cci: cci@fda0c000 { compatible = "qcom,msm8226-cci"; #address-cells = <1>; @@ -472,6 +498,13 @@ bias-disable; }; + blsp1_i2c6_pins: blsp1-i2c6-state { + pins = "gpio22", "gpio23"; + function = "blsp_i2c6"; + drive-strength = <2>; + bias-disable; + }; + cci_default: cci-default-state { pins = "gpio29", "gpio30"; function = "cci_i2c0"; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index bb9f0e5b0b63..10fd74bf85f9 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1076,7 +1076,6 @@ CONFIG_QCOM_IPCC=y CONFIG_OMAP_IOMMU=y CONFIG_OMAP_IOMMU_DEBUG=y CONFIG_ROCKCHIP_IOMMU=y -CONFIG_TEGRA_IOMMU_GART=y CONFIG_TEGRA_IOMMU_SMMU=y CONFIG_EXYNOS_IOMMU=y CONFIG_QCOM_IOMMU=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index b2f0862f4bd9..7b1b41b4b160 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -477,7 +477,6 @@ CONFIG_LIRC=y CONFIG_RC_DEVICES=y CONFIG_IR_GPIO_TX=m CONFIG_IR_PWM_TX=m -CONFIG_IR_RX51=m CONFIG_IR_SPI=m CONFIG_MEDIA_SUPPORT=m CONFIG_V4L_PLATFORM_DRIVERS=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 23c131b0854b..9e81b1849e4c 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -100,7 +100,6 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_AR7_PARTS=m CONFIG_MTD_CMDLINE_PARTS=m CONFIG_MTD_OF_PARTS=m CONFIG_MTD_AFS_PARTS=m diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 613f07b8ce15..8635b7216bfc 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -292,7 +292,6 @@ CONFIG_CHROME_PLATFORMS=y CONFIG_CROS_EC=y CONFIG_CROS_EC_I2C=m CONFIG_CROS_EC_SPI=m -CONFIG_TEGRA_IOMMU_GART=y CONFIG_TEGRA_IOMMU_SMMU=y CONFIG_ARCH_TEGRA_2x_SOC=y CONFIG_ARCH_TEGRA_3x_SOC=y diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h index 546af8b1e3f6..cc106f946c69 100644 --- a/arch/arm/include/asm/setup.h +++ b/arch/arm/include/asm/setup.h @@ -11,6 +11,7 @@ #ifndef __ASMARM_SETUP_H #define __ASMARM_SETUP_H +#include <linux/screen_info.h> #include <uapi/asm/setup.h> @@ -35,4 +36,8 @@ void early_mm_init(const struct machine_desc *); void adjust_lowmem_bounds(void); void setup_dma_zone(const struct machine_desc *desc); +#ifdef CONFIG_VGA_CONSOLE +extern struct screen_info vgacon_screen_info; +#endif + #endif diff --git a/arch/arm/include/asm/vga.h b/arch/arm/include/asm/vga.h index 7c0bee57855a..6c430ec371df 100644 --- a/arch/arm/include/asm/vga.h +++ b/arch/arm/include/asm/vga.h @@ -5,6 +5,7 @@ #include <linux/io.h> extern unsigned long vga_base; +extern struct screen_info vgacon_screen_info; #define VGA_MAP_MEM(x,s) (vga_base + (x)) diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index 33f6eb5213a5..4ec591bde3df 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -69,18 +69,18 @@ static int __init parse_tag_mem32(const struct tag *tag) __tagtable(ATAG_MEM, parse_tag_mem32); -#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) +#if defined(CONFIG_ARCH_FOOTBRIDGE) && defined(CONFIG_VGA_CONSOLE) static int __init parse_tag_videotext(const struct tag *tag) { - screen_info.orig_x = tag->u.videotext.x; - screen_info.orig_y = tag->u.videotext.y; - screen_info.orig_video_page = tag->u.videotext.video_page; - screen_info.orig_video_mode = tag->u.videotext.video_mode; - screen_info.orig_video_cols = tag->u.videotext.video_cols; - screen_info.orig_video_ega_bx = tag->u.videotext.video_ega_bx; - screen_info.orig_video_lines = tag->u.videotext.video_lines; - screen_info.orig_video_isVGA = tag->u.videotext.video_isvga; - screen_info.orig_video_points = tag->u.videotext.video_points; + vgacon_screen_info.orig_x = tag->u.videotext.x; + vgacon_screen_info.orig_y = tag->u.videotext.y; + vgacon_screen_info.orig_video_page = tag->u.videotext.video_page; + vgacon_screen_info.orig_video_mode = tag->u.videotext.video_mode; + vgacon_screen_info.orig_video_cols = tag->u.videotext.video_cols; + vgacon_screen_info.orig_video_ega_bx = tag->u.videotext.video_ega_bx; + vgacon_screen_info.orig_video_lines = tag->u.videotext.video_lines; + vgacon_screen_info.orig_video_isVGA = tag->u.videotext.video_isvga; + vgacon_screen_info.orig_video_points = tag->u.videotext.video_points; return 0; } diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c index e94655ef16bb..6f9ec7d28a71 100644 --- a/arch/arm/kernel/efi.c +++ b/arch/arm/kernel/efi.c @@ -123,12 +123,6 @@ void __init arm_efi_init(void) { efi_init(); - if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI) { - /* dummycon on ARM needs non-zero values for columns/lines */ - screen_info.orig_video_cols = 80; - screen_info.orig_video_lines = 25; - } - /* ARM does not permit early mappings to persist across paging_init() */ efi_memmap_unmap(); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index b01cac05bd4c..ff2299ce1ad7 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -928,9 +928,8 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) request_resource(&ioport_resource, &lp2); } -#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) || \ - defined(CONFIG_EFI) -struct screen_info screen_info = { +#if defined(CONFIG_VGA_CONSOLE) +struct screen_info vgacon_screen_info = { .orig_video_lines = 30, .orig_video_cols = 80, .orig_video_mode = 0, @@ -1194,7 +1193,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; + vgacon_register_screen(&vgacon_screen_info); #endif #endif diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 515ca33b854c..d761bd2e2f40 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -63,28 +63,3 @@ quiet_cmd_vdsold_and_vdso_check = LD $@ quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@ - -# -# Install the unstripped copy of vdso.so.dbg. If our toolchain -# supports build-id, install .build-id links as well. -# -# Cribbed from arch/x86/vdso/Makefile. -# -quiet_cmd_vdso_install = INSTALL $< -define cmd_vdso_install - cp $< "$(MODLIB)/vdso/vdso.so"; \ - if readelf -n $< | grep -q 'Build ID'; then \ - buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \ - first=`echo $$buildid | cut -b-2`; \ - last=`echo $$buildid | cut -b3-`; \ - mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \ - ln -sf "../../vdso.so" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \ - fi -endef - -$(MODLIB)/vdso: FORCE - @mkdir -p $(MODLIB)/vdso - -PHONY += vdso_install -vdso_install: $(obj)/vdso.so.dbg $(MODLIB)/vdso - $(call cmd,vdso_install) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 2d49aea0ff67..4bd85cc0d32b 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -169,12 +169,6 @@ install: KBUILD_IMAGE := $(boot)/Image install zinstall: $(call cmd,install) -PHONY += vdso_install -vdso_install: - $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ - $(if $(CONFIG_COMPAT_VDSO), \ - $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@) - archprepare: $(Q)$(MAKE) $(build)=arch/arm64/tools kapi ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) @@ -205,6 +199,9 @@ ifdef CONFIG_COMPAT_VDSO endif endif +vdso-install-y += arch/arm64/kernel/vdso/vdso.so.dbg +vdso-install-$(CONFIG_COMPAT_VDSO) += arch/arm64/kernel/vdso32/vdso.so.dbg:vdso32.so + include $(srctree)/scripts/Makefile.defconf PHONY += virtconfig diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 4a74f3b6d775..bf7de35ffcbc 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -859,7 +859,7 @@ pinctrl-names = "default"; pinctrl-0 = <&scp_pins>; - cros_ec { + cros-ec-rpmsg { compatible = "google,cros-ec-rpmsg"; mediatek,rpmsg-name = "cros-ec-rpmsg"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index 1447eed0ea36..f2281250ac35 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -1312,7 +1312,7 @@ pinctrl-names = "default"; pinctrl-0 = <&scp_pins>; - cros-ec { + cros-ec-rpmsg { compatible = "google,cros-ec-rpmsg"; mediatek,rpmsg-name = "cros-ec-rpmsg"; }; diff --git a/arch/arm64/boot/dts/qcom/sm8550-mtp.dts b/arch/arm64/boot/dts/qcom/sm8550-mtp.dts index 5b3488736fbe..9a70875028b7 100644 --- a/arch/arm64/boot/dts/qcom/sm8550-mtp.dts +++ b/arch/arm64/boot/dts/qcom/sm8550-mtp.dts @@ -59,6 +59,7 @@ compatible = "qcom,sm8550-pmic-glink", "qcom,pmic-glink"; #address-cells = <1>; #size-cells = <0>; + orientation-gpios = <&tlmm 11 GPIO_ACTIVE_HIGH>; connector@0 { compatible = "usb-c-connector"; diff --git a/arch/arm64/boot/dts/qcom/sm8550-qrd.dts b/arch/arm64/boot/dts/qcom/sm8550-qrd.dts index 320662024e89..eef811def39b 100644 --- a/arch/arm64/boot/dts/qcom/sm8550-qrd.dts +++ b/arch/arm64/boot/dts/qcom/sm8550-qrd.dts @@ -77,6 +77,7 @@ compatible = "qcom,sm8550-pmic-glink", "qcom,pmic-glink"; #address-cells = <1>; #size-cells = <0>; + orientation-gpios = <&tlmm 11 GPIO_ACTIVE_HIGH>; connector@0 { compatible = "usb-c-connector"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 2993e1255042..7064c0e9179f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -443,6 +443,27 @@ status = "disabled"; }; + usb_host2_xhci: usb@fcd00000 { + compatible = "rockchip,rk3588-dwc3", "snps,dwc3"; + reg = <0x0 0xfcd00000 0x0 0x400000>; + interrupts = <GIC_SPI 222 IRQ_TYPE_LEVEL_HIGH 0>; + clocks = <&cru REF_CLK_USB3OTG2>, <&cru SUSPEND_CLK_USB3OTG2>, + <&cru ACLK_USB3OTG2>, <&cru CLK_UTMI_OTG2>, + <&cru CLK_PIPEPHY2_PIPE_U3_G>; + clock-names = "ref_clk", "suspend_clk", "bus_clk", "utmi", "pipe"; + dr_mode = "host"; + phys = <&combphy2_psu PHY_TYPE_USB3>; + phy-names = "usb3-phy"; + phy_type = "utmi_wide"; + resets = <&cru SRST_A_USB3OTG2>; + snps,dis_enblslpm_quirk; + snps,dis-u2-freeclk-exists-quirk; + snps,dis-del-phy-power-chg-quirk; + snps,dis-tx-ipgap-linecheck-quirk; + snps,dis_rxdet_inp3_quirk; + status = "disabled"; + }; + pmu1grf: syscon@fd58a000 { compatible = "rockchip,rk3588-pmugrf", "syscon", "simple-mfd"; reg = <0x0 0xfd58a000 0x0 0x10000>; diff --git a/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts b/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts index 00891a0f8fc3..9a6bd0a3c94f 100644 --- a/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts +++ b/arch/arm64/boot/dts/ti/k3-am625-beagleplay.dts @@ -896,6 +896,12 @@ pinctrl-names = "default"; pinctrl-0 = <&wifi_debug_uart_pins_default>; status = "okay"; + + mcu { + compatible = "ti,cc1352p7"; + reset-gpios = <&main_gpio0 72 GPIO_ACTIVE_LOW>; + vdds-supply = <&vdd_3v3>; + }; }; &dss { diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 3f8c9c143552..0228001347be 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -71,10 +71,6 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) return pgprot_val(PAGE_KERNEL_EXEC); } -/* we will fill this structure from the stub, so don't put it in .bss */ -struct screen_info screen_info __section(".data"); -EXPORT_SYMBOL(screen_info); - int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { pteval_t prot_val = create_mapping_protection(md); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 35f3c7959513..5e4dc72ab1bd 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -27,7 +27,9 @@ PROVIDE(__efistub__text = _text); PROVIDE(__efistub__end = _end); PROVIDE(__efistub___inittext_end = __inittext_end); PROVIDE(__efistub__edata = _edata); +#if defined(CONFIG_EFI_EARLYCON) || defined(CONFIG_SYSFB) PROVIDE(__efistub_screen_info = screen_info); +#endif PROVIDE(__efistub__ctype = _ctype); PROVIDE(__pi___memcpy = __pi_memcpy); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index fe7a53c6781f..8818287f1095 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -78,13 +78,3 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE # Actual build commands quiet_cmd_vdsold_and_vdso_check = LD $@ cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check) - -# Install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso.so: $(obj)/vdso.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso.so diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 2f73e5bca213..1f911a76c5af 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -172,13 +172,3 @@ gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ # The AArch64 nm should be able to read an AArch32 binary cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ - -# Install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL32 $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so - -vdso.so: $(obj)/vdso.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso.so diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index 106fbf0b6f3b..51012e90780d 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -8,22 +8,10 @@ #include <linux/of_fdt.h> #include <linux/start_kernel.h> #include <linux/dma-map-ops.h> -#include <linux/screen_info.h> #include <asm/sections.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> -#ifdef CONFIG_DUMMY_CONSOLE -struct screen_info screen_info = { - .orig_video_lines = 30, - .orig_video_cols = 80, - .orig_video_mode = 0, - .orig_video_ega_bx = 0, - .orig_video_isVGA = 1, - .orig_video_points = 8 -}; -#endif - static void __init csky_memblock_init(void) { unsigned long lowmem_size = PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); diff --git a/arch/csky/kernel/vdso/Makefile b/arch/csky/kernel/vdso/Makefile index 299e4e41ebc5..ddf784a62c11 100644 --- a/arch/csky/kernel/vdso/Makefile +++ b/arch/csky/kernel/vdso/Makefile @@ -58,13 +58,3 @@ quiet_cmd_vdsold = VDSOLD $@ # that contains the same symbols at the same offsets. quiet_cmd_so2s = SO2S $@ cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@ - -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso.so: $(obj)/vdso.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso.so diff --git a/arch/hexagon/kernel/Makefile b/arch/hexagon/kernel/Makefile index e73cb321630e..3fdf937eb572 100644 --- a/arch/hexagon/kernel/Makefile +++ b/arch/hexagon/kernel/Makefile @@ -17,5 +17,3 @@ obj-y += vm_vectors.o obj-$(CONFIG_HAS_DMA) += dma.o obj-$(CONFIG_STACKTRACE) += stacktrace.o - -obj-$(CONFIG_VGA_CONSOLE) += screen_info.o diff --git a/arch/hexagon/kernel/screen_info.c b/arch/hexagon/kernel/screen_info.c deleted file mode 100644 index 1e1ceb18bafe..000000000000 --- a/arch/hexagon/kernel/screen_info.c +++ /dev/null @@ -1,3 +0,0 @@ -#include <linux/screen_info.h> - -struct screen_info screen_info; diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index fb0fada43197..b86f2ff31659 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -136,9 +136,7 @@ vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/loongarch/vdso include/generated/vdso-offsets.h endif -PHONY += vdso_install -vdso_install: - $(Q)$(MAKE) $(build)=arch/loongarch/vdso $@ +vdso-install-y += arch/loongarch/vdso/vdso.so.dbg all: $(notdir $(KBUILD_IMAGE)) diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index 9fc10cea21e1..acb5d3385675 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -68,6 +68,11 @@ void __init efi_runtime_init(void) unsigned long __initdata screen_info_table = EFI_INVALID_TABLE_ADDR; +#if defined(CONFIG_SYSFB) || defined(CONFIG_EFI_EARLYCON) +struct screen_info screen_info __section(".data"); +EXPORT_SYMBOL_GPL(screen_info); +#endif + static void __init init_screen_info(void) { struct screen_info *si; @@ -115,7 +120,8 @@ void __init efi_init(void) set_bit(EFI_CONFIG_TABLES, &efi.flags); - init_screen_info(); + if (IS_ENABLED(CONFIG_EFI_EARLYCON) || IS_ENABLED(CONFIG_SYSFB)) + init_screen_info(); if (boot_memmap == EFI_INVALID_TABLE_ADDR) return; diff --git a/arch/loongarch/kernel/image-vars.h b/arch/loongarch/kernel/image-vars.h index e561989d02de..5087416b9678 100644 --- a/arch/loongarch/kernel/image-vars.h +++ b/arch/loongarch/kernel/image-vars.h @@ -12,7 +12,9 @@ __efistub_kernel_entry = kernel_entry; __efistub_kernel_asize = kernel_asize; __efistub_kernel_fsize = kernel_fsize; __efistub_kernel_offset = kernel_offset; +#if defined(CONFIG_EFI_EARLYCON) || defined(CONFIG_SYSFB) __efistub_screen_info = screen_info; +#endif #endif diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index b35186f7b254..d183a745fb85 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -16,7 +16,6 @@ #include <linux/dmi.h> #include <linux/efi.h> #include <linux/export.h> -#include <linux/screen_info.h> #include <linux/memblock.h> #include <linux/initrd.h> #include <linux/ioport.h> @@ -57,8 +56,6 @@ #define SMBIOS_CORE_PACKAGE_OFFSET 0x23 #define LOONGSON_EFI_ENABLE (1 << 3) -struct screen_info screen_info __section(".data"); - unsigned long fw_arg0, fw_arg1, fw_arg2; DEFINE_PER_CPU(unsigned long, kernelsp); struct cpuinfo_loongarch cpu_data[NR_CPUS] __read_mostly; diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 5c97d1463328..c74c9921304f 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -83,13 +83,3 @@ $(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE obj-y += vdso.o $(obj)/vdso.o : $(obj)/vdso.so - -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso.so: $(obj)/vdso.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso.so diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index caad195ba5c1..a2311c4bce6a 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -2,7 +2,6 @@ # All platforms listed in alphabetic order platform-$(CONFIG_MIPS_ALCHEMY) += alchemy/ -platform-$(CONFIG_AR7) += ar7/ platform-$(CONFIG_ATH25) += ath25/ platform-$(CONFIG_ATH79) += ath79/ platform-$(CONFIG_BCM47XX) += bcm47xx/ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index bc8421859006..76db82542519 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -202,28 +202,6 @@ config MIPS_ALCHEMY select SYS_SUPPORTS_ZBOOT select COMMON_CLK -config AR7 - bool "Texas Instruments AR7" - select BOOT_ELF32 - select COMMON_CLK - select DMA_NONCOHERENT - select CEVT_R4K - select CSRC_R4K - select IRQ_MIPS_CPU - select NO_EXCEPT_FILL - select SWAP_IO_SPACE - select SYS_HAS_CPU_MIPS32_R1 - select SYS_HAS_EARLY_PRINTK - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_MIPS16 - select SYS_SUPPORTS_ZBOOT_UART16550 - select GPIOLIB - select VLYNQ - help - Support for the Texas Instruments AR7 System-on-a-Chip - family: TNETD7100, 7200 and 7300. - config ATH25 bool "Atheros AR231x/AR531x SoC support" select CEVT_R4K diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index 34e3bd71f3b0..6cfdc149d3bc 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -31,9 +31,6 @@ ifeq ($(CONFIG_RELOCATABLE),y) $(call if_changed,relocs) endif -%.ko: FORCE - @true - clean: @true diff --git a/arch/mips/ar7/Makefile b/arch/mips/ar7/Makefile deleted file mode 100644 index cd51c6c6e686..000000000000 --- a/arch/mips/ar7/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -obj-y := \ - prom.o \ - setup.o \ - memory.o \ - irq.o \ - time.o \ - platform.o \ - gpio.o \ - clock.o diff --git a/arch/mips/ar7/Platform b/arch/mips/ar7/Platform deleted file mode 100644 index a9257cc01c3c..000000000000 --- a/arch/mips/ar7/Platform +++ /dev/null @@ -1,5 +0,0 @@ -# -# Texas Instruments AR7 -# -cflags-$(CONFIG_AR7) += -I$(srctree)/arch/mips/include/asm/mach-ar7 -load-$(CONFIG_AR7) += 0xffffffff94100000 diff --git a/arch/mips/ar7/clock.c b/arch/mips/ar7/clock.c deleted file mode 100644 index c717acbc5506..000000000000 --- a/arch/mips/ar7/clock.c +++ /dev/null @@ -1,439 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2007 Eugene Konev <ejka@openwrt.org> - * Copyright (C) 2009 Florian Fainelli <florian@openwrt.org> - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/export.h> -#include <linux/delay.h> -#include <linux/gcd.h> -#include <linux/io.h> -#include <linux/err.h> -#include <linux/clkdev.h> -#include <linux/clk.h> -#include <linux/clk-provider.h> - -#include <asm/addrspace.h> -#include <asm/mach-ar7/ar7.h> - -#define BOOT_PLL_SOURCE_MASK 0x3 -#define CPU_PLL_SOURCE_SHIFT 16 -#define BUS_PLL_SOURCE_SHIFT 14 -#define USB_PLL_SOURCE_SHIFT 18 -#define DSP_PLL_SOURCE_SHIFT 22 -#define BOOT_PLL_SOURCE_AFE 0 -#define BOOT_PLL_SOURCE_BUS 0 -#define BOOT_PLL_SOURCE_REF 1 -#define BOOT_PLL_SOURCE_XTAL 2 -#define BOOT_PLL_SOURCE_CPU 3 -#define BOOT_PLL_BYPASS 0x00000020 -#define BOOT_PLL_ASYNC_MODE 0x02000000 -#define BOOT_PLL_2TO1_MODE 0x00008000 - -#define TNETD7200_CLOCK_ID_CPU 0 -#define TNETD7200_CLOCK_ID_DSP 1 -#define TNETD7200_CLOCK_ID_USB 2 - -#define TNETD7200_DEF_CPU_CLK 211000000 -#define TNETD7200_DEF_DSP_CLK 125000000 -#define TNETD7200_DEF_USB_CLK 48000000 - -struct tnetd7300_clock { - u32 ctrl; -#define PREDIV_MASK 0x001f0000 -#define PREDIV_SHIFT 16 -#define POSTDIV_MASK 0x0000001f - u32 unused1[3]; - u32 pll; -#define MUL_MASK 0x0000f000 -#define MUL_SHIFT 12 -#define PLL_MODE_MASK 0x00000001 -#define PLL_NDIV 0x00000800 -#define PLL_DIV 0x00000002 -#define PLL_STATUS 0x00000001 - u32 unused2[3]; -}; - -struct tnetd7300_clocks { - struct tnetd7300_clock bus; - struct tnetd7300_clock cpu; - struct tnetd7300_clock usb; - struct tnetd7300_clock dsp; -}; - -struct tnetd7200_clock { - u32 ctrl; - u32 unused1[3]; -#define DIVISOR_ENABLE_MASK 0x00008000 - u32 mul; - u32 prediv; - u32 postdiv; - u32 postdiv2; - u32 unused2[6]; - u32 cmd; - u32 status; - u32 cmden; - u32 padding[15]; -}; - -struct tnetd7200_clocks { - struct tnetd7200_clock cpu; - struct tnetd7200_clock dsp; - struct tnetd7200_clock usb; -}; - -struct clk_rate { - u32 rate; -}; -static struct clk_rate bus_clk = { - .rate = 125000000, -}; - -static struct clk_rate cpu_clk = { - .rate = 150000000, -}; - -static void approximate(int base, int target, int *prediv, - int *postdiv, int *mul) -{ - int i, j, k, freq, res = target; - for (i = 1; i <= 16; i++) - for (j = 1; j <= 32; j++) - for (k = 1; k <= 32; k++) { - freq = abs(base / j * i / k - target); - if (freq < res) { - res = freq; - *mul = i; - *prediv = j; - *postdiv = k; - } - } -} - -static void calculate(int base, int target, int *prediv, int *postdiv, - int *mul) -{ - int tmp_gcd, tmp_base, tmp_freq; - - for (*prediv = 1; *prediv <= 32; (*prediv)++) { - tmp_base = base / *prediv; - tmp_gcd = gcd(target, tmp_base); - *mul = target / tmp_gcd; - *postdiv = tmp_base / tmp_gcd; - if ((*mul < 1) || (*mul >= 16)) - continue; - if ((*postdiv > 0) & (*postdiv <= 32)) - break; - } - - if (base / *prediv * *mul / *postdiv != target) { - approximate(base, target, prediv, postdiv, mul); - tmp_freq = base / *prediv * *mul / *postdiv; - printk(KERN_WARNING - "Adjusted requested frequency %d to %d\n", - target, tmp_freq); - } - - printk(KERN_DEBUG "Clocks: prediv: %d, postdiv: %d, mul: %d\n", - *prediv, *postdiv, *mul); -} - -static int tnetd7300_dsp_clock(void) -{ - u32 didr1, didr2; - u8 rev = ar7_chip_rev(); - didr1 = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x18)); - didr2 = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x1c)); - if (didr2 & (1 << 23)) - return 0; - if ((rev >= 0x23) && (rev != 0x57)) - return 250000000; - if ((((didr2 & 0x1fff) << 10) | ((didr1 & 0xffc00000) >> 22)) - > 4208000) - return 250000000; - return 0; -} - -static int tnetd7300_get_clock(u32 shift, struct tnetd7300_clock *clock, - u32 *bootcr, u32 bus_clock) -{ - int product; - int base_clock = AR7_REF_CLOCK; - u32 ctrl = readl(&clock->ctrl); - u32 pll = readl(&clock->pll); - int prediv = ((ctrl & PREDIV_MASK) >> PREDIV_SHIFT) + 1; - int postdiv = (ctrl & POSTDIV_MASK) + 1; - int divisor = prediv * postdiv; - int mul = ((pll & MUL_MASK) >> MUL_SHIFT) + 1; - - switch ((*bootcr & (BOOT_PLL_SOURCE_MASK << shift)) >> shift) { - case BOOT_PLL_SOURCE_BUS: - base_clock = bus_clock; - break; - case BOOT_PLL_SOURCE_REF: - base_clock = AR7_REF_CLOCK; - break; - case BOOT_PLL_SOURCE_XTAL: - base_clock = AR7_XTAL_CLOCK; - break; - case BOOT_PLL_SOURCE_CPU: - base_clock = cpu_clk.rate; - break; - } - - if (*bootcr & BOOT_PLL_BYPASS) - return base_clock / divisor; - - if ((pll & PLL_MODE_MASK) == 0) - return (base_clock >> (mul / 16 + 1)) / divisor; - - if ((pll & (PLL_NDIV | PLL_DIV)) == (PLL_NDIV | PLL_DIV)) { - product = (mul & 1) ? - (base_clock * mul) >> 1 : - (base_clock * (mul - 1)) >> 2; - return product / divisor; - } - - if (mul == 16) - return base_clock / divisor; - - return base_clock * mul / divisor; -} - -static void tnetd7300_set_clock(u32 shift, struct tnetd7300_clock *clock, - u32 *bootcr, u32 frequency) -{ - int prediv, postdiv, mul; - int base_clock = bus_clk.rate; - - switch ((*bootcr & (BOOT_PLL_SOURCE_MASK << shift)) >> shift) { - case BOOT_PLL_SOURCE_BUS: - base_clock = bus_clk.rate; - break; - case BOOT_PLL_SOURCE_REF: - base_clock = AR7_REF_CLOCK; - break; - case BOOT_PLL_SOURCE_XTAL: - base_clock = AR7_XTAL_CLOCK; - break; - case BOOT_PLL_SOURCE_CPU: - base_clock = cpu_clk.rate; - break; - } - - calculate(base_clock, frequency, &prediv, &postdiv, &mul); - - writel(((prediv - 1) << PREDIV_SHIFT) | (postdiv - 1), &clock->ctrl); - mdelay(1); - writel(4, &clock->pll); - while (readl(&clock->pll) & PLL_STATUS) - ; - writel(((mul - 1) << MUL_SHIFT) | (0xff << 3) | 0x0e, &clock->pll); - mdelay(75); -} - -static void __init tnetd7300_init_clocks(void) -{ - u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); - struct tnetd7300_clocks *clocks = - ioremap(UR8_REGS_CLOCKS, - sizeof(struct tnetd7300_clocks)); - u32 dsp_clk; - struct clk *clk; - - bus_clk.rate = tnetd7300_get_clock(BUS_PLL_SOURCE_SHIFT, - &clocks->bus, bootcr, AR7_AFE_CLOCK); - - if (*bootcr & BOOT_PLL_ASYNC_MODE) - cpu_clk.rate = tnetd7300_get_clock(CPU_PLL_SOURCE_SHIFT, - &clocks->cpu, bootcr, AR7_AFE_CLOCK); - else - cpu_clk.rate = bus_clk.rate; - - dsp_clk = tnetd7300_dsp_clock(); - if (dsp_clk == 250000000) - tnetd7300_set_clock(DSP_PLL_SOURCE_SHIFT, &clocks->dsp, - bootcr, dsp_clk); - - iounmap(clocks); - iounmap(bootcr); - - clk = clk_register_fixed_rate(NULL, "cpu", NULL, 0, cpu_clk.rate); - clkdev_create(clk, "cpu", NULL); - clk = clk_register_fixed_rate(NULL, "dsp", NULL, 0, dsp_clk); - clkdev_create(clk, "dsp", NULL); -} - -static void tnetd7200_set_clock(int base, struct tnetd7200_clock *clock, - int prediv, int postdiv, int postdiv2, int mul, u32 frequency) -{ - printk(KERN_INFO - "Clocks: base = %d, frequency = %u, prediv = %d, " - "postdiv = %d, postdiv2 = %d, mul = %d\n", - base, frequency, prediv, postdiv, postdiv2, mul); - - writel(0, &clock->ctrl); - writel(DIVISOR_ENABLE_MASK | ((prediv - 1) & 0x1F), &clock->prediv); - writel((mul - 1) & 0xF, &clock->mul); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(DIVISOR_ENABLE_MASK | ((postdiv - 1) & 0x1F), &clock->postdiv); - - writel(readl(&clock->cmden) | 1, &clock->cmden); - writel(readl(&clock->cmd) | 1, &clock->cmd); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(DIVISOR_ENABLE_MASK | ((postdiv2 - 1) & 0x1F), &clock->postdiv2); - - writel(readl(&clock->cmden) | 1, &clock->cmden); - writel(readl(&clock->cmd) | 1, &clock->cmd); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(readl(&clock->ctrl) | 1, &clock->ctrl); -} - -static int tnetd7200_get_clock_base(int clock_id, u32 *bootcr) -{ - if (*bootcr & BOOT_PLL_ASYNC_MODE) - /* Async */ - switch (clock_id) { - case TNETD7200_CLOCK_ID_DSP: - return AR7_REF_CLOCK; - default: - return AR7_AFE_CLOCK; - } - else - /* Sync */ - if (*bootcr & BOOT_PLL_2TO1_MODE) - /* 2:1 */ - switch (clock_id) { - case TNETD7200_CLOCK_ID_DSP: - return AR7_REF_CLOCK; - default: - return AR7_AFE_CLOCK; - } - else - /* 1:1 */ - return AR7_REF_CLOCK; -} - - -static void __init tnetd7200_init_clocks(void) -{ - u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); - struct tnetd7200_clocks *clocks = - ioremap(AR7_REGS_CLOCKS, - sizeof(struct tnetd7200_clocks)); - int cpu_base, cpu_mul, cpu_prediv, cpu_postdiv; - int dsp_base, dsp_mul, dsp_prediv, dsp_postdiv; - int usb_base, usb_mul, usb_prediv, usb_postdiv; - struct clk *clk; - - cpu_base = tnetd7200_get_clock_base(TNETD7200_CLOCK_ID_CPU, bootcr); - dsp_base = tnetd7200_get_clock_base(TNETD7200_CLOCK_ID_DSP, bootcr); - - if (*bootcr & BOOT_PLL_ASYNC_MODE) { - printk(KERN_INFO "Clocks: Async mode\n"); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, - &dsp_prediv, &dsp_postdiv, &dsp_mul); - bus_clk.rate = - ((dsp_base / dsp_prediv) * dsp_mul) / dsp_postdiv; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, dsp_mul * 2, - bus_clk.rate); - - printk(KERN_INFO "Clocks: Setting CPU clock\n"); - calculate(cpu_base, TNETD7200_DEF_CPU_CLK, &cpu_prediv, - &cpu_postdiv, &cpu_mul); - cpu_clk.rate = - ((cpu_base / cpu_prediv) * cpu_mul) / cpu_postdiv; - tnetd7200_set_clock(cpu_base, &clocks->cpu, - cpu_prediv, cpu_postdiv, -1, cpu_mul, - cpu_clk.rate); - - } else - if (*bootcr & BOOT_PLL_2TO1_MODE) { - printk(KERN_INFO "Clocks: Sync 2:1 mode\n"); - - printk(KERN_INFO "Clocks: Setting CPU clock\n"); - calculate(cpu_base, TNETD7200_DEF_CPU_CLK, &cpu_prediv, - &cpu_postdiv, &cpu_mul); - cpu_clk.rate = ((cpu_base / cpu_prediv) * cpu_mul) - / cpu_postdiv; - tnetd7200_set_clock(cpu_base, &clocks->cpu, - cpu_prediv, cpu_postdiv, -1, cpu_mul, - cpu_clk.rate); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, &dsp_prediv, - &dsp_postdiv, &dsp_mul); - bus_clk.rate = cpu_clk.rate / 2; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, - dsp_mul * 2, bus_clk.rate); - } else { - printk(KERN_INFO "Clocks: Sync 1:1 mode\n"); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, &dsp_prediv, - &dsp_postdiv, &dsp_mul); - bus_clk.rate = ((dsp_base / dsp_prediv) * dsp_mul) - / dsp_postdiv; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, - dsp_mul * 2, bus_clk.rate); - - cpu_clk.rate = bus_clk.rate; - } - - printk(KERN_INFO "Clocks: Setting USB clock\n"); - usb_base = bus_clk.rate; - calculate(usb_base, TNETD7200_DEF_USB_CLK, &usb_prediv, - &usb_postdiv, &usb_mul); - tnetd7200_set_clock(usb_base, &clocks->usb, - usb_prediv, usb_postdiv, -1, usb_mul, - TNETD7200_DEF_USB_CLK); - - iounmap(clocks); - iounmap(bootcr); - - clk = clk_register_fixed_rate(NULL, "cpu", NULL, 0, cpu_clk.rate); - clkdev_create(clk, "cpu", NULL); - clkdev_create(clk, "dsp", NULL); -} - -void __init ar7_init_clocks(void) -{ - struct clk *clk; - - switch (ar7_chip_id()) { - case AR7_CHIP_7100: - case AR7_CHIP_7200: - tnetd7200_init_clocks(); - break; - case AR7_CHIP_7300: - tnetd7300_init_clocks(); - break; - default: - break; - } - clk = clk_register_fixed_rate(NULL, "bus", NULL, 0, bus_clk.rate); - clkdev_create(clk, "bus", NULL); - /* adjust vbus clock rate */ - clk = clk_register_fixed_factor(NULL, "vbus", "bus", 0, 1, 2); - clkdev_create(clk, "vbus", NULL); - clkdev_create(clk, "cpmac", "cpmac.1"); - clkdev_create(clk, "cpmac", "cpmac.1"); -} diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c deleted file mode 100644 index 4ed833b9cc2f..000000000000 --- a/arch/mips/ar7/gpio.c +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2007 Eugene Konev <ejka@openwrt.org> - * Copyright (C) 2009-2010 Florian Fainelli <florian@openwrt.org> - */ - -#include <linux/init.h> -#include <linux/export.h> -#include <linux/gpio/driver.h> - -#include <asm/mach-ar7/ar7.h> - -#define AR7_GPIO_MAX 32 -#define TITAN_GPIO_MAX 51 - -struct ar7_gpio_chip { - void __iomem *regs; - struct gpio_chip chip; -}; - -static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_in = gpch->regs + AR7_GPIO_INPUT; - - return !!(readl(gpio_in) & (1 << gpio)); -} - -static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_in0 = gpch->regs + TITAN_GPIO_INPUT_0; - void __iomem *gpio_in1 = gpch->regs + TITAN_GPIO_INPUT_1; - - return readl(gpio >> 5 ? gpio_in1 : gpio_in0) & (1 << (gpio & 0x1f)); -} - -static void ar7_gpio_set_value(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_out = gpch->regs + AR7_GPIO_OUTPUT; - unsigned tmp; - - tmp = readl(gpio_out) & ~(1 << gpio); - if (value) - tmp |= 1 << gpio; - writel(tmp, gpio_out); -} - -static void titan_gpio_set_value(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_out0 = gpch->regs + TITAN_GPIO_OUTPUT_0; - void __iomem *gpio_out1 = gpch->regs + TITAN_GPIO_OUTPUT_1; - unsigned tmp; - - tmp = readl(gpio >> 5 ? gpio_out1 : gpio_out0) & ~(1 << (gpio & 0x1f)); - if (value) - tmp |= 1 << (gpio & 0x1f); - writel(tmp, gpio >> 5 ? gpio_out1 : gpio_out0); -} - -static int ar7_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR; - - writel(readl(gpio_dir) | (1 << gpio), gpio_dir); - - return 0; -} - -static int titan_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0; - void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1; - - if (gpio >= TITAN_GPIO_MAX) - return -EINVAL; - - writel(readl(gpio >> 5 ? gpio_dir1 : gpio_dir0) | (1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_dir1 : gpio_dir0); - return 0; -} - -static int ar7_gpio_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR; - - ar7_gpio_set_value(chip, gpio, value); - writel(readl(gpio_dir) & ~(1 << gpio), gpio_dir); - - return 0; -} - -static int titan_gpio_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0; - void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1; - - if (gpio >= TITAN_GPIO_MAX) - return -EINVAL; - - titan_gpio_set_value(chip, gpio, value); - writel(readl(gpio >> 5 ? gpio_dir1 : gpio_dir0) & ~(1 << - (gpio & 0x1f)), gpio >> 5 ? gpio_dir1 : gpio_dir0); - - return 0; -} - -static struct ar7_gpio_chip ar7_gpio_chip = { - .chip = { - .label = "ar7-gpio", - .direction_input = ar7_gpio_direction_input, - .direction_output = ar7_gpio_direction_output, - .set = ar7_gpio_set_value, - .get = ar7_gpio_get_value, - .base = 0, - .ngpio = AR7_GPIO_MAX, - } -}; - -static struct ar7_gpio_chip titan_gpio_chip = { - .chip = { - .label = "titan-gpio", - .direction_input = titan_gpio_direction_input, - .direction_output = titan_gpio_direction_output, - .set = titan_gpio_set_value, - .get = titan_gpio_get_value, - .base = 0, - .ngpio = TITAN_GPIO_MAX, - } -}; - -static inline int ar7_gpio_enable_ar7(unsigned gpio) -{ - void __iomem *gpio_en = ar7_gpio_chip.regs + AR7_GPIO_ENABLE; - - writel(readl(gpio_en) | (1 << gpio), gpio_en); - - return 0; -} - -static inline int ar7_gpio_enable_titan(unsigned gpio) -{ - void __iomem *gpio_en0 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_0; - void __iomem *gpio_en1 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_1; - - writel(readl(gpio >> 5 ? gpio_en1 : gpio_en0) | (1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_en1 : gpio_en0); - - return 0; -} - -int ar7_gpio_enable(unsigned gpio) -{ - return ar7_is_titan() ? ar7_gpio_enable_titan(gpio) : - ar7_gpio_enable_ar7(gpio); -} -EXPORT_SYMBOL(ar7_gpio_enable); - -static inline int ar7_gpio_disable_ar7(unsigned gpio) -{ - void __iomem *gpio_en = ar7_gpio_chip.regs + AR7_GPIO_ENABLE; - - writel(readl(gpio_en) & ~(1 << gpio), gpio_en); - - return 0; -} - -static inline int ar7_gpio_disable_titan(unsigned gpio) -{ - void __iomem *gpio_en0 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_0; - void __iomem *gpio_en1 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_1; - - writel(readl(gpio >> 5 ? gpio_en1 : gpio_en0) & ~(1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_en1 : gpio_en0); - - return 0; -} - -int ar7_gpio_disable(unsigned gpio) -{ - return ar7_is_titan() ? ar7_gpio_disable_titan(gpio) : - ar7_gpio_disable_ar7(gpio); -} -EXPORT_SYMBOL(ar7_gpio_disable); - -struct titan_gpio_cfg { - u32 reg; - u32 shift; - u32 func; -}; - -static const struct titan_gpio_cfg titan_gpio_table[] = { - /* reg, start bit, mux value */ - {4, 24, 1}, - {4, 26, 1}, - {4, 28, 1}, - {4, 30, 1}, - {5, 6, 1}, - {5, 8, 1}, - {5, 10, 1}, - {5, 12, 1}, - {7, 14, 3}, - {7, 16, 3}, - {7, 18, 3}, - {7, 20, 3}, - {7, 22, 3}, - {7, 26, 3}, - {7, 28, 3}, - {7, 30, 3}, - {8, 0, 3}, - {8, 2, 3}, - {8, 4, 3}, - {8, 10, 3}, - {8, 14, 3}, - {8, 16, 3}, - {8, 18, 3}, - {8, 20, 3}, - {9, 8, 3}, - {9, 10, 3}, - {9, 12, 3}, - {9, 14, 3}, - {9, 18, 3}, - {9, 20, 3}, - {9, 24, 3}, - {9, 26, 3}, - {9, 28, 3}, - {9, 30, 3}, - {10, 0, 3}, - {10, 2, 3}, - {10, 8, 3}, - {10, 10, 3}, - {10, 12, 3}, - {10, 14, 3}, - {13, 12, 3}, - {13, 14, 3}, - {13, 16, 3}, - {13, 18, 3}, - {13, 24, 3}, - {13, 26, 3}, - {13, 28, 3}, - {13, 30, 3}, - {14, 2, 3}, - {14, 6, 3}, - {14, 8, 3}, - {14, 12, 3} -}; - -static int titan_gpio_pinsel(unsigned gpio) -{ - struct titan_gpio_cfg gpio_cfg; - u32 mux_status, pin_sel_reg, tmp; - void __iomem *pin_sel = (void __iomem *)KSEG1ADDR(AR7_REGS_PINSEL); - - if (gpio >= ARRAY_SIZE(titan_gpio_table)) - return -EINVAL; - - gpio_cfg = titan_gpio_table[gpio]; - pin_sel_reg = gpio_cfg.reg - 1; - - mux_status = (readl(pin_sel + pin_sel_reg) >> gpio_cfg.shift) & 0x3; - - /* Check the mux status */ - if (!((mux_status == 0) || (mux_status == gpio_cfg.func))) - return 0; - - /* Set the pin sel value */ - tmp = readl(pin_sel + pin_sel_reg); - tmp |= ((gpio_cfg.func & 0x3) << gpio_cfg.shift); - writel(tmp, pin_sel + pin_sel_reg); - - return 0; -} - -/* Perform minimal Titan GPIO configuration */ -static void titan_gpio_init(void) -{ - unsigned i; - - for (i = 44; i < 48; i++) { - titan_gpio_pinsel(i); - ar7_gpio_enable_titan(i); - titan_gpio_direction_input(&titan_gpio_chip.chip, i); - } -} - -int __init ar7_gpio_init(void) -{ - int ret; - struct ar7_gpio_chip *gpch; - unsigned size; - - if (!ar7_is_titan()) { - gpch = &ar7_gpio_chip; - size = 0x10; - } else { - gpch = &titan_gpio_chip; - size = 0x1f; - } - - gpch->regs = ioremap(AR7_REGS_GPIO, size); - if (!gpch->regs) { - printk(KERN_ERR "%s: failed to ioremap regs\n", - gpch->chip.label); - return -ENOMEM; - } - - ret = gpiochip_add_data(&gpch->chip, gpch); - if (ret) { - printk(KERN_ERR "%s: failed to add gpiochip\n", - gpch->chip.label); - iounmap(gpch->regs); - return ret; - } - printk(KERN_INFO "%s: registered %d GPIOs\n", - gpch->chip.label, gpch->chip.ngpio); - - if (ar7_is_titan()) - titan_gpio_init(); - - return ret; -} diff --git a/arch/mips/ar7/irq.c b/arch/mips/ar7/irq.c deleted file mode 100644 index f0a7942d393e..000000000000 --- a/arch/mips/ar7/irq.c +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006,2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2006,2007 Eugene Konev <ejka@openwrt.org> - */ - -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/irq.h> - -#include <asm/irq_cpu.h> -#include <asm/mipsregs.h> -#include <asm/mach-ar7/ar7.h> - -#define EXCEPT_OFFSET 0x80 -#define PACE_OFFSET 0xA0 -#define CHNLS_OFFSET 0x200 - -#define REG_OFFSET(irq, reg) ((irq) / 32 * 0x4 + reg * 0x10) -#define SEC_REG_OFFSET(reg) (EXCEPT_OFFSET + reg * 0x8) -#define SEC_SR_OFFSET (SEC_REG_OFFSET(0)) /* 0x80 */ -#define CR_OFFSET(irq) (REG_OFFSET(irq, 1)) /* 0x10 */ -#define SEC_CR_OFFSET (SEC_REG_OFFSET(1)) /* 0x88 */ -#define ESR_OFFSET(irq) (REG_OFFSET(irq, 2)) /* 0x20 */ -#define SEC_ESR_OFFSET (SEC_REG_OFFSET(2)) /* 0x90 */ -#define ECR_OFFSET(irq) (REG_OFFSET(irq, 3)) /* 0x30 */ -#define SEC_ECR_OFFSET (SEC_REG_OFFSET(3)) /* 0x98 */ -#define PIR_OFFSET (0x40) -#define MSR_OFFSET (0x44) -#define PM_OFFSET(irq) (REG_OFFSET(irq, 5)) /* 0x50 */ -#define TM_OFFSET(irq) (REG_OFFSET(irq, 6)) /* 0x60 */ - -#define REG(addr) ((u32 *)(KSEG1ADDR(AR7_REGS_IRQ) + addr)) - -#define CHNL_OFFSET(chnl) (CHNLS_OFFSET + (chnl * 4)) - -static int ar7_irq_base; - -static void ar7_unmask_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(ESR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_mask_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(ECR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_ack_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(CR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_unmask_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_ESR_OFFSET)); -} - -static void ar7_mask_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_ECR_OFFSET)); -} - -static void ar7_ack_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_CR_OFFSET)); -} - -static struct irq_chip ar7_irq_type = { - .name = "AR7", - .irq_unmask = ar7_unmask_irq, - .irq_mask = ar7_mask_irq, - .irq_ack = ar7_ack_irq -}; - -static struct irq_chip ar7_sec_irq_type = { - .name = "AR7", - .irq_unmask = ar7_unmask_sec_irq, - .irq_mask = ar7_mask_sec_irq, - .irq_ack = ar7_ack_sec_irq, -}; - -static void __init ar7_irq_init(int base) -{ - int i; - /* - * Disable interrupts and clear pending - */ - writel(0xffffffff, REG(ECR_OFFSET(0))); - writel(0xff, REG(ECR_OFFSET(32))); - writel(0xffffffff, REG(SEC_ECR_OFFSET)); - writel(0xffffffff, REG(CR_OFFSET(0))); - writel(0xff, REG(CR_OFFSET(32))); - writel(0xffffffff, REG(SEC_CR_OFFSET)); - - ar7_irq_base = base; - - for (i = 0; i < 40; i++) { - writel(i, REG(CHNL_OFFSET(i))); - /* Primary IRQ's */ - irq_set_chip_and_handler(base + i, &ar7_irq_type, - handle_level_irq); - /* Secondary IRQ's */ - if (i < 32) - irq_set_chip_and_handler(base + i + 40, - &ar7_sec_irq_type, - handle_level_irq); - } - - if (request_irq(2, no_action, IRQF_NO_THREAD, "AR7 cascade interrupt", - NULL)) - pr_err("Failed to request irq 2 (AR7 cascade interrupt)\n"); - if (request_irq(ar7_irq_base, no_action, IRQF_NO_THREAD, - "AR7 cascade interrupt", NULL)) { - pr_err("Failed to request irq %d (AR7 cascade interrupt)\n", - ar7_irq_base); - } - set_c0_status(IE_IRQ0); -} - -void __init arch_init_irq(void) -{ - mips_cpu_irq_init(); - ar7_irq_init(8); -} - -static void ar7_cascade(void) -{ - u32 status; - int i, irq; - - /* Primary IRQ's */ - irq = readl(REG(PIR_OFFSET)) & 0x3f; - if (irq) { - do_IRQ(ar7_irq_base + irq); - return; - } - - /* Secondary IRQ's are cascaded through primary '0' */ - writel(1, REG(CR_OFFSET(irq))); - status = readl(REG(SEC_SR_OFFSET)); - for (i = 0; i < 32; i++) { - if (status & 1) { - do_IRQ(ar7_irq_base + i + 40); - return; - } - status >>= 1; - } - - spurious_interrupt(); -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; - if (pending & STATUSF_IP7) /* cpu timer */ - do_IRQ(7); - else if (pending & STATUSF_IP2) /* int0 hardware line */ - ar7_cascade(); - else - spurious_interrupt(); -} diff --git a/arch/mips/ar7/memory.c b/arch/mips/ar7/memory.c deleted file mode 100644 index ce8024c1a54e..000000000000 --- a/arch/mips/ar7/memory.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2007 Eugene Konev <ejka@openwrt.org> - */ -#include <linux/memblock.h> -#include <linux/init.h> -#include <linux/mm.h> -#include <linux/pfn.h> -#include <linux/proc_fs.h> -#include <linux/string.h> -#include <linux/swap.h> - -#include <asm/bootinfo.h> -#include <asm/page.h> -#include <asm/sections.h> - -#include <asm/mach-ar7/ar7.h> - -static int __init memsize(void) -{ - u32 size = (64 << 20); - u32 *addr = (u32 *)KSEG1ADDR(AR7_SDRAM_BASE + size - 4); - u32 *kernel_end = (u32 *)KSEG1ADDR(CPHYSADDR((u32)&_end)); - u32 *tmpaddr = addr; - - while (tmpaddr > kernel_end) { - *tmpaddr = (u32)tmpaddr; - size >>= 1; - tmpaddr -= size >> 2; - } - - do { - tmpaddr += size >> 2; - if (*tmpaddr != (u32)tmpaddr) - break; - size <<= 1; - } while (size < (64 << 20)); - - writel((u32)tmpaddr, &addr); - - return size; -} - -void __init prom_meminit(void) -{ - unsigned long pages; - - pages = memsize() >> PAGE_SHIFT; - memblock_add(PHYS_OFFSET, pages << PAGE_SHIFT); -} diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c deleted file mode 100644 index 215149a85d83..000000000000 --- a/arch/mips/ar7/platform.c +++ /dev/null @@ -1,722 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006,2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2006,2007 Eugene Konev <ejka@openwrt.org> - */ - -#include <linux/init.h> -#include <linux/types.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> -#include <linux/platform_device.h> -#include <linux/mtd/physmap.h> -#include <linux/serial.h> -#include <linux/serial_8250.h> -#include <linux/ioport.h> -#include <linux/io.h> -#include <linux/vlynq.h> -#include <linux/leds.h> -#include <linux/string.h> -#include <linux/etherdevice.h> -#include <linux/phy.h> -#include <linux/phy_fixed.h> -#include <linux/gpio.h> -#include <linux/clk.h> - -#include <asm/addrspace.h> -#include <asm/mach-ar7/ar7.h> -#include <asm/mach-ar7/prom.h> - -/***************************************************************************** - * VLYNQ Bus - ****************************************************************************/ -struct plat_vlynq_data { - struct plat_vlynq_ops ops; - int gpio_bit; - int reset_bit; -}; - -static int vlynq_on(struct vlynq_device *dev) -{ - int ret; - struct plat_vlynq_data *pdata = dev->dev.platform_data; - - ret = gpio_request(pdata->gpio_bit, "vlynq"); - if (ret) - goto out; - - ar7_device_reset(pdata->reset_bit); - - ret = ar7_gpio_disable(pdata->gpio_bit); - if (ret) - goto out_enabled; - - ret = ar7_gpio_enable(pdata->gpio_bit); - if (ret) - goto out_enabled; - - ret = gpio_direction_output(pdata->gpio_bit, 0); - if (ret) - goto out_gpio_enabled; - - msleep(50); - - gpio_set_value(pdata->gpio_bit, 1); - - msleep(50); - - return 0; - -out_gpio_enabled: - ar7_gpio_disable(pdata->gpio_bit); -out_enabled: - ar7_device_disable(pdata->reset_bit); - gpio_free(pdata->gpio_bit); -out: - return ret; -} - -static void vlynq_off(struct vlynq_device *dev) -{ - struct plat_vlynq_data *pdata = dev->dev.platform_data; - - ar7_gpio_disable(pdata->gpio_bit); - gpio_free(pdata->gpio_bit); - ar7_device_disable(pdata->reset_bit); -} - -static struct resource vlynq_low_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_VLYNQ0, - .end = AR7_REGS_VLYNQ0 + 0xff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 29, - .end = 29, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x04000000, - .end = 0x04ffffff, - }, - { - .name = "devirq", - .flags = IORESOURCE_IRQ, - .start = 80, - .end = 111, - }, -}; - -static struct resource vlynq_high_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_VLYNQ1, - .end = AR7_REGS_VLYNQ1 + 0xff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 33, - .end = 33, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x0c000000, - .end = 0x0cffffff, - }, - { - .name = "devirq", - .flags = IORESOURCE_IRQ, - .start = 112, - .end = 143, - }, -}; - -static struct plat_vlynq_data vlynq_low_data = { - .ops = { - .on = vlynq_on, - .off = vlynq_off, - }, - .reset_bit = 20, - .gpio_bit = 18, -}; - -static struct plat_vlynq_data vlynq_high_data = { - .ops = { - .on = vlynq_on, - .off = vlynq_off, - }, - .reset_bit = 16, - .gpio_bit = 19, -}; - -static struct platform_device vlynq_low = { - .id = 0, - .name = "vlynq", - .dev = { - .platform_data = &vlynq_low_data, - }, - .resource = vlynq_low_res, - .num_resources = ARRAY_SIZE(vlynq_low_res), -}; - -static struct platform_device vlynq_high = { - .id = 1, - .name = "vlynq", - .dev = { - .platform_data = &vlynq_high_data, - }, - .resource = vlynq_high_res, - .num_resources = ARRAY_SIZE(vlynq_high_res), -}; - -/***************************************************************************** - * Flash - ****************************************************************************/ -static struct resource physmap_flash_resource = { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x10000000, - .end = 0x107fffff, -}; - -static const char *ar7_probe_types[] = { "ar7part", NULL }; - -static struct physmap_flash_data physmap_flash_data = { - .width = 2, - .part_probe_types = ar7_probe_types, -}; - -static struct platform_device physmap_flash = { - .name = "physmap-flash", - .dev = { - .platform_data = &physmap_flash_data, - }, - .resource = &physmap_flash_resource, - .num_resources = 1, -}; - -/***************************************************************************** - * Ethernet - ****************************************************************************/ -static struct resource cpmac_low_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_MAC0, - .end = AR7_REGS_MAC0 + 0x7ff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 27, - .end = 27, - }, -}; - -static struct resource cpmac_high_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_MAC1, - .end = AR7_REGS_MAC1 + 0x7ff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 41, - .end = 41, - }, -}; - -static struct fixed_phy_status fixed_phy_status __initdata = { - .link = 1, - .speed = 100, - .duplex = 1, -}; - -static struct plat_cpmac_data cpmac_low_data = { - .reset_bit = 17, - .power_bit = 20, - .phy_mask = 0x80000000, -}; - -static struct plat_cpmac_data cpmac_high_data = { - .reset_bit = 21, - .power_bit = 22, - .phy_mask = 0x7fffffff, -}; - -static u64 cpmac_dma_mask = DMA_BIT_MASK(32); - -static struct platform_device cpmac_low = { - .id = 0, - .name = "cpmac", - .dev = { - .dma_mask = &cpmac_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &cpmac_low_data, - }, - .resource = cpmac_low_res, - .num_resources = ARRAY_SIZE(cpmac_low_res), -}; - -static struct platform_device cpmac_high = { - .id = 1, - .name = "cpmac", - .dev = { - .dma_mask = &cpmac_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &cpmac_high_data, - }, - .resource = cpmac_high_res, - .num_resources = ARRAY_SIZE(cpmac_high_res), -}; - -static void __init cpmac_get_mac(int instance, unsigned char *dev_addr) -{ - char name[5], *mac; - - sprintf(name, "mac%c", 'a' + instance); - mac = prom_getenv(name); - if (!mac && instance) { - sprintf(name, "mac%c", 'a'); - mac = prom_getenv(name); - } - - if (mac) { - if (!mac_pton(mac, dev_addr)) { - pr_warn("cannot parse mac address, using random address\n"); - eth_random_addr(dev_addr); - } - } else - eth_random_addr(dev_addr); -} - -/***************************************************************************** - * USB - ****************************************************************************/ -static struct resource usb_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_USB, - .end = AR7_REGS_USB + 0xff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 32, - .end = 32, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x03400000, - .end = 0x03401fff, - }, -}; - -static struct platform_device ar7_udc = { - .name = "ar7_udc", - .resource = usb_res, - .num_resources = ARRAY_SIZE(usb_res), -}; - -/***************************************************************************** - * LEDs - ****************************************************************************/ -static const struct gpio_led default_leds[] = { - { - .name = "status", - .gpio = 8, - .active_low = 1, - }, -}; - -static const struct gpio_led titan_leds[] = { - { .name = "status", .gpio = 8, .active_low = 1, }, - { .name = "wifi", .gpio = 13, .active_low = 1, }, -}; - -static const struct gpio_led dsl502t_leds[] = { - { - .name = "status", - .gpio = 9, - .active_low = 1, - }, - { - .name = "ethernet", - .gpio = 7, - .active_low = 1, - }, - { - .name = "usb", - .gpio = 12, - .active_low = 1, - }, -}; - -static const struct gpio_led dg834g_leds[] = { - { - .name = "ppp", - .gpio = 6, - .active_low = 1, - }, - { - .name = "status", - .gpio = 7, - .active_low = 1, - }, - { - .name = "adsl", - .gpio = 8, - .active_low = 1, - }, - { - .name = "wifi", - .gpio = 12, - .active_low = 1, - }, - { - .name = "power", - .gpio = 14, - .active_low = 1, - .default_trigger = "default-on", - }, -}; - -static const struct gpio_led fb_sl_leds[] = { - { - .name = "1", - .gpio = 7, - }, - { - .name = "2", - .gpio = 13, - .active_low = 1, - }, - { - .name = "3", - .gpio = 10, - .active_low = 1, - }, - { - .name = "4", - .gpio = 12, - .active_low = 1, - }, - { - .name = "5", - .gpio = 9, - .active_low = 1, - }, -}; - -static const struct gpio_led fb_fon_leds[] = { - { - .name = "1", - .gpio = 8, - }, - { - .name = "2", - .gpio = 3, - .active_low = 1, - }, - { - .name = "3", - .gpio = 5, - }, - { - .name = "4", - .gpio = 4, - .active_low = 1, - }, - { - .name = "5", - .gpio = 11, - .active_low = 1, - }, -}; - -static const struct gpio_led gt701_leds[] = { - { - .name = "inet:green", - .gpio = 13, - .active_low = 1, - }, - { - .name = "usb", - .gpio = 12, - .active_low = 1, - }, - { - .name = "inet:red", - .gpio = 9, - .active_low = 1, - }, - { - .name = "power:red", - .gpio = 7, - .active_low = 1, - }, - { - .name = "power:green", - .gpio = 8, - .active_low = 1, - .default_trigger = "default-on", - }, - { - .name = "ethernet", - .gpio = 10, - .active_low = 1, - }, -}; - -static struct gpio_led_platform_data ar7_led_data; - -static struct platform_device ar7_gpio_leds = { - .name = "leds-gpio", - .dev = { - .platform_data = &ar7_led_data, - } -}; - -static void __init detect_leds(void) -{ - char *prid, *usb_prod; - - /* Default LEDs */ - ar7_led_data.num_leds = ARRAY_SIZE(default_leds); - ar7_led_data.leds = default_leds; - - /* FIXME: the whole thing is unreliable */ - prid = prom_getenv("ProductID"); - usb_prod = prom_getenv("usb_prod"); - - /* If we can't get the product id from PROM, use the default LEDs */ - if (!prid) - return; - - if (strstr(prid, "Fritz_Box_FON")) { - ar7_led_data.num_leds = ARRAY_SIZE(fb_fon_leds); - ar7_led_data.leds = fb_fon_leds; - } else if (strstr(prid, "Fritz_Box_")) { - ar7_led_data.num_leds = ARRAY_SIZE(fb_sl_leds); - ar7_led_data.leds = fb_sl_leds; - } else if ((!strcmp(prid, "AR7RD") || !strcmp(prid, "AR7DB")) - && usb_prod != NULL && strstr(usb_prod, "DSL-502T")) { - ar7_led_data.num_leds = ARRAY_SIZE(dsl502t_leds); - ar7_led_data.leds = dsl502t_leds; - } else if (strstr(prid, "DG834")) { - ar7_led_data.num_leds = ARRAY_SIZE(dg834g_leds); - ar7_led_data.leds = dg834g_leds; - } else if (strstr(prid, "CYWM") || strstr(prid, "CYWL")) { - ar7_led_data.num_leds = ARRAY_SIZE(titan_leds); - ar7_led_data.leds = titan_leds; - } else if (strstr(prid, "GT701")) { - ar7_led_data.num_leds = ARRAY_SIZE(gt701_leds); - ar7_led_data.leds = gt701_leds; - } -} - -/***************************************************************************** - * Watchdog - ****************************************************************************/ -static struct resource ar7_wdt_res = { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = -1, /* Filled at runtime */ - .end = -1, /* Filled at runtime */ -}; - -static struct platform_device ar7_wdt = { - .name = "ar7_wdt", - .resource = &ar7_wdt_res, - .num_resources = 1, -}; - -/***************************************************************************** - * Init - ****************************************************************************/ -static int __init ar7_register_uarts(void) -{ -#ifdef CONFIG_SERIAL_8250 - static struct uart_port uart_port __initdata; - struct clk *bus_clk; - int res; - - memset(&uart_port, 0, sizeof(struct uart_port)); - - bus_clk = clk_get(NULL, "bus"); - if (IS_ERR(bus_clk)) - panic("unable to get bus clk"); - - uart_port.type = PORT_AR7; - uart_port.uartclk = clk_get_rate(bus_clk) / 2; - uart_port.iotype = UPIO_MEM32; - uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF; - uart_port.regshift = 2; - - uart_port.line = 0; - uart_port.irq = AR7_IRQ_UART0; - uart_port.mapbase = AR7_REGS_UART0; - uart_port.membase = ioremap(uart_port.mapbase, 256); - - res = early_serial_setup(&uart_port); - if (res) - return res; - - /* Only TNETD73xx have a second serial port */ - if (ar7_has_second_uart()) { - uart_port.line = 1; - uart_port.irq = AR7_IRQ_UART1; - uart_port.mapbase = UR8_REGS_UART1; - uart_port.membase = ioremap(uart_port.mapbase, 256); - - res = early_serial_setup(&uart_port); - if (res) - return res; - } -#endif - - return 0; -} - -static void __init titan_fixup_devices(void) -{ - /* Set vlynq0 data */ - vlynq_low_data.reset_bit = 15; - vlynq_low_data.gpio_bit = 14; - - /* Set vlynq1 data */ - vlynq_high_data.reset_bit = 16; - vlynq_high_data.gpio_bit = 7; - - /* Set vlynq0 resources */ - vlynq_low_res[0].start = TITAN_REGS_VLYNQ0; - vlynq_low_res[0].end = TITAN_REGS_VLYNQ0 + 0xff; - vlynq_low_res[1].start = 33; - vlynq_low_res[1].end = 33; - vlynq_low_res[2].start = 0x0c000000; - vlynq_low_res[2].end = 0x0fffffff; - vlynq_low_res[3].start = 80; - vlynq_low_res[3].end = 111; - - /* Set vlynq1 resources */ - vlynq_high_res[0].start = TITAN_REGS_VLYNQ1; - vlynq_high_res[0].end = TITAN_REGS_VLYNQ1 + 0xff; - vlynq_high_res[1].start = 34; - vlynq_high_res[1].end = 34; - vlynq_high_res[2].start = 0x40000000; - vlynq_high_res[2].end = 0x43ffffff; - vlynq_high_res[3].start = 112; - vlynq_high_res[3].end = 143; - - /* Set cpmac0 data */ - cpmac_low_data.phy_mask = 0x40000000; - - /* Set cpmac1 data */ - cpmac_high_data.phy_mask = 0x80000000; - - /* Set cpmac0 resources */ - cpmac_low_res[0].start = TITAN_REGS_MAC0; - cpmac_low_res[0].end = TITAN_REGS_MAC0 + 0x7ff; - - /* Set cpmac1 resources */ - cpmac_high_res[0].start = TITAN_REGS_MAC1; - cpmac_high_res[0].end = TITAN_REGS_MAC1 + 0x7ff; -} - -static int __init ar7_register_devices(void) -{ - void __iomem *bootcr; - u32 val; - int res; - - res = ar7_gpio_init(); - if (res) - pr_warn("unable to register gpios: %d\n", res); - - res = ar7_register_uarts(); - if (res) - pr_err("unable to setup uart(s): %d\n", res); - - res = platform_device_register(&physmap_flash); - if (res) - pr_warn("unable to register physmap-flash: %d\n", res); - - if (ar7_is_titan()) - titan_fixup_devices(); - - ar7_device_disable(vlynq_low_data.reset_bit); - res = platform_device_register(&vlynq_low); - if (res) - pr_warn("unable to register vlynq-low: %d\n", res); - - if (ar7_has_high_vlynq()) { - ar7_device_disable(vlynq_high_data.reset_bit); - res = platform_device_register(&vlynq_high); - if (res) - pr_warn("unable to register vlynq-high: %d\n", res); - } - - if (ar7_has_high_cpmac()) { - res = fixed_phy_add(PHY_POLL, cpmac_high.id, - &fixed_phy_status); - if (!res) { - cpmac_get_mac(1, cpmac_high_data.dev_addr); - - res = platform_device_register(&cpmac_high); - if (res) - pr_warn("unable to register cpmac-high: %d\n", - res); - } else - pr_warn("unable to add cpmac-high phy: %d\n", res); - } else - cpmac_low_data.phy_mask = 0xffffffff; - - res = fixed_phy_add(PHY_POLL, cpmac_low.id, &fixed_phy_status); - if (!res) { - cpmac_get_mac(0, cpmac_low_data.dev_addr); - res = platform_device_register(&cpmac_low); - if (res) - pr_warn("unable to register cpmac-low: %d\n", res); - } else - pr_warn("unable to add cpmac-low phy: %d\n", res); - - detect_leds(); - res = platform_device_register(&ar7_gpio_leds); - if (res) - pr_warn("unable to register leds: %d\n", res); - - res = platform_device_register(&ar7_udc); - if (res) - pr_warn("unable to register usb slave: %d\n", res); - - /* Register watchdog only if enabled in hardware */ - bootcr = ioremap(AR7_REGS_DCL, 4); - val = readl(bootcr); - iounmap(bootcr); - if (val & AR7_WDT_HW_ENA) { - if (ar7_has_high_vlynq()) - ar7_wdt_res.start = UR8_REGS_WDT; - else - ar7_wdt_res.start = AR7_REGS_WDT; - - ar7_wdt_res.end = ar7_wdt_res.start + 0x20; - res = platform_device_register(&ar7_wdt); - if (res) - pr_warn("unable to register watchdog: %d\n", res); - } - - return 0; -} -device_initcall(ar7_register_devices); diff --git a/arch/mips/ar7/prom.c b/arch/mips/ar7/prom.c deleted file mode 100644 index 5810d3993fc6..000000000000 --- a/arch/mips/ar7/prom.c +++ /dev/null @@ -1,256 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * Putting things on the screen/serial line using YAMONs facilities. - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/serial_reg.h> -#include <linux/spinlock.h> -#include <linux/export.h> -#include <linux/string.h> -#include <linux/io.h> -#include <asm/bootinfo.h> -#include <asm/setup.h> - -#include <asm/mach-ar7/ar7.h> -#include <asm/mach-ar7/prom.h> - -#define MAX_ENTRY 80 - -struct env_var { - char *name; - char *value; -}; - -static struct env_var adam2_env[MAX_ENTRY]; - -char *prom_getenv(const char *name) -{ - int i; - - for (i = 0; (i < MAX_ENTRY) && adam2_env[i].name; i++) - if (!strcmp(name, adam2_env[i].name)) - return adam2_env[i].value; - - return NULL; -} -EXPORT_SYMBOL(prom_getenv); - -static void __init ar7_init_cmdline(int argc, char *argv[]) -{ - int i; - - for (i = 1; i < argc; i++) { - strlcat(arcs_cmdline, argv[i], COMMAND_LINE_SIZE); - if (i < (argc - 1)) - strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE); - } -} - -struct psbl_rec { - u32 psbl_size; - u32 env_base; - u32 env_size; - u32 ffs_base; - u32 ffs_size; -}; - -static const char psp_env_version[] __initconst = "TIENV0.8"; - -struct psp_env_chunk { - u8 num; - u8 ctrl; - u16 csum; - u8 len; - char data[11]; -} __packed; - -struct psp_var_map_entry { - u8 num; - char *value; -}; - -static const struct psp_var_map_entry psp_var_map[] = { - { 1, "cpufrequency" }, - { 2, "memsize" }, - { 3, "flashsize" }, - { 4, "modetty0" }, - { 5, "modetty1" }, - { 8, "maca" }, - { 9, "macb" }, - { 28, "sysfrequency" }, - { 38, "mipsfrequency" }, -}; - -/* - -Well-known variable (num is looked up in table above for matching variable name) -Example: cpufrequency=211968000 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 01 |CTRL|CHECKSUM | 01 | _2 | _1 | _1 | _9 | _6 | _8 | _0 | _0 | _0 | \0 | FF -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Name=Value pair in a single chunk -Example: NAME=VALUE -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 00 |CTRL|CHECKSUM | 01 | _N | _A | _M | _E | _0 | _V | _A | _L | _U | _E | \0 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Name=Value pair in 2 chunks (len is the number of chunks) -Example: bootloaderVersion=1.3.7.15 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 00 |CTRL|CHECKSUM | 02 | _b | _o | _o | _t | _l | _o | _a | _d | _e | _r | _V -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| _e | _r | _s | _i | _o | _n | \0 | _1 | _. | _3 | _. | _7 | _. | _1 | _5 | \0 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Data is padded with 0xFF - -*/ - -#define PSP_ENV_SIZE 4096 - -static char psp_env_data[PSP_ENV_SIZE] = { 0, }; - -static char * __init lookup_psp_var_map(u8 num) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(psp_var_map); i++) - if (psp_var_map[i].num == num) - return psp_var_map[i].value; - - return NULL; -} - -static void __init add_adam2_var(char *name, char *value) -{ - int i; - - for (i = 0; i < MAX_ENTRY; i++) { - if (!adam2_env[i].name) { - adam2_env[i].name = name; - adam2_env[i].value = value; - return; - } else if (!strcmp(adam2_env[i].name, name)) { - adam2_env[i].value = value; - return; - } - } -} - -static int __init parse_psp_env(void *psp_env_base) -{ - int i, n; - char *name, *value; - struct psp_env_chunk *chunks = (struct psp_env_chunk *)psp_env_data; - - memcpy_fromio(chunks, psp_env_base, PSP_ENV_SIZE); - - i = 1; - n = PSP_ENV_SIZE / sizeof(struct psp_env_chunk); - while (i < n) { - if ((chunks[i].num == 0xff) || ((i + chunks[i].len) > n)) - break; - value = chunks[i].data; - if (chunks[i].num) { - name = lookup_psp_var_map(chunks[i].num); - } else { - name = value; - value += strlen(name) + 1; - } - if (name) - add_adam2_var(name, value); - i += chunks[i].len; - } - return 0; -} - -static void __init ar7_init_env(struct env_var *env) -{ - int i; - struct psbl_rec *psbl = (struct psbl_rec *)(KSEG1ADDR(0x14000300)); - void *psp_env = (void *)KSEG1ADDR(psbl->env_base); - - if (strcmp(psp_env, psp_env_version) == 0) { - parse_psp_env(psp_env); - } else { - for (i = 0; i < MAX_ENTRY; i++, env++) - if (env->name) - add_adam2_var(env->name, env->value); - } -} - -static void __init console_config(void) -{ -#ifdef CONFIG_SERIAL_8250_CONSOLE - char console_string[40]; - int baud = 0; - char parity = '\0', bits = '\0', flow = '\0'; - char *s, *p; - - if (strstr(arcs_cmdline, "console=")) - return; - - s = prom_getenv("modetty0"); - if (s) { - baud = simple_strtoul(s, &p, 10); - s = p; - if (*s == ',') - s++; - if (*s) - parity = *s++; - if (*s == ',') - s++; - if (*s) - bits = *s++; - if (*s == ',') - s++; - if (*s == 'h') - flow = 'r'; - } - - if (baud == 0) - baud = 38400; - if (parity != 'n' && parity != 'o' && parity != 'e') - parity = 'n'; - if (bits != '7' && bits != '8') - bits = '8'; - - if (flow == 'r') - sprintf(console_string, " console=ttyS0,%d%c%c%c", baud, - parity, bits, flow); - else - sprintf(console_string, " console=ttyS0,%d%c%c", baud, parity, - bits); - strlcat(arcs_cmdline, console_string, COMMAND_LINE_SIZE); -#endif -} - -void __init prom_init(void) -{ - ar7_init_cmdline(fw_arg0, (char **)fw_arg1); - ar7_init_env((struct env_var *)fw_arg2); - console_config(); -} - -#define PORT(offset) (KSEG1ADDR(AR7_REGS_UART0 + (offset * 4))) -static inline unsigned int serial_in(int offset) -{ - return readl((void *)PORT(offset)); -} - -static inline void serial_out(int offset, int value) -{ - writel(value, (void *)PORT(offset)); -} - -void prom_putchar(char c) -{ - while ((serial_in(UART_LSR) & UART_LSR_TEMT) == 0) - ; - serial_out(UART_TX, c); -} diff --git a/arch/mips/ar7/setup.c b/arch/mips/ar7/setup.c deleted file mode 100644 index 352d5dbc777c..000000000000 --- a/arch/mips/ar7/setup.c +++ /dev/null @@ -1,93 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. - */ -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/pm.h> -#include <linux/time.h> - -#include <asm/reboot.h> -#include <asm/mach-ar7/ar7.h> -#include <asm/mach-ar7/prom.h> - -static void ar7_machine_restart(char *command) -{ - u32 *softres_reg = ioremap(AR7_REGS_RESET + AR7_RESET_SOFTWARE, 1); - - writel(1, softres_reg); -} - -static void ar7_machine_halt(void) -{ - while (1) - ; -} - -static void ar7_machine_power_off(void) -{ - u32 *power_reg = (u32 *)ioremap(AR7_REGS_POWER, 1); - u32 power_state = readl(power_reg) | (3 << 30); - - writel(power_state, power_reg); - ar7_machine_halt(); -} - -const char *get_system_type(void) -{ - u16 chip_id = ar7_chip_id(); - u16 titan_variant_id = titan_chip_id(); - - switch (chip_id) { - case AR7_CHIP_7100: - return "TI AR7 (TNETD7100)"; - case AR7_CHIP_7200: - return "TI AR7 (TNETD7200)"; - case AR7_CHIP_7300: - return "TI AR7 (TNETD7300)"; - case AR7_CHIP_TITAN: - switch (titan_variant_id) { - case TITAN_CHIP_1050: - return "TI AR7 (TNETV1050)"; - case TITAN_CHIP_1055: - return "TI AR7 (TNETV1055)"; - case TITAN_CHIP_1056: - return "TI AR7 (TNETV1056)"; - case TITAN_CHIP_1060: - return "TI AR7 (TNETV1060)"; - } - fallthrough; - default: - return "TI AR7 (unknown)"; - } -} - -static int __init ar7_init_console(void) -{ - return 0; -} -console_initcall(ar7_init_console); - -/* - * Initializes basic routines and structures pointers, memory size (as - * given by the bios and saves the command line. - */ -void __init plat_mem_setup(void) -{ - unsigned long io_base; - - _machine_restart = ar7_machine_restart; - _machine_halt = ar7_machine_halt; - pm_power_off = ar7_machine_power_off; - - io_base = (unsigned long)ioremap(AR7_REGS_BASE, 0x10000); - if (!io_base) - panic("Can't remap IO base!"); - set_io_port_base(io_base); - - prom_meminit(); - - printk(KERN_INFO "%s, ID: 0x%04x, Revision: 0x%02x\n", - get_system_type(), ar7_chip_id(), ar7_chip_rev()); -} diff --git a/arch/mips/ar7/time.c b/arch/mips/ar7/time.c deleted file mode 100644 index 72aa77d7087b..000000000000 --- a/arch/mips/ar7/time.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * Setting up the clock on the MIPS boards. - */ - -#include <linux/init.h> -#include <linux/time.h> -#include <linux/err.h> -#include <linux/clk.h> - -#include <asm/time.h> -#include <asm/mach-ar7/ar7.h> - -void __init plat_time_init(void) -{ - struct clk *cpu_clk; - - /* Initialize ar7 clocks so the CPU clock frequency is correct */ - ar7_init_clocks(); - - cpu_clk = clk_get(NULL, "cpu"); - if (IS_ERR(cpu_clk)) { - printk(KERN_ERR "unable to get cpu clock\n"); - return; - } - - mips_hpt_frequency = clk_get_rate(cpu_clk) / 2; -} diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c index 96d28f211121..09dcd2c561d9 100644 --- a/arch/mips/boot/compressed/uart-16550.c +++ b/arch/mips/boot/compressed/uart-16550.c @@ -13,11 +13,6 @@ #define PORT(offset) (CKSEG1ADDR(UART_BASE) + (offset)) #endif -#ifdef CONFIG_AR7 -#include <ar7.h> -#define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset)) -#endif - #ifdef CONFIG_MACH_INGENIC #define INGENIC_UART_BASE_ADDR (0x10030000 + 0x1000 * CONFIG_ZBOOT_INGENIC_UART) #define PORT(offset) (CKSEG1ADDR(INGENIC_UART_BASE_ADDR) + (4 * offset)) diff --git a/arch/mips/boot/dts/ingenic/jz4725b.dtsi b/arch/mips/boot/dts/ingenic/jz4725b.dtsi index acbbe8c4664c..c5c5a094c37d 100644 --- a/arch/mips/boot/dts/ingenic/jz4725b.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4725b.dtsi @@ -366,7 +366,6 @@ rom: memory@1fc00000 { compatible = "mtd-rom"; - probe-type = "map_rom"; reg = <0x1fc00000 0x2000>; bank-width = <4>; diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi index 9c0099919db7..504e895e916e 100644 --- a/arch/mips/boot/dts/ingenic/jz4770.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi @@ -461,7 +461,6 @@ rom: memory@1fc00000 { compatible = "mtd-rom"; - probe-type = "map_rom"; reg = <0x1fc00000 0x2000>; bank-width = <4>; diff --git a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts index 129b6710b699..f9c262cc2e96 100644 --- a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts +++ b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts @@ -8,7 +8,7 @@ / { compatible = "gnubee,gb-pc1", "mediatek,mt7621-soc"; - model = "GB-PC1"; + model = "GnuBee GB-PC1"; memory@0 { device_type = "memory"; diff --git a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts index f810cd10f4f4..b281e13f22ed 100644 --- a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts +++ b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts @@ -8,7 +8,7 @@ / { compatible = "gnubee,gb-pc2", "mediatek,mt7621-soc"; - model = "GB-PC2"; + model = "GnuBee GB-PC2"; memory@0 { device_type = "memory"; diff --git a/arch/mips/boot/dts/ralink/mt7621.dtsi b/arch/mips/boot/dts/ralink/mt7621.dtsi index 7caed0d14f11..35a10258f235 100644 --- a/arch/mips/boot/dts/ralink/mt7621.dtsi +++ b/arch/mips/boot/dts/ralink/mt7621.dtsi @@ -300,14 +300,13 @@ compatible = "mediatek,mt7621-eth"; reg = <0x1e100000 0x10000>; - clocks = <&sysc MT7621_CLK_FE>, - <&sysc MT7621_CLK_ETH>; + clocks = <&sysc MT7621_CLK_FE>, <&sysc MT7621_CLK_ETH>; clock-names = "fe", "ethif"; #address-cells = <1>; #size-cells = <0>; - resets = <&sysc MT7621_RST_FE &sysc MT7621_RST_ETH>; + resets = <&sysc MT7621_RST_FE>, <&sysc MT7621_RST_ETH>; reset-names = "fe", "eth"; interrupt-parent = <&gic>; diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig deleted file mode 100644 index 329c60aa570a..000000000000 --- a/arch/mips/configs/ar7_defconfig +++ /dev/null @@ -1,119 +0,0 @@ -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_KERNEL_LZMA=y -CONFIG_SYSVIPC=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -# CONFIG_ELF_CORE is not set -# CONFIG_KALLSYMS is not set -# CONFIG_VM_EVENT_COUNTERS is not set -# CONFIG_COMPAT_BRK is not set -CONFIG_AR7=y -CONFIG_HZ_100=y -CONFIG_KEXEC=y -# CONFIG_SECCOMP is not set -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_BSD_DISKLABEL=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_MROUTE=y -CONFIG_SYN_COOKIES=y -# CONFIG_INET_DIAG is not set -CONFIG_TCP_CONG_ADVANCED=y -# CONFIG_TCP_CONG_BIC is not set -# CONFIG_TCP_CONG_CUBIC is not set -CONFIG_TCP_CONG_WESTWOOD=y -# CONFIG_TCP_CONG_HTCP is not set -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -# CONFIG_BRIDGE_NETFILTER is not set -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_RAW=m -CONFIG_ATM=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_BRIDGE=y -CONFIG_VLAN_8021Q=y -CONFIG_NET_SCHED=y -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=y -CONFIG_HAMRADIO=y -CONFIG_CFG80211=m -CONFIG_MAC80211=m -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_CFI_STAA=y -CONFIG_MTD_COMPLEX_MAPPINGS=y -CONFIG_MTD_PHYSMAP=y -CONFIG_NETDEVICES=y -CONFIG_CPMAC=y -CONFIG_FIXED_PHY=y -CONFIG_PPP=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOATM=m -CONFIG_PPPOE=m -CONFIG_PPP_ASYNC=m -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -# CONFIG_LEGACY_PTYS is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_NR_UARTS=2 -CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_HW_RANDOM=y -CONFIG_GPIO_SYSFS=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_AR7_WDT=y -# CONFIG_USB_SUPPORT is not set -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_LEDS_TRIGGER_DEFAULT_ON=y -# CONFIG_DNOTIFY is not set -CONFIG_PROC_KCORE=y -# CONFIG_PROC_PAGE_MONITOR is not set -CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_SUMMARY=y -CONFIG_JFFS2_COMPRESSION_OPTIONS=y -CONFIG_SQUASHFS=y -# CONFIG_CRYPTO_HW is not set -CONFIG_STRIP_ASM_SYMS=y -CONFIG_DEBUG_FS=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="rootfstype=squashfs,jffs2" diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 1843468f84a3..00329bb5de5a 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -177,7 +177,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=m CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index fdf374574105..65adb538030d 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -70,10 +70,6 @@ CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HWMON is not set CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 83d9a8ff4270..38f17b658421 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -229,9 +229,6 @@ CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_XFS_FS=m diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index ae1a7793e810..6f8046024557 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -317,11 +317,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index c07e30f63d8b..16a91eeff67f 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -323,11 +323,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 0a5701020d3f..264aba29ea4f 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -323,11 +323,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 5c5e2186210c..08e1c1f2f4de 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -310,10 +310,6 @@ CONFIG_USB_LD=m CONFIG_USB_TEST=m CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/include/asm/mach-ar7/ar7.h b/arch/mips/include/asm/mach-ar7/ar7.h deleted file mode 100644 index 1e8621a6afa3..000000000000 --- a/arch/mips/include/asm/mach-ar7/ar7.h +++ /dev/null @@ -1,191 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006,2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2006,2007 Eugene Konev <ejka@openwrt.org> - */ - -#ifndef __AR7_H__ -#define __AR7_H__ - -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/errno.h> - -#include <asm/addrspace.h> - -#define AR7_SDRAM_BASE 0x14000000 - -#define AR7_REGS_BASE 0x08610000 - -#define AR7_REGS_MAC0 (AR7_REGS_BASE + 0x0000) -#define AR7_REGS_GPIO (AR7_REGS_BASE + 0x0900) -/* 0x08610A00 - 0x08610BFF (512 bytes, 128 bytes / clock) */ -#define AR7_REGS_POWER (AR7_REGS_BASE + 0x0a00) -#define AR7_REGS_CLOCKS (AR7_REGS_POWER + 0x80) -#define UR8_REGS_CLOCKS (AR7_REGS_POWER + 0x20) -#define AR7_REGS_UART0 (AR7_REGS_BASE + 0x0e00) -#define AR7_REGS_USB (AR7_REGS_BASE + 0x1200) -#define AR7_REGS_RESET (AR7_REGS_BASE + 0x1600) -#define AR7_REGS_PINSEL (AR7_REGS_BASE + 0x160C) -#define AR7_REGS_VLYNQ0 (AR7_REGS_BASE + 0x1800) -#define AR7_REGS_DCL (AR7_REGS_BASE + 0x1a00) -#define AR7_REGS_VLYNQ1 (AR7_REGS_BASE + 0x1c00) -#define AR7_REGS_MDIO (AR7_REGS_BASE + 0x1e00) -#define AR7_REGS_IRQ (AR7_REGS_BASE + 0x2400) -#define AR7_REGS_MAC1 (AR7_REGS_BASE + 0x2800) - -#define AR7_REGS_WDT (AR7_REGS_BASE + 0x1f00) -#define UR8_REGS_WDT (AR7_REGS_BASE + 0x0b00) -#define UR8_REGS_UART1 (AR7_REGS_BASE + 0x0f00) - -/* Titan registers */ -#define TITAN_REGS_ESWITCH_BASE (0x08640000) -#define TITAN_REGS_MAC0 (TITAN_REGS_ESWITCH_BASE) -#define TITAN_REGS_MAC1 (TITAN_REGS_ESWITCH_BASE + 0x0800) -#define TITAN_REGS_MDIO (TITAN_REGS_ESWITCH_BASE + 0x02000) -#define TITAN_REGS_VLYNQ0 (AR7_REGS_BASE + 0x1c00) -#define TITAN_REGS_VLYNQ1 (AR7_REGS_BASE + 0x1300) - -#define AR7_RESET_PERIPHERAL 0x0 -#define AR7_RESET_SOFTWARE 0x4 -#define AR7_RESET_STATUS 0x8 - -#define AR7_RESET_BIT_CPMAC_LO 17 -#define AR7_RESET_BIT_CPMAC_HI 21 -#define AR7_RESET_BIT_MDIO 22 -#define AR7_RESET_BIT_EPHY 26 - -#define TITAN_RESET_BIT_EPHY1 28 - -/* GPIO control registers */ -#define AR7_GPIO_INPUT 0x0 -#define AR7_GPIO_OUTPUT 0x4 -#define AR7_GPIO_DIR 0x8 -#define AR7_GPIO_ENABLE 0xc -#define TITAN_GPIO_INPUT_0 0x0 -#define TITAN_GPIO_INPUT_1 0x4 -#define TITAN_GPIO_OUTPUT_0 0x8 -#define TITAN_GPIO_OUTPUT_1 0xc -#define TITAN_GPIO_DIR_0 0x10 -#define TITAN_GPIO_DIR_1 0x14 -#define TITAN_GPIO_ENBL_0 0x18 -#define TITAN_GPIO_ENBL_1 0x1c - -#define AR7_CHIP_7100 0x18 -#define AR7_CHIP_7200 0x2b -#define AR7_CHIP_7300 0x05 -#define AR7_CHIP_TITAN 0x07 -#define TITAN_CHIP_1050 0x0f -#define TITAN_CHIP_1055 0x0e -#define TITAN_CHIP_1056 0x0d -#define TITAN_CHIP_1060 0x07 - -/* Interrupts */ -#define AR7_IRQ_UART0 15 -#define AR7_IRQ_UART1 16 - -/* Clocks */ -#define AR7_AFE_CLOCK 35328000 -#define AR7_REF_CLOCK 25000000 -#define AR7_XTAL_CLOCK 24000000 - -/* DCL */ -#define AR7_WDT_HW_ENA 0x10 - -struct plat_cpmac_data { - int reset_bit; - int power_bit; - u32 phy_mask; - char dev_addr[6]; -}; - -struct plat_dsl_data { - int reset_bit_dsl; - int reset_bit_sar; -}; - -static inline int ar7_is_titan(void) -{ - return (readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x24)) & 0xffff) == - AR7_CHIP_TITAN; -} - -static inline u16 ar7_chip_id(void) -{ - return ar7_is_titan() ? AR7_CHIP_TITAN : (readl((void *) - KSEG1ADDR(AR7_REGS_GPIO + 0x14)) & 0xffff); -} - -static inline u16 titan_chip_id(void) -{ - unsigned int val = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + - TITAN_GPIO_INPUT_1)); - return ((val >> 12) & 0x0f); -} - -static inline u8 ar7_chip_rev(void) -{ - return (readl((void *)KSEG1ADDR(AR7_REGS_GPIO + (ar7_is_titan() ? 0x24 : - 0x14))) >> 16) & 0xff; -} - -static inline int ar7_has_high_cpmac(void) -{ - u16 chip_id = ar7_chip_id(); - switch (chip_id) { - case AR7_CHIP_7100: - case AR7_CHIP_7200: - return 0; - case AR7_CHIP_7300: - return 1; - default: - return -ENXIO; - } -} -#define ar7_has_high_vlynq ar7_has_high_cpmac -#define ar7_has_second_uart ar7_has_high_cpmac - -static inline void ar7_device_enable(u32 bit) -{ - void *reset_reg = - (void *)KSEG1ADDR(AR7_REGS_RESET + AR7_RESET_PERIPHERAL); - writel(readl(reset_reg) | (1 << bit), reset_reg); - msleep(20); -} - -static inline void ar7_device_disable(u32 bit) -{ - void *reset_reg = - (void *)KSEG1ADDR(AR7_REGS_RESET + AR7_RESET_PERIPHERAL); - writel(readl(reset_reg) & ~(1 << bit), reset_reg); - msleep(20); -} - -static inline void ar7_device_reset(u32 bit) -{ - ar7_device_disable(bit); - ar7_device_enable(bit); -} - -static inline void ar7_device_on(u32 bit) -{ - void *power_reg = (void *)KSEG1ADDR(AR7_REGS_POWER); - writel(readl(power_reg) | (1 << bit), power_reg); - msleep(20); -} - -static inline void ar7_device_off(u32 bit) -{ - void *power_reg = (void *)KSEG1ADDR(AR7_REGS_POWER); - writel(readl(power_reg) & ~(1 << bit), power_reg); - msleep(20); -} - -int __init ar7_gpio_init(void); -void __init ar7_init_clocks(void); - -/* Board specific GPIO functions */ -int ar7_gpio_enable(unsigned gpio); -int ar7_gpio_disable(unsigned gpio); - -#endif /* __AR7_H__ */ diff --git a/arch/mips/include/asm/mach-ar7/irq.h b/arch/mips/include/asm/mach-ar7/irq.h deleted file mode 100644 index 46bb730ea970..000000000000 --- a/arch/mips/include/asm/mach-ar7/irq.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Shamelessly copied from asm-mips/mach-emma2rh/ - * Copyright (C) 2003 by Ralf Baechle - */ -#ifndef __ASM_AR7_IRQ_H -#define __ASM_AR7_IRQ_H - -#define NR_IRQS 256 - -#include <asm/mach-generic/irq.h> - -#endif /* __ASM_AR7_IRQ_H */ diff --git a/arch/mips/include/asm/mach-ar7/prom.h b/arch/mips/include/asm/mach-ar7/prom.h deleted file mode 100644 index 9e1d20b06f57..000000000000 --- a/arch/mips/include/asm/mach-ar7/prom.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006, 2007 Florian Fainelli <florian@openwrt.org> - */ - -#ifndef __PROM_H__ -#define __PROM_H__ - -extern char *prom_getenv(const char *name); -extern void prom_meminit(void); - -#endif /* __PROM_H__ */ diff --git a/arch/mips/include/asm/mach-ar7/spaces.h b/arch/mips/include/asm/mach-ar7/spaces.h deleted file mode 100644 index a004d94dfbdd..000000000000 --- a/arch/mips/include/asm/mach-ar7/spaces.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1994 - 1999, 2000, 03, 04 Ralf Baechle - * Copyright (C) 2000, 2002 Maciej W. Rozycki - * Copyright (C) 1990, 1999, 2000 Silicon Graphics, Inc. - */ -#ifndef _ASM_AR7_SPACES_H -#define _ASM_AR7_SPACES_H - -/* - * This handles the memory map. - * We handle pages at KSEG0 for kernels with 32 bit address space. - */ -#define PAGE_OFFSET _AC(0x94000000, UL) -#define PHYS_OFFSET _AC(0x14000000, UL) - -#include <asm/mach-generic/spaces.h> - -#endif /* __ASM_AR7_SPACES_H */ diff --git a/arch/mips/include/asm/mach-loongson32/dma.h b/arch/mips/include/asm/mach-loongson32/dma.h deleted file mode 100644 index e917b3ccb2c2..000000000000 --- a/arch/mips/include/asm/mach-loongson32/dma.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2015 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Loongson 1 NAND platform support. - */ - -#ifndef __ASM_MACH_LOONGSON32_DMA_H -#define __ASM_MACH_LOONGSON32_DMA_H - -#define LS1X_DMA_CHANNEL0 0 -#define LS1X_DMA_CHANNEL1 1 -#define LS1X_DMA_CHANNEL2 2 - -struct plat_ls1x_dma { - int nr_channels; -}; - -extern struct plat_ls1x_dma ls1b_dma_pdata; - -#endif /* __ASM_MACH_LOONGSON32_DMA_H */ diff --git a/arch/mips/include/asm/mach-loongson32/nand.h b/arch/mips/include/asm/mach-loongson32/nand.h deleted file mode 100644 index aaf5ed19d78d..000000000000 --- a/arch/mips/include/asm/mach-loongson32/nand.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2015 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Loongson 1 NAND platform support. - */ - -#ifndef __ASM_MACH_LOONGSON32_NAND_H -#define __ASM_MACH_LOONGSON32_NAND_H - -#include <linux/dmaengine.h> -#include <linux/mtd/partitions.h> - -struct plat_ls1x_nand { - struct mtd_partition *parts; - unsigned int nr_parts; - - int hold_cycle; - int wait_cycle; -}; - -extern struct plat_ls1x_nand ls1b_nand_pdata; - -bool ls1x_dma_filter_fn(struct dma_chan *chan, void *param); - -#endif /* __ASM_MACH_LOONGSON32_NAND_H */ diff --git a/arch/mips/include/asm/mach-loongson32/platform.h b/arch/mips/include/asm/mach-loongson32/platform.h index 2cdcfb5f6012..f74292b13bc3 100644 --- a/arch/mips/include/asm/mach-loongson32/platform.h +++ b/arch/mips/include/asm/mach-loongson32/platform.h @@ -8,9 +8,6 @@ #include <linux/platform_device.h> -#include <dma.h> -#include <nand.h> - extern struct platform_device ls1x_uart_pdev; extern struct platform_device ls1x_eth0_pdev; extern struct platform_device ls1x_eth1_pdev; diff --git a/arch/mips/jazz/setup.c b/arch/mips/jazz/setup.c index 04aab419a0fc..e318ea11c858 100644 --- a/arch/mips/jazz/setup.c +++ b/arch/mips/jazz/setup.c @@ -13,7 +13,6 @@ #include <linux/init.h> #include <linux/ioport.h> #include <linux/console.h> -#include <linux/screen_info.h> #include <linux/platform_device.h> #include <linux/serial_8250.h> #include <linux/dma-mapping.h> @@ -76,14 +75,6 @@ void __init plat_mem_setup(void) _machine_restart = jazz_machine_restart; -#ifdef CONFIG_VT - screen_info = (struct screen_info) { - .orig_video_cols = 160, - .orig_video_lines = 64, - .orig_video_points = 16, - }; -#endif - add_preferred_console("ttyS", 0, "9600"); } diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index f5b2ef979b43..8f0a7263a9d6 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -66,7 +66,6 @@ copy_word: LONG_ADDIU s6, s6, -1 beq s6, zero, process_entry b copy_word - b process_entry done: #ifdef CONFIG_SMP diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 08321c945ac4..2d2ca024bd47 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -15,7 +15,6 @@ #include <linux/delay.h> #include <linux/ioport.h> #include <linux/export.h> -#include <linux/screen_info.h> #include <linux/memblock.h> #include <linux/initrd.h> #include <linux/root_dev.h> @@ -54,10 +53,6 @@ struct cpuinfo_mips cpu_data[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_data); -#ifdef CONFIG_VT -struct screen_info screen_info; -#endif - /* * Setup information * @@ -793,12 +788,6 @@ void __init setup_arch(char **cmdline_p) if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) check_bugs64_early(); -#if defined(CONFIG_VT) -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; -#endif -#endif - arch_mem_init(cmdline_p); dmi_setup(); diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c index 8075590a9f83..623eb4bc7b41 100644 --- a/arch/mips/loongson32/common/platform.c +++ b/arch/mips/loongson32/common/platform.c @@ -15,8 +15,6 @@ #include <platform.h> #include <loongson1.h> -#include <dma.h> -#include <nand.h> /* 8250/16550 compatible UART */ #define LS1X_UART(_id) \ diff --git a/arch/mips/loongson32/ls1b/board.c b/arch/mips/loongson32/ls1b/board.c index fed8d432ef20..fe115bdcb22c 100644 --- a/arch/mips/loongson32/ls1b/board.c +++ b/arch/mips/loongson32/ls1b/board.c @@ -8,8 +8,6 @@ #include <linux/sizes.h> #include <loongson1.h> -#include <dma.h> -#include <nand.h> #include <platform.h> static const struct gpio_led ls1x_gpio_leds[] __initconst = { diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index 21cb3ac1237b..3a2836e9d856 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -161,7 +161,7 @@ static void __init pci_clock_check(void) #if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) static void __init screen_info_setup(void) { - screen_info = (struct screen_info) { + static struct screen_info si = { .orig_x = 0, .orig_y = 25, .ext_mem_k = 0, @@ -175,6 +175,8 @@ static void __init screen_info_setup(void) .orig_video_isVGA = VIDEO_TYPE_VGAC, .orig_video_points = 16 }; + + vgacon_register_screen(&si); } #endif diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c index 105569c1b712..13009666204f 100644 --- a/arch/mips/pci/fixup-lantiq.c +++ b/arch/mips/pci/fixup-lantiq.c @@ -4,8 +4,8 @@ * Copyright (C) 2012 John Crispin <john@phrozen.org> */ -#include <linux/of_irq.h> #include <linux/of_pci.h> +#include <linux/pci.h> int (*ltq_pci_plat_arch_init)(struct pci_dev *dev) = NULL; int (*ltq_pci_plat_dev_init)(struct pci_dev *dev) = NULL; diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index 76683993cdd3..38c90b5e8754 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -13,6 +13,7 @@ #include <linux/memblock.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/console.h> #include <linux/screen_info.h> #include <linux/initrd.h> @@ -112,6 +113,19 @@ int update_persistent_clock64(struct timespec64 now) } } +#ifdef CONFIG_VGA_CONSOLE +static struct screen_info vgacon_screen_info = { + .orig_video_page = 52, + .orig_video_mode = 3, + .orig_video_cols = 80, + .flags = 12, + .orig_video_ega_bx = 3, + .orig_video_lines = 25, + .orig_video_isVGA = 0x22, + .orig_video_points = 16, +}; +#endif + void __init plat_mem_setup(void) { #ifdef CONFIG_SIBYTE_BCM1x80 @@ -129,17 +143,8 @@ void __init plat_mem_setup(void) if (m41t81_probe()) swarm_rtc_type = RTC_M41T81; -#ifdef CONFIG_VT - screen_info = (struct screen_info) { - .orig_video_page = 52, - .orig_video_mode = 3, - .orig_video_cols = 80, - .flags = 12, - .orig_video_ega_bx = 3, - .orig_video_lines = 25, - .orig_video_isVGA = 0x22, - .orig_video_points = 16, - }; +#ifdef CONFIG_VGA_CONSOLE + vgacon_register_screen(&vgacon_screen_info); /* XXXKW for CFE, get lines/cols from environment */ #endif } diff --git a/arch/mips/sni/setup.c b/arch/mips/sni/setup.c index efad85c8c823..42fdb939c88d 100644 --- a/arch/mips/sni/setup.c +++ b/arch/mips/sni/setup.c @@ -38,19 +38,21 @@ extern void sni_machine_power_off(void); static void __init sni_display_setup(void) { -#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) && defined(CONFIG_FW_ARC) - struct screen_info *si = &screen_info; +#if defined(CONFIG_VGA_CONSOLE) && defined(CONFIG_FW_ARC) + static struct screen_info si; DISPLAY_STATUS *di; di = ArcGetDisplayStatus(1); if (di) { - si->orig_x = di->CursorXPosition; - si->orig_y = di->CursorYPosition; - si->orig_video_cols = di->CursorMaxXPosition; - si->orig_video_lines = di->CursorMaxYPosition; - si->orig_video_isVGA = VIDEO_TYPE_VGAC; - si->orig_video_points = 16; + si.orig_x = di->CursorXPosition; + si.orig_y = di->CursorYPosition; + si.orig_video_cols = di->CursorMaxXPosition; + si.orig_video_lines = di->CursorMaxYPosition; + si.orig_video_isVGA = VIDEO_TYPE_VGAC; + si.orig_video_points = 16; + + vgacon_register_screen(&si); } #endif } diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index 8582ed965844..da122a5fa43b 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -19,7 +19,6 @@ #include <linux/memblock.h> #include <linux/initrd.h> #include <linux/of_fdt.h> -#include <linux/screen_info.h> #include <asm/mmu_context.h> #include <asm/sections.h> @@ -36,10 +35,6 @@ static struct pt_regs fake_regs = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -#ifdef CONFIG_VT -struct screen_info screen_info; -#endif - /* Copy a short hook instruction sequence to the exception address */ static inline void copy_exception_handler(unsigned int addr) { diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 968ebe17494c..920db57b6b4c 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -177,12 +177,8 @@ vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 include/generated/vdso32-offsets.h endif -PHONY += vdso_install - -vdso_install: - $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso $@ - $(if $(CONFIG_COMPAT_VDSO), \ - $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 $@) +vdso-install-y += arch/parisc/kernel/vdso32/vdso32.so +vdso-install-$(CONFIG_64BIT) += arch/parisc/kernel/vdso64/vdso64.so install: KBUILD_IMAGE := vmlinux zinstall: KBUILD_IMAGE := vmlinuz diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index f6ded7147b4d..19a804860ed5 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -248,7 +248,6 @@ CONFIG_UIO_AEC=m CONFIG_UIO_SERCOS3=m CONFIG_UIO_PCI_GENERIC=m CONFIG_STAGING=y -CONFIG_QLGE=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d5d5388973ac..6f105ee4f3cf 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -237,6 +237,7 @@ config PPC select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_DESCRIPTORS if PPC64_ELF_ABI_V1 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 2a54fadbeaf5..ea4033abc07d 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -82,6 +82,18 @@ config MSI_BITMAP_SELFTEST bool "Run self-tests of the MSI bitmap code" depends on DEBUG_KERNEL +config GUEST_STATE_BUFFER_TEST + def_tristate n + prompt "Enable Guest State Buffer unit tests" + depends on KUNIT + depends on KVM_BOOK3S_HV_POSSIBLE + default KUNIT_ALL_TESTS + help + The Guest State Buffer is a data format specified in the PAPR. + It is by hcalls to communicate the state of L2 guests between + the L1 and L0 hypervisors. Enable unit tests for the library + used to create and use guest state buffers. + config PPC_IRQ_SOFT_MASK_DEBUG bool "Include extra checks for powerpc irq soft masking" depends on PPC64 @@ -147,6 +159,8 @@ config BDI_SWITCH config BOOTX_TEXT bool "Support for early boot text console (BootX or OpenFirmware only)" depends on PPC_BOOK3S + select FONT_SUN8x16 + select FONT_SUPPORT help Say Y here to see progress messages from the boot firmware in text mode. Requires either BootX or Open Firmware. diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink index 1f860b3c9bec..ae5a4256b03d 100644 --- a/arch/powerpc/Makefile.postlink +++ b/arch/powerpc/Makefile.postlink @@ -35,9 +35,6 @@ ifdef CONFIG_RELOCATABLE $(call if_changed,relocs_check) endif -%.ko: FORCE - @true - clean: rm -f .tmp_symbols.txt diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh index 461902c8a46d..101fcb397a0f 100755 --- a/arch/powerpc/boot/install.sh +++ b/arch/powerpc/boot/install.sh @@ -21,13 +21,17 @@ set -e # this should work for both the pSeries zImage and the iSeries vmlinux.sm image_name=`basename $2` -if [ -f $4/$image_name ]; then - mv $4/$image_name $4/$image_name.old + +echo "Warning: '${INSTALLKERNEL}' command not available... Copying" \ + "directly to $4/$image_name-$1" >&2 + +if [ -f $4/$image_name-$1 ]; then + mv $4/$image_name-$1 $4/$image_name-$1.old fi -if [ -f $4/System.map ]; then - mv $4/System.map $4/System.old +if [ -f $4/System.map-$1 ]; then + mv $4/System.map-$1 $4/System-$1.old fi -cat $2 > $4/$image_name -cp $3 $4/System.map +cat $2 > $4/$image_name-$1 +cp $3 $4/System.map-$1 diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 51499ee6366b..2479ab62d12f 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -78,7 +78,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_REISERFS_FS=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/powerpc/configs/debug.config b/arch/powerpc/configs/debug.config index a14ae1f20d60..bcc1fcf25e10 100644 --- a/arch/powerpc/configs/debug.config +++ b/arch/powerpc/configs/debug.config @@ -1 +1,5 @@ +CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG=y +CONFIG_PPC_IRQ_SOFT_MASK_DEBUG=y +CONFIG_PPC_KUAP_DEBUG=y +CONFIG_PPC_RFI_SRR_DEBUG=y CONFIG_SCOM_DEBUGFS=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 71d9d112c0b6..9215bed53291 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -202,10 +202,6 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_POSIX_ACL=y CONFIG_FS_DAX=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index a205da9ee5f2..57ded82c2840 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -138,7 +138,6 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m CONFIG_ADB=y -CONFIG_ADB_CUDA=y CONFIG_ADB_PMU=y CONFIG_ADB_PMU_LED=y CONFIG_ADB_PMU_LED_DISK=y @@ -181,6 +180,7 @@ CONFIG_SERIAL_PMACZILOG_TTYS=y CONFIG_SERIAL_PMACZILOG_CONSOLE=y CONFIG_NVRAM=y CONFIG_I2C_CHARDEV=m +CONFIG_POWER_RESET=y CONFIG_APM_POWER=y CONFIG_BATTERY_PMU=y CONFIG_HWMON=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 624c371ffcc3..4c05f4e4d505 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -175,10 +175,6 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=y CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index eaf3273372a9..f279703425d4 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -954,11 +954,6 @@ CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_JBD2_DEBUG=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 9b13eb14e21b..52971ee30717 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -20,7 +20,7 @@ #define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ #define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ -#define _PAGE_USER 0x004 /* usermode access allowed */ +#define _PAGE_READ 0x004 /* software: read access allowed */ #define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ #define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ #define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ @@ -28,7 +28,7 @@ #define _PAGE_DIRTY 0x080 /* C: page changed */ #define _PAGE_ACCESSED 0x100 /* R: page referenced */ #define _PAGE_EXEC 0x200 /* software: exec allowed */ -#define _PAGE_RW 0x400 /* software: user write access allowed */ +#define _PAGE_WRITE 0x400 /* software: user write access allowed */ #define _PAGE_SPECIAL 0x800 /* software: Special page */ #ifdef CONFIG_PTE_64BIT @@ -42,26 +42,13 @@ #define _PMD_PRESENT_MASK (PAGE_MASK) #define _PMD_BAD (~PAGE_MASK) -/* We borrow the _PAGE_USER bit to store the exclusive marker in swap PTEs. */ -#define _PAGE_SWP_EXCLUSIVE _PAGE_USER +/* We borrow the _PAGE_READ bit to store the exclusive marker in swap PTEs. */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_READ /* And here we include common definitions */ -#define _PAGE_KERNEL_RO 0 -#define _PAGE_KERNEL_ROX (_PAGE_EXEC) -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) - #define _PAGE_HPTEFLAGS _PAGE_HASHPTE -#ifndef __ASSEMBLY__ - -static inline bool pte_user(pte_t pte) -{ - return pte_val(pte) & _PAGE_USER; -} -#endif /* __ASSEMBLY__ */ - /* * Location of the PFN in the PTE. Most 32-bit platforms use the same * as _PAGE_SHIFT here (ie, naturally aligned). @@ -97,20 +84,7 @@ static inline bool pte_user(pte_t pte) #define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) #define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) -/* - * Permission masks used to generate the __P and __S table. - * - * Note:__pgprot is defined in arch/powerpc/include/asm/page.h - * - * Write permissions imply read permissions for now. - */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#include <asm/pgtable-masks.h> /* Permission masks used for kernel mappings */ #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) @@ -170,7 +144,14 @@ void unmap_kernel_page(unsigned long va); * value (for now) on others, from where we can start layout kernel * virtual space that goes below PKMAP and FIXMAP */ -#include <asm/fixmap.h> + +#define FIXADDR_SIZE 0 +#ifdef CONFIG_KASAN +#include <asm/kasan.h> +#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) +#else +#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) +#endif /* * ioremap_bot starts at that address. Early ioremaps move down from there, @@ -224,9 +205,6 @@ void unmap_kernel_page(unsigned long va); /* Bits to mask out from a PGD to get to the PUD page */ #define PGD_MASKED_BITS 0 -#define pte_ERROR(e) \ - pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ - (unsigned long long)pte_val(e)) #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* @@ -343,7 +321,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); } static inline void __ptep_set_access_flags(struct vm_area_struct *vma, @@ -402,8 +380,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) } /* Generic accessors to PTE bits */ -static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} -static inline int pte_read(pte_t pte) { return 1; } +static inline bool pte_read(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_READ); +} + +static inline bool pte_write(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_WRITE); +} + static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); } static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } @@ -438,10 +424,10 @@ static inline bool pte_ci(pte_t pte) static inline bool pte_access_permitted(pte_t pte, bool write) { /* - * A read-only access is controlled by _PAGE_USER bit. - * We have _PAGE_READ set for WRITE and EXECUTE + * A read-only access is controlled by _PAGE_READ bit. + * We have _PAGE_READ set for WRITE */ - if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte)) + if (!pte_present(pte) || !pte_read(pte)) return false; if (write && !pte_write(pte)) @@ -465,7 +451,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_RW); + return __pte(pte_val(pte) & ~_PAGE_WRITE); } static inline pte_t pte_exprotect(pte_t pte) @@ -495,6 +481,9 @@ static inline pte_t pte_mkpte(pte_t pte) static inline pte_t pte_mkwrite_novma(pte_t pte) { + /* + * write implies read, hence set both + */ return __pte(pte_val(pte) | _PAGE_RW); } @@ -518,16 +507,6 @@ static inline pte_t pte_mkhuge(pte_t pte) return pte; } -static inline pte_t pte_mkprivileged(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_USER); -} - -static inline pte_t pte_mkuser(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_USER); -} - static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index 4be572908124..e43534da5207 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -80,7 +80,7 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, static inline void local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) { - BUILD_BUG(); + flush_range(mm, vmaddr, vmaddr); } static inline void local_flush_tlb_mm(struct mm_struct *mm) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 5c497c862d75..cb77eddca54b 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -17,6 +17,10 @@ #define _PAGE_EXEC 0x00001 /* execute permission */ #define _PAGE_WRITE 0x00002 /* write access allowed */ #define _PAGE_READ 0x00004 /* read access allowed */ +#define _PAGE_NA _PAGE_PRIVILEGED +#define _PAGE_NAX _PAGE_EXEC +#define _PAGE_RO _PAGE_READ +#define _PAGE_ROX (_PAGE_READ | _PAGE_EXEC) #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) #define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) #define _PAGE_PRIVILEGED 0x00008 /* kernel access only */ @@ -136,23 +140,7 @@ #define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) #define _PAGE_BASE (_PAGE_BASE_NC) -/* Permission masks used to generate the __P and __S table, - * - * Note:__pgprot is defined in arch/powerpc/include/asm/page.h - * - * Write permissions imply read permissions for now (we could make write-only - * pages on BookE but we don't bother for now). Execute permission control is - * possible on platforms that define _PAGE_EXEC - */ -#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_READ) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) -/* Radix only, Hash uses PAGE_READONLY_X + execute-only pkey instead */ -#define PAGE_EXECONLY __pgprot(_PAGE_BASE | _PAGE_EXEC) +#include <asm/pgtable-masks.h> /* Permission masks used for kernel mappings */ #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) @@ -316,6 +304,7 @@ extern unsigned long pci_io_base; #define IOREMAP_START (ioremap_bot) #define IOREMAP_END (KERN_IO_END - FIXADDR_SIZE) #define FIXADDR_SIZE SZ_32M +#define FIXADDR_TOP (IOREMAP_END + FIXADDR_SIZE) #ifndef __ASSEMBLY__ @@ -629,16 +618,6 @@ static inline pte_t pte_mkdevmap(pte_t pte) return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL | _PAGE_DEVMAP)); } -static inline pte_t pte_mkprivileged(pte_t pte) -{ - return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED)); -} - -static inline pte_t pte_mkuser(pte_t pte) -{ - return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED)); -} - /* * This is potentially called with a pmd as the argument, in which case it's not * safe to check _PAGE_DEVMAP unless we also confirm that _PAGE_PTE is set. @@ -647,7 +626,7 @@ static inline pte_t pte_mkuser(pte_t pte) */ static inline int pte_devmap(pte_t pte) { - u64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE); + __be64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE); return (pte_raw(pte) & mask) == mask; } @@ -1014,8 +993,6 @@ static inline pmd_t *pud_pgtable(pud_t pud) return (pmd_t *)__va(pud_val(pud) & ~PUD_MASKED_BITS); } -#define pte_ERROR(e) \ - pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_ERROR(e) \ diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h index 3b7bd36a2321..f42d68c6b314 100644 --- a/arch/powerpc/include/asm/book3s/pgtable.h +++ b/arch/powerpc/include/asm/book3s/pgtable.h @@ -8,37 +8,4 @@ #include <asm/book3s/32/pgtable.h> #endif -#ifndef __ASSEMBLY__ -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, - pte_t *ptep, pte_t entry, int dirty); - -struct file; -extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); -#define __HAVE_PHYS_MEM_ACCESS_PROT - -void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); - -/* - * This gets called at the end of handling a page fault, when - * the kernel has put a new PTE into the page table for the process. - * We use it to ensure coherency between the i-cache and d-cache - * for the page which has just been mapped in. - * On machines which use an MMU hash table, we use this to put a - * corresponding HPTE into the hash table ahead of time, instead of - * waiting for the inevitable extra hash-table miss exception. - */ -static inline void update_mmu_cache_range(struct vm_fault *vmf, - struct vm_area_struct *vma, unsigned long address, - pte_t *ptep, unsigned int nr) -{ - if (IS_ENABLED(CONFIG_PPC32) && !mmu_has_feature(MMU_FTR_HPTE_TABLE)) - return; - if (radix_enabled()) - return; - __update_mmu_cache(vma, address, ptep); -} - -#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 3f881548fb61..0e29ccf903d0 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr, int patch_branch(u32 *addr, unsigned long target, int flags); int patch_instruction(u32 *addr, ppc_inst_t instr); int raw_patch_instruction(u32 *addr, ppc_inst_t instr); +int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr); static inline unsigned long patch_site_addr(s32 *site) { diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h index 3bdd74739cb8..e3c6969853ef 100644 --- a/arch/powerpc/include/asm/cpm1.h +++ b/arch/powerpc/include/asm/cpm1.h @@ -49,11 +49,6 @@ */ extern cpm8xx_t __iomem *cpmp; /* Pointer to comm processor */ -#define cpm_dpalloc cpm_muram_alloc -#define cpm_dpfree cpm_muram_free -#define cpm_dpram_addr cpm_muram_addr -#define cpm_dpram_phys cpm_muram_dma - extern void cpm_setbrg(uint brg, uint rate); extern void __init cpm_load_patch(cpm8xx_t *cp); diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index 249d43cc6427..a22acc36eb9b 100644 --- a/arch/powerpc/include/asm/cpm2.h +++ b/arch/powerpc/include/asm/cpm2.h @@ -87,10 +87,6 @@ */ extern cpm_cpm2_t __iomem *cpmp; /* Pointer to comm processor */ -#define cpm_dpalloc cpm_muram_alloc -#define cpm_dpfree cpm_muram_free -#define cpm_dpram_addr cpm_muram_addr - extern void cpm2_reset(void); /* Baud rate generators. diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index a832aeafe560..f9068dd8dfce 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -23,18 +23,6 @@ #include <asm/kmap_size.h> #endif -#ifdef CONFIG_PPC64 -#define FIXADDR_TOP (IOREMAP_END + FIXADDR_SIZE) -#else -#define FIXADDR_SIZE 0 -#ifdef CONFIG_KASAN -#include <asm/kasan.h> -#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) -#else -#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) -#endif -#endif - /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at @@ -119,5 +107,9 @@ static inline void __set_fixmap(enum fixed_addresses idx, #define __early_set_fixmap __set_fixmap +#ifdef CONFIG_PPC_8xx +#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) +#endif + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/guest-state-buffer.h b/arch/powerpc/include/asm/guest-state-buffer.h new file mode 100644 index 000000000000..808149f31576 --- /dev/null +++ b/arch/powerpc/include/asm/guest-state-buffer.h @@ -0,0 +1,995 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Interface based on include/net/netlink.h + */ +#ifndef _ASM_POWERPC_GUEST_STATE_BUFFER_H +#define _ASM_POWERPC_GUEST_STATE_BUFFER_H + +#include "asm/hvcall.h" +#include <linux/gfp.h> +#include <linux/bitmap.h> +#include <asm/plpar_wrappers.h> + +/************************************************************************** + * Guest State Buffer Constants + **************************************************************************/ +/* Element without a value and any length */ +#define KVMPPC_GSID_BLANK 0x0000 +/* Size required for the L0's internal VCPU representation */ +#define KVMPPC_GSID_HOST_STATE_SIZE 0x0001 + /* Minimum size for the H_GUEST_RUN_VCPU output buffer */ +#define KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE 0x0002 + /* "Logical" PVR value as defined in the PAPR */ +#define KVMPPC_GSID_LOGICAL_PVR 0x0003 + /* L0 relative timebase offset */ +#define KVMPPC_GSID_TB_OFFSET 0x0004 + /* Partition Scoped Page Table Info */ +#define KVMPPC_GSID_PARTITION_TABLE 0x0005 + /* Process Table Info */ +#define KVMPPC_GSID_PROCESS_TABLE 0x0006 + +/* H_GUEST_RUN_VCPU input buffer Info */ +#define KVMPPC_GSID_RUN_INPUT 0x0C00 +/* H_GUEST_RUN_VCPU output buffer Info */ +#define KVMPPC_GSID_RUN_OUTPUT 0x0C01 +#define KVMPPC_GSID_VPA 0x0C02 + +#define KVMPPC_GSID_GPR(x) (0x1000 + (x)) +#define KVMPPC_GSID_HDEC_EXPIRY_TB 0x1020 +#define KVMPPC_GSID_NIA 0x1021 +#define KVMPPC_GSID_MSR 0x1022 +#define KVMPPC_GSID_LR 0x1023 +#define KVMPPC_GSID_XER 0x1024 +#define KVMPPC_GSID_CTR 0x1025 +#define KVMPPC_GSID_CFAR 0x1026 +#define KVMPPC_GSID_SRR0 0x1027 +#define KVMPPC_GSID_SRR1 0x1028 +#define KVMPPC_GSID_DAR 0x1029 +#define KVMPPC_GSID_DEC_EXPIRY_TB 0x102A +#define KVMPPC_GSID_VTB 0x102B +#define KVMPPC_GSID_LPCR 0x102C +#define KVMPPC_GSID_HFSCR 0x102D +#define KVMPPC_GSID_FSCR 0x102E +#define KVMPPC_GSID_FPSCR 0x102F +#define KVMPPC_GSID_DAWR0 0x1030 +#define KVMPPC_GSID_DAWR1 0x1031 +#define KVMPPC_GSID_CIABR 0x1032 +#define KVMPPC_GSID_PURR 0x1033 +#define KVMPPC_GSID_SPURR 0x1034 +#define KVMPPC_GSID_IC 0x1035 +#define KVMPPC_GSID_SPRG0 0x1036 +#define KVMPPC_GSID_SPRG1 0x1037 +#define KVMPPC_GSID_SPRG2 0x1038 +#define KVMPPC_GSID_SPRG3 0x1039 +#define KVMPPC_GSID_PPR 0x103A +#define KVMPPC_GSID_MMCR(x) (0x103B + (x)) +#define KVMPPC_GSID_MMCRA 0x103F +#define KVMPPC_GSID_SIER(x) (0x1040 + (x)) +#define KVMPPC_GSID_BESCR 0x1043 +#define KVMPPC_GSID_EBBHR 0x1044 +#define KVMPPC_GSID_EBBRR 0x1045 +#define KVMPPC_GSID_AMR 0x1046 +#define KVMPPC_GSID_IAMR 0x1047 +#define KVMPPC_GSID_AMOR 0x1048 +#define KVMPPC_GSID_UAMOR 0x1049 +#define KVMPPC_GSID_SDAR 0x104A +#define KVMPPC_GSID_SIAR 0x104B +#define KVMPPC_GSID_DSCR 0x104C +#define KVMPPC_GSID_TAR 0x104D +#define KVMPPC_GSID_DEXCR 0x104E +#define KVMPPC_GSID_HDEXCR 0x104F +#define KVMPPC_GSID_HASHKEYR 0x1050 +#define KVMPPC_GSID_HASHPKEYR 0x1051 +#define KVMPPC_GSID_CTRL 0x1052 + +#define KVMPPC_GSID_CR 0x2000 +#define KVMPPC_GSID_PIDR 0x2001 +#define KVMPPC_GSID_DSISR 0x2002 +#define KVMPPC_GSID_VSCR 0x2003 +#define KVMPPC_GSID_VRSAVE 0x2004 +#define KVMPPC_GSID_DAWRX0 0x2005 +#define KVMPPC_GSID_DAWRX1 0x2006 +#define KVMPPC_GSID_PMC(x) (0x2007 + (x)) +#define KVMPPC_GSID_WORT 0x200D +#define KVMPPC_GSID_PSPB 0x200E + +#define KVMPPC_GSID_VSRS(x) (0x3000 + (x)) + +#define KVMPPC_GSID_HDAR 0xF000 +#define KVMPPC_GSID_HDSISR 0xF001 +#define KVMPPC_GSID_HEIR 0xF002 +#define KVMPPC_GSID_ASDR 0xF003 + +#define KVMPPC_GSE_GUESTWIDE_START KVMPPC_GSID_BLANK +#define KVMPPC_GSE_GUESTWIDE_END KVMPPC_GSID_PROCESS_TABLE +#define KVMPPC_GSE_GUESTWIDE_COUNT \ + (KVMPPC_GSE_GUESTWIDE_END - KVMPPC_GSE_GUESTWIDE_START + 1) + +#define KVMPPC_GSE_META_START KVMPPC_GSID_RUN_INPUT +#define KVMPPC_GSE_META_END KVMPPC_GSID_VPA +#define KVMPPC_GSE_META_COUNT (KVMPPC_GSE_META_END - KVMPPC_GSE_META_START + 1) + +#define KVMPPC_GSE_DW_REGS_START KVMPPC_GSID_GPR(0) +#define KVMPPC_GSE_DW_REGS_END KVMPPC_GSID_CTRL +#define KVMPPC_GSE_DW_REGS_COUNT \ + (KVMPPC_GSE_DW_REGS_END - KVMPPC_GSE_DW_REGS_START + 1) + +#define KVMPPC_GSE_W_REGS_START KVMPPC_GSID_CR +#define KVMPPC_GSE_W_REGS_END KVMPPC_GSID_PSPB +#define KVMPPC_GSE_W_REGS_COUNT \ + (KVMPPC_GSE_W_REGS_END - KVMPPC_GSE_W_REGS_START + 1) + +#define KVMPPC_GSE_VSRS_START KVMPPC_GSID_VSRS(0) +#define KVMPPC_GSE_VSRS_END KVMPPC_GSID_VSRS(63) +#define KVMPPC_GSE_VSRS_COUNT (KVMPPC_GSE_VSRS_END - KVMPPC_GSE_VSRS_START + 1) + +#define KVMPPC_GSE_INTR_REGS_START KVMPPC_GSID_HDAR +#define KVMPPC_GSE_INTR_REGS_END KVMPPC_GSID_ASDR +#define KVMPPC_GSE_INTR_REGS_COUNT \ + (KVMPPC_GSE_INTR_REGS_END - KVMPPC_GSE_INTR_REGS_START + 1) + +#define KVMPPC_GSE_IDEN_COUNT \ + (KVMPPC_GSE_GUESTWIDE_COUNT + KVMPPC_GSE_META_COUNT + \ + KVMPPC_GSE_DW_REGS_COUNT + KVMPPC_GSE_W_REGS_COUNT + \ + KVMPPC_GSE_VSRS_COUNT + KVMPPC_GSE_INTR_REGS_COUNT) + +/** + * Ranges of guest state buffer elements + */ +enum { + KVMPPC_GS_CLASS_GUESTWIDE = 0x01, + KVMPPC_GS_CLASS_META = 0x02, + KVMPPC_GS_CLASS_DWORD_REG = 0x04, + KVMPPC_GS_CLASS_WORD_REG = 0x08, + KVMPPC_GS_CLASS_VECTOR = 0x10, + KVMPPC_GS_CLASS_INTR = 0x20, +}; + +/** + * Types of guest state buffer elements + */ +enum { + KVMPPC_GSE_BE32, + KVMPPC_GSE_BE64, + KVMPPC_GSE_VEC128, + KVMPPC_GSE_PARTITION_TABLE, + KVMPPC_GSE_PROCESS_TABLE, + KVMPPC_GSE_BUFFER, + __KVMPPC_GSE_TYPE_MAX, +}; + +/** + * Flags for guest state elements + */ +enum { + KVMPPC_GS_FLAGS_WIDE = 0x01, +}; + +/** + * struct kvmppc_gs_part_table - deserialized partition table information + * element + * @address: start of the partition table + * @ea_bits: number of bits in the effective address + * @gpd_size: root page directory size + */ +struct kvmppc_gs_part_table { + u64 address; + u64 ea_bits; + u64 gpd_size; +}; + +/** + * struct kvmppc_gs_proc_table - deserialized process table information element + * @address: start of the process table + * @gpd_size: process table size + */ +struct kvmppc_gs_proc_table { + u64 address; + u64 gpd_size; +}; + +/** + * struct kvmppc_gs_buff_info - deserialized meta guest state buffer information + * @address: start of the guest state buffer + * @size: size of the guest state buffer + */ +struct kvmppc_gs_buff_info { + u64 address; + u64 size; +}; + +/** + * struct kvmppc_gs_header - serialized guest state buffer header + * @nelem: count of guest state elements in the buffer + * @data: start of the stream of elements in the buffer + */ +struct kvmppc_gs_header { + __be32 nelems; + char data[]; +} __packed; + +/** + * struct kvmppc_gs_elem - serialized guest state buffer element + * @iden: Guest State ID + * @len: length of data + * @data: the guest state buffer element's value + */ +struct kvmppc_gs_elem { + __be16 iden; + __be16 len; + char data[]; +} __packed; + +/** + * struct kvmppc_gs_buff - a guest state buffer with metadata. + * @capacity: total length of the buffer + * @len: current length of the elements and header + * @guest_id: guest id associated with the buffer + * @vcpu_id: vcpu_id associated with the buffer + * @hdr: the serialised guest state buffer + */ +struct kvmppc_gs_buff { + size_t capacity; + size_t len; + unsigned long guest_id; + unsigned long vcpu_id; + struct kvmppc_gs_header *hdr; +}; + +/** + * struct kvmppc_gs_bitmap - a bitmap for element ids + * @bitmap: a bitmap large enough for all Guest State IDs + */ +struct kvmppc_gs_bitmap { + /* private: */ + DECLARE_BITMAP(bitmap, KVMPPC_GSE_IDEN_COUNT); +}; + +/** + * struct kvmppc_gs_parser - a map of element ids to locations in a buffer + * @iterator: bitmap used for iterating + * @gses: contains the pointers to elements + * + * A guest state parser is used for deserialising a guest state buffer. + * Given a buffer, it then allows looking up guest state elements using + * a guest state id. + */ +struct kvmppc_gs_parser { + /* private: */ + struct kvmppc_gs_bitmap iterator; + struct kvmppc_gs_elem *gses[KVMPPC_GSE_IDEN_COUNT]; +}; + +enum { + GSM_GUEST_WIDE = 0x1, + GSM_SEND = 0x2, + GSM_RECEIVE = 0x4, + GSM_GSB_OWNER = 0x8, +}; + +struct kvmppc_gs_msg; + +/** + * struct kvmppc_gs_msg_ops - guest state message behavior + * @get_size: maximum size required for the message data + * @fill_info: serializes to the guest state buffer format + * @refresh_info: dserializes from the guest state buffer format + */ +struct kvmppc_gs_msg_ops { + size_t (*get_size)(struct kvmppc_gs_msg *gsm); + int (*fill_info)(struct kvmppc_gs_buff *gsb, struct kvmppc_gs_msg *gsm); + int (*refresh_info)(struct kvmppc_gs_msg *gsm, + struct kvmppc_gs_buff *gsb); +}; + +/** + * struct kvmppc_gs_msg - a guest state message + * @bitmap: the guest state ids that should be included + * @ops: modify message behavior for reading and writing to buffers + * @flags: guest wide or thread wide + * @data: location where buffer data will be written to or from. + * + * A guest state message is allows flexibility in sending in receiving data + * in a guest state buffer format. + */ +struct kvmppc_gs_msg { + struct kvmppc_gs_bitmap bitmap; + struct kvmppc_gs_msg_ops *ops; + unsigned long flags; + void *data; +}; + +/************************************************************************** + * Guest State IDs + **************************************************************************/ + +u16 kvmppc_gsid_size(u16 iden); +unsigned long kvmppc_gsid_flags(u16 iden); +u64 kvmppc_gsid_mask(u16 iden); + +/************************************************************************** + * Guest State Buffers + **************************************************************************/ +struct kvmppc_gs_buff *kvmppc_gsb_new(size_t size, unsigned long guest_id, + unsigned long vcpu_id, gfp_t flags); +void kvmppc_gsb_free(struct kvmppc_gs_buff *gsb); +void *kvmppc_gsb_put(struct kvmppc_gs_buff *gsb, size_t size); +int kvmppc_gsb_send(struct kvmppc_gs_buff *gsb, unsigned long flags); +int kvmppc_gsb_recv(struct kvmppc_gs_buff *gsb, unsigned long flags); + +/** + * kvmppc_gsb_header() - the header of a guest state buffer + * @gsb: guest state buffer + * + * Returns a pointer to the buffer header. + */ +static inline struct kvmppc_gs_header * +kvmppc_gsb_header(struct kvmppc_gs_buff *gsb) +{ + return gsb->hdr; +} + +/** + * kvmppc_gsb_data() - the elements of a guest state buffer + * @gsb: guest state buffer + * + * Returns a pointer to the first element of the buffer data. + */ +static inline struct kvmppc_gs_elem *kvmppc_gsb_data(struct kvmppc_gs_buff *gsb) +{ + return (struct kvmppc_gs_elem *)kvmppc_gsb_header(gsb)->data; +} + +/** + * kvmppc_gsb_len() - the current length of a guest state buffer + * @gsb: guest state buffer + * + * Returns the length including the header of a buffer. + */ +static inline size_t kvmppc_gsb_len(struct kvmppc_gs_buff *gsb) +{ + return gsb->len; +} + +/** + * kvmppc_gsb_capacity() - the capacity of a guest state buffer + * @gsb: guest state buffer + * + * Returns the capacity of a buffer. + */ +static inline size_t kvmppc_gsb_capacity(struct kvmppc_gs_buff *gsb) +{ + return gsb->capacity; +} + +/** + * kvmppc_gsb_paddress() - the physical address of buffer + * @gsb: guest state buffer + * + * Returns the physical address of the buffer. + */ +static inline u64 kvmppc_gsb_paddress(struct kvmppc_gs_buff *gsb) +{ + return __pa(kvmppc_gsb_header(gsb)); +} + +/** + * kvmppc_gsb_nelems() - the number of elements in a buffer + * @gsb: guest state buffer + * + * Returns the number of elements in a buffer + */ +static inline u32 kvmppc_gsb_nelems(struct kvmppc_gs_buff *gsb) +{ + return be32_to_cpu(kvmppc_gsb_header(gsb)->nelems); +} + +/** + * kvmppc_gsb_reset() - empty a guest state buffer + * @gsb: guest state buffer + * + * Reset the number of elements and length of buffer to empty. + */ +static inline void kvmppc_gsb_reset(struct kvmppc_gs_buff *gsb) +{ + kvmppc_gsb_header(gsb)->nelems = cpu_to_be32(0); + gsb->len = sizeof(struct kvmppc_gs_header); +} + +/** + * kvmppc_gsb_data_len() - the length of a buffer excluding the header + * @gsb: guest state buffer + * + * Returns the length of a buffer excluding the header + */ +static inline size_t kvmppc_gsb_data_len(struct kvmppc_gs_buff *gsb) +{ + return gsb->len - sizeof(struct kvmppc_gs_header); +} + +/** + * kvmppc_gsb_data_cap() - the capacity of a buffer excluding the header + * @gsb: guest state buffer + * + * Returns the capacity of a buffer excluding the header + */ +static inline size_t kvmppc_gsb_data_cap(struct kvmppc_gs_buff *gsb) +{ + return gsb->capacity - sizeof(struct kvmppc_gs_header); +} + +/** + * kvmppc_gsb_for_each_elem - iterate over the elements in a buffer + * @i: loop counter + * @pos: set to current element + * @gsb: guest state buffer + * @rem: initialized to buffer capacity, holds bytes currently remaining in + * stream + */ +#define kvmppc_gsb_for_each_elem(i, pos, gsb, rem) \ + kvmppc_gse_for_each_elem(i, kvmppc_gsb_nelems(gsb), pos, \ + kvmppc_gsb_data(gsb), \ + kvmppc_gsb_data_cap(gsb), rem) + +/************************************************************************** + * Guest State Elements + **************************************************************************/ + +/** + * kvmppc_gse_iden() - guest state ID of element + * @gse: guest state element + * + * Return the guest state ID in host endianness. + */ +static inline u16 kvmppc_gse_iden(const struct kvmppc_gs_elem *gse) +{ + return be16_to_cpu(gse->iden); +} + +/** + * kvmppc_gse_len() - length of guest state element data + * @gse: guest state element + * + * Returns the length of guest state element data + */ +static inline u16 kvmppc_gse_len(const struct kvmppc_gs_elem *gse) +{ + return be16_to_cpu(gse->len); +} + +/** + * kvmppc_gse_total_len() - total length of guest state element + * @gse: guest state element + * + * Returns the length of the data plus the ID and size header. + */ +static inline u16 kvmppc_gse_total_len(const struct kvmppc_gs_elem *gse) +{ + return be16_to_cpu(gse->len) + sizeof(*gse); +} + +/** + * kvmppc_gse_total_size() - space needed for a given data length + * @size: data length + * + * Returns size plus the space needed for the ID and size header. + */ +static inline u16 kvmppc_gse_total_size(u16 size) +{ + return sizeof(struct kvmppc_gs_elem) + size; +} + +/** + * kvmppc_gse_data() - pointer to data of a guest state element + * @gse: guest state element + * + * Returns a pointer to the beginning of guest state element data. + */ +static inline void *kvmppc_gse_data(const struct kvmppc_gs_elem *gse) +{ + return (void *)gse->data; +} + +/** + * kvmppc_gse_ok() - checks space exists for guest state element + * @gse: guest state element + * @remaining: bytes of space remaining + * + * Returns true if the guest state element can fit in remaining space. + */ +static inline bool kvmppc_gse_ok(const struct kvmppc_gs_elem *gse, + int remaining) +{ + return remaining >= kvmppc_gse_total_len(gse); +} + +/** + * kvmppc_gse_next() - iterate to the next guest state element in a stream + * @gse: stream of guest state elements + * @remaining: length of the guest element stream + * + * Returns the next guest state element in a stream of elements. The length of + * the stream is updated in remaining. + */ +static inline struct kvmppc_gs_elem * +kvmppc_gse_next(const struct kvmppc_gs_elem *gse, int *remaining) +{ + int len = sizeof(*gse) + kvmppc_gse_len(gse); + + *remaining -= len; + return (struct kvmppc_gs_elem *)(gse->data + kvmppc_gse_len(gse)); +} + +/** + * kvmppc_gse_for_each_elem - iterate over a stream of guest state elements + * @i: loop counter + * @max: number of elements + * @pos: set to current element + * @head: head of elements + * @len: length of the stream + * @rem: initialized to len, holds bytes currently remaining elements + */ +#define kvmppc_gse_for_each_elem(i, max, pos, head, len, rem) \ + for (i = 0, pos = head, rem = len; kvmppc_gse_ok(pos, rem) && i < max; \ + pos = kvmppc_gse_next(pos, &(rem)), i++) + +int __kvmppc_gse_put(struct kvmppc_gs_buff *gsb, u16 iden, u16 size, + const void *data); +int kvmppc_gse_parse(struct kvmppc_gs_parser *gsp, struct kvmppc_gs_buff *gsb); + +/** + * kvmppc_gse_put_be32() - add a be32 guest state element to a buffer + * @gsb: guest state buffer to add element to + * @iden: guest state ID + * @val: big endian value + */ +static inline int kvmppc_gse_put_be32(struct kvmppc_gs_buff *gsb, u16 iden, + __be32 val) +{ + __be32 tmp; + + tmp = val; + return __kvmppc_gse_put(gsb, iden, sizeof(__be32), &tmp); +} + +/** + * kvmppc_gse_put_u32() - add a host endian 32bit int guest state element to a + * buffer + * @gsb: guest state buffer to add element to + * @iden: guest state ID + * @val: host endian value + */ +static inline int kvmppc_gse_put_u32(struct kvmppc_gs_buff *gsb, u16 iden, + u32 val) +{ + __be32 tmp; + + val &= kvmppc_gsid_mask(iden); + tmp = cpu_to_be32(val); + return kvmppc_gse_put_be32(gsb, iden, tmp); +} + +/** + * kvmppc_gse_put_be64() - add a be64 guest state element to a buffer + * @gsb: guest state buffer to add element to + * @iden: guest state ID + * @val: big endian value + */ +static inline int kvmppc_gse_put_be64(struct kvmppc_gs_buff *gsb, u16 iden, + __be64 val) +{ + __be64 tmp; + + tmp = val; + return __kvmppc_gse_put(gsb, iden, sizeof(__be64), &tmp); +} + +/** + * kvmppc_gse_put_u64() - add a host endian 64bit guest state element to a + * buffer + * @gsb: guest state buffer to add element to + * @iden: guest state ID + * @val: host endian value + */ +static inline int kvmppc_gse_put_u64(struct kvmppc_gs_buff *gsb, u16 iden, + u64 val) +{ + __be64 tmp; + + val &= kvmppc_gsid_mask(iden); + tmp = cpu_to_be64(val); + return kvmppc_gse_put_be64(gsb, iden, tmp); +} + +/** + * __kvmppc_gse_put_reg() - add a register type guest state element to a buffer + * @gsb: guest state buffer to add element to + * @iden: guest state ID + * @val: host endian value + * + * Adds a register type guest state element. Uses the guest state ID for + * determining the length of the guest element. If the guest state ID has + * bits that can not be set they will be cleared. + */ +static inline int __kvmppc_gse_put_reg(struct kvmppc_gs_buff *gsb, u16 iden, + u64 val) +{ + val &= kvmppc_gsid_mask(iden); + if (kvmppc_gsid_size(iden) == sizeof(u64)) + return kvmppc_gse_put_u64(gsb, iden, val); + + if (kvmppc_gsid_size(iden) == sizeof(u32)) { + u32 tmp; + + tmp = (u32)val; + if (tmp != val) + return -EINVAL; + + return kvmppc_gse_put_u32(gsb, iden, tmp); + } + return -EINVAL; +} + +/** + * kvmppc_gse_put_vector128() - add a vector guest state element to a buffer + * @gsb: guest state buffer to add element to + * @iden: guest state ID + * @val: 16 byte vector value + */ +static inline int kvmppc_gse_put_vector128(struct kvmppc_gs_buff *gsb, u16 iden, + vector128 *val) +{ + __be64 tmp[2] = { 0 }; + union { + __vector128 v; + u64 dw[2]; + } u; + + u.v = *val; + tmp[0] = cpu_to_be64(u.dw[TS_FPROFFSET]); +#ifdef CONFIG_VSX + tmp[1] = cpu_to_be64(u.dw[TS_VSRLOWOFFSET]); +#endif + return __kvmppc_gse_put(gsb, iden, sizeof(tmp), &tmp); +} + +/** + * kvmppc_gse_put_part_table() - add a partition table guest state element to a + * buffer + * @gsb: guest state buffer to add element to + * @iden: guest state ID + * @val: partition table value + */ +static inline int kvmppc_gse_put_part_table(struct kvmppc_gs_buff *gsb, + u16 iden, + struct kvmppc_gs_part_table val) +{ + __be64 tmp[3]; + + tmp[0] = cpu_to_be64(val.address); + tmp[1] = cpu_to_be64(val.ea_bits); + tmp[2] = cpu_to_be64(val.gpd_size); + return __kvmppc_gse_put(gsb, KVMPPC_GSID_PARTITION_TABLE, sizeof(tmp), + &tmp); +} + +/** + * kvmppc_gse_put_proc_table() - add a process table guest state element to a + * buffer + * @gsb: guest state buffer to add element to + * @iden: guest state ID + * @val: process table value + */ +static inline int kvmppc_gse_put_proc_table(struct kvmppc_gs_buff *gsb, + u16 iden, + struct kvmppc_gs_proc_table val) +{ + __be64 tmp[2]; + + tmp[0] = cpu_to_be64(val.address); + tmp[1] = cpu_to_be64(val.gpd_size); + return __kvmppc_gse_put(gsb, KVMPPC_GSID_PROCESS_TABLE, sizeof(tmp), + &tmp); +} + +/** + * kvmppc_gse_put_buff_info() - adds a GSB description guest state element to a + * buffer + * @gsb: guest state buffer to add element to + * @iden: guest state ID + * @val: guest state buffer description value + */ +static inline int kvmppc_gse_put_buff_info(struct kvmppc_gs_buff *gsb, u16 iden, + struct kvmppc_gs_buff_info val) +{ + __be64 tmp[2]; + + tmp[0] = cpu_to_be64(val.address); + tmp[1] = cpu_to_be64(val.size); + return __kvmppc_gse_put(gsb, iden, sizeof(tmp), &tmp); +} + +int __kvmppc_gse_put(struct kvmppc_gs_buff *gsb, u16 iden, u16 size, + const void *data); + +/** + * kvmppc_gse_get_be32() - return the data of a be32 element + * @gse: guest state element + */ +static inline __be32 kvmppc_gse_get_be32(const struct kvmppc_gs_elem *gse) +{ + if (WARN_ON(kvmppc_gse_len(gse) != sizeof(__be32))) + return 0; + return *(__be32 *)kvmppc_gse_data(gse); +} + +/** + * kvmppc_gse_get_u32() - return the data of a be32 element in host endianness + * @gse: guest state element + */ +static inline u32 kvmppc_gse_get_u32(const struct kvmppc_gs_elem *gse) +{ + return be32_to_cpu(kvmppc_gse_get_be32(gse)); +} + +/** + * kvmppc_gse_get_be64() - return the data of a be64 element + * @gse: guest state element + */ +static inline __be64 kvmppc_gse_get_be64(const struct kvmppc_gs_elem *gse) +{ + if (WARN_ON(kvmppc_gse_len(gse) != sizeof(__be64))) + return 0; + return *(__be64 *)kvmppc_gse_data(gse); +} + +/** + * kvmppc_gse_get_u64() - return the data of a be64 element in host endianness + * @gse: guest state element + */ +static inline u64 kvmppc_gse_get_u64(const struct kvmppc_gs_elem *gse) +{ + return be64_to_cpu(kvmppc_gse_get_be64(gse)); +} + +/** + * kvmppc_gse_get_vector128() - return the data of a vector element + * @gse: guest state element + */ +static inline void kvmppc_gse_get_vector128(const struct kvmppc_gs_elem *gse, + vector128 *v) +{ + union { + __vector128 v; + u64 dw[2]; + } u = { 0 }; + __be64 *src; + + if (WARN_ON(kvmppc_gse_len(gse) != sizeof(__vector128))) + *v = u.v; + + src = (__be64 *)kvmppc_gse_data(gse); + u.dw[TS_FPROFFSET] = be64_to_cpu(src[0]); +#ifdef CONFIG_VSX + u.dw[TS_VSRLOWOFFSET] = be64_to_cpu(src[1]); +#endif + *v = u.v; +} + +/************************************************************************** + * Guest State Bitmap + **************************************************************************/ + +bool kvmppc_gsbm_test(struct kvmppc_gs_bitmap *gsbm, u16 iden); +void kvmppc_gsbm_set(struct kvmppc_gs_bitmap *gsbm, u16 iden); +void kvmppc_gsbm_clear(struct kvmppc_gs_bitmap *gsbm, u16 iden); +u16 kvmppc_gsbm_next(struct kvmppc_gs_bitmap *gsbm, u16 prev); + +/** + * kvmppc_gsbm_zero - zero the entire bitmap + * @gsbm: guest state buffer bitmap + */ +static inline void kvmppc_gsbm_zero(struct kvmppc_gs_bitmap *gsbm) +{ + bitmap_zero(gsbm->bitmap, KVMPPC_GSE_IDEN_COUNT); +} + +/** + * kvmppc_gsbm_fill - fill the entire bitmap + * @gsbm: guest state buffer bitmap + */ +static inline void kvmppc_gsbm_fill(struct kvmppc_gs_bitmap *gsbm) +{ + bitmap_fill(gsbm->bitmap, KVMPPC_GSE_IDEN_COUNT); + clear_bit(0, gsbm->bitmap); +} + +/** + * kvmppc_gsbm_for_each - iterate the present guest state IDs + * @gsbm: guest state buffer bitmap + * @iden: current guest state ID + */ +#define kvmppc_gsbm_for_each(gsbm, iden) \ + for (iden = kvmppc_gsbm_next(gsbm, 0); iden != 0; \ + iden = kvmppc_gsbm_next(gsbm, iden)) + +/************************************************************************** + * Guest State Parser + **************************************************************************/ + +void kvmppc_gsp_insert(struct kvmppc_gs_parser *gsp, u16 iden, + struct kvmppc_gs_elem *gse); +struct kvmppc_gs_elem *kvmppc_gsp_lookup(struct kvmppc_gs_parser *gsp, + u16 iden); + +/** + * kvmppc_gsp_for_each - iterate the <guest state IDs, guest state element> + * pairs + * @gsp: guest state buffer bitmap + * @iden: current guest state ID + * @gse: guest state element + */ +#define kvmppc_gsp_for_each(gsp, iden, gse) \ + for (iden = kvmppc_gsbm_next(&(gsp)->iterator, 0), \ + gse = kvmppc_gsp_lookup((gsp), iden); \ + iden != 0; iden = kvmppc_gsbm_next(&(gsp)->iterator, iden), \ + gse = kvmppc_gsp_lookup((gsp), iden)) + +/************************************************************************** + * Guest State Message + **************************************************************************/ + +/** + * kvmppc_gsm_for_each - iterate the guest state IDs included in a guest state + * message + * @gsp: guest state buffer bitmap + * @iden: current guest state ID + * @gse: guest state element + */ +#define kvmppc_gsm_for_each(gsm, iden) \ + for (iden = kvmppc_gsbm_next(&gsm->bitmap, 0); iden != 0; \ + iden = kvmppc_gsbm_next(&gsm->bitmap, iden)) + +int kvmppc_gsm_init(struct kvmppc_gs_msg *mgs, struct kvmppc_gs_msg_ops *ops, + void *data, unsigned long flags); + +struct kvmppc_gs_msg *kvmppc_gsm_new(struct kvmppc_gs_msg_ops *ops, void *data, + unsigned long flags, gfp_t gfp_flags); +void kvmppc_gsm_free(struct kvmppc_gs_msg *gsm); +size_t kvmppc_gsm_size(struct kvmppc_gs_msg *gsm); +int kvmppc_gsm_fill_info(struct kvmppc_gs_msg *gsm, struct kvmppc_gs_buff *gsb); +int kvmppc_gsm_refresh_info(struct kvmppc_gs_msg *gsm, + struct kvmppc_gs_buff *gsb); + +/** + * kvmppc_gsm_include - indicate a guest state ID should be included when + * serializing + * @gsm: guest state message + * @iden: guest state ID + */ +static inline void kvmppc_gsm_include(struct kvmppc_gs_msg *gsm, u16 iden) +{ + kvmppc_gsbm_set(&gsm->bitmap, iden); +} + +/** + * kvmppc_gsm_includes - check if a guest state ID will be included when + * serializing + * @gsm: guest state message + * @iden: guest state ID + */ +static inline bool kvmppc_gsm_includes(struct kvmppc_gs_msg *gsm, u16 iden) +{ + return kvmppc_gsbm_test(&gsm->bitmap, iden); +} + +/** + * kvmppc_gsm_includes - indicate all guest state IDs should be included when + * serializing + * @gsm: guest state message + * @iden: guest state ID + */ +static inline void kvmppc_gsm_include_all(struct kvmppc_gs_msg *gsm) +{ + kvmppc_gsbm_fill(&gsm->bitmap); +} + +/** + * kvmppc_gsm_include - clear the guest state IDs that should be included when + * serializing + * @gsm: guest state message + */ +static inline void kvmppc_gsm_reset(struct kvmppc_gs_msg *gsm) +{ + kvmppc_gsbm_zero(&gsm->bitmap); +} + +/** + * kvmppc_gsb_receive_data - flexibly update values from a guest state buffer + * @gsb: guest state buffer + * @gsm: guest state message + * + * Requests updated values for the guest state values included in the guest + * state message. The guest state message will then deserialize the guest state + * buffer. + */ +static inline int kvmppc_gsb_receive_data(struct kvmppc_gs_buff *gsb, + struct kvmppc_gs_msg *gsm) +{ + int rc; + + kvmppc_gsb_reset(gsb); + rc = kvmppc_gsm_fill_info(gsm, gsb); + if (rc < 0) + return rc; + + rc = kvmppc_gsb_recv(gsb, gsm->flags); + if (rc < 0) + return rc; + + rc = kvmppc_gsm_refresh_info(gsm, gsb); + if (rc < 0) + return rc; + return 0; +} + +/** + * kvmppc_gsb_recv - receive a single guest state ID + * @gsb: guest state buffer + * @gsm: guest state message + * @iden: guest state identity + */ +static inline int kvmppc_gsb_receive_datum(struct kvmppc_gs_buff *gsb, + struct kvmppc_gs_msg *gsm, u16 iden) +{ + int rc; + + kvmppc_gsm_include(gsm, iden); + rc = kvmppc_gsb_receive_data(gsb, gsm); + if (rc < 0) + return rc; + kvmppc_gsm_reset(gsm); + return 0; +} + +/** + * kvmppc_gsb_send_data - flexibly send values from a guest state buffer + * @gsb: guest state buffer + * @gsm: guest state message + * + * Sends the guest state values included in the guest state message. + */ +static inline int kvmppc_gsb_send_data(struct kvmppc_gs_buff *gsb, + struct kvmppc_gs_msg *gsm) +{ + int rc; + + kvmppc_gsb_reset(gsb); + rc = kvmppc_gsm_fill_info(gsm, gsb); + if (rc < 0) + return rc; + rc = kvmppc_gsb_send(gsb, gsm->flags); + + return rc; +} + +/** + * kvmppc_gsb_recv - send a single guest state ID + * @gsb: guest state buffer + * @gsm: guest state message + * @iden: guest state identity + */ +static inline int kvmppc_gsb_send_datum(struct kvmppc_gs_buff *gsb, + struct kvmppc_gs_msg *gsm, u16 iden) +{ + int rc; + + kvmppc_gsm_include(gsm, iden); + rc = kvmppc_gsb_send_data(gsb, gsm); + if (rc < 0) + return rc; + kvmppc_gsm_reset(gsm); + return 0; +} + +#endif /* _ASM_POWERPC_GUEST_STATE_BUFFER_H */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index c099780385dd..ddb99e982917 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -100,6 +100,18 @@ #define H_COP_HW -74 #define H_STATE -75 #define H_IN_USE -77 + +#define H_INVALID_ELEMENT_ID -79 +#define H_INVALID_ELEMENT_SIZE -80 +#define H_INVALID_ELEMENT_VALUE -81 +#define H_INPUT_BUFFER_NOT_DEFINED -82 +#define H_INPUT_BUFFER_TOO_SMALL -83 +#define H_OUTPUT_BUFFER_NOT_DEFINED -84 +#define H_OUTPUT_BUFFER_TOO_SMALL -85 +#define H_PARTITION_PAGE_TABLE_NOT_DEFINED -86 +#define H_GUEST_VCPU_STATE_NOT_HV_OWNED -87 + + #define H_UNSUPPORTED_FLAG_START -256 #define H_UNSUPPORTED_FLAG_END -511 #define H_MULTI_THREADS_ACTIVE -9005 @@ -381,6 +393,15 @@ #define H_ENTER_NESTED 0xF804 #define H_TLB_INVALIDATE 0xF808 #define H_COPY_TOFROM_GUEST 0xF80C +#define H_GUEST_GET_CAPABILITIES 0x460 +#define H_GUEST_SET_CAPABILITIES 0x464 +#define H_GUEST_CREATE 0x470 +#define H_GUEST_CREATE_VCPU 0x474 +#define H_GUEST_GET_STATE 0x478 +#define H_GUEST_SET_STATE 0x47C +#define H_GUEST_RUN_VCPU 0x480 +#define H_GUEST_COPY_MEMORY 0x484 +#define H_GUEST_DELETE 0x488 /* Flags for H_SVM_PAGE_IN */ #define H_PAGE_IN_SHARED 0x1 @@ -467,6 +488,15 @@ #define H_RPTI_PAGE_1G 0x08 #define H_RPTI_PAGE_ALL (-1UL) +/* Flags for H_GUEST_{S,G}_STATE */ +#define H_GUEST_FLAGS_WIDE (1UL<<(63-0)) + +/* Flag values used for H_{S,G}SET_GUEST_CAPABILITIES */ +#define H_GUEST_CAP_COPY_MEM (1UL<<(63-0)) +#define H_GUEST_CAP_POWER9 (1UL<<(63-1)) +#define H_GUEST_CAP_POWER10 (1UL<<(63-2)) +#define H_GUEST_CAP_BITMAP2 (1UL<<(63-63)) + #ifndef __ASSEMBLY__ #include <linux/types.h> diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 699a88584ae1..a656635df386 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -74,14 +74,14 @@ struct imc_events { * The following is the data structure to hold trace imc data. */ struct trace_imc_data { - u64 tb1; - u64 ip; - u64 val; - u64 cpmc1; - u64 cpmc2; - u64 cpmc3; - u64 cpmc4; - u64 tb2; + __be64 tb1; + __be64 ip; + __be64 val; + __be64 cpmc1; + __be64 cpmc2; + __be64 cpmc3; + __be64 cpmc4; + __be64 tb2; }; /* Event attribute array index */ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 0732b743e099..5220274a6277 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -950,7 +950,7 @@ extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size, * almost all conceivable cases a device driver should not be using * this function */ -static inline unsigned long virt_to_phys(volatile void * address) +static inline unsigned long virt_to_phys(const volatile void * address) { WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && !virt_addr_valid(address)); diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index a1ddba01e7d1..e1b43aa12175 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -99,10 +99,14 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co void kexec_copy_flush(struct kimage *image); -#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_PPC_RTAS) +#if defined(CONFIG_CRASH_DUMP) +bool is_kdump_kernel(void); +#define is_kdump_kernel is_kdump_kernel +#if defined(CONFIG_PPC_RTAS) void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); #define crash_free_reserved_phys_range crash_free_reserved_phys_range -#endif +#endif /* CONFIG_PPC_RTAS */ +#endif /* CONFIG_CRASH_DUMP */ #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index bbf5e2c5fe09..4f527d09c92b 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -12,6 +12,7 @@ #include <linux/types.h> #include <linux/kvm_host.h> #include <asm/kvm_book3s_asm.h> +#include <asm/guest-state-buffer.h> struct kvmppc_bat { u64 raw; @@ -191,14 +192,14 @@ extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr, extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, bool data, bool iswrite); extern void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, - unsigned int pshift, unsigned int lpid); + unsigned int pshift, u64 lpid); extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, unsigned int shift, const struct kvm_memory_slot *memslot, - unsigned int lpid); + u64 lpid); extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing, unsigned long gpa, - unsigned int lpid); + u64 lpid); extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, unsigned long gpa, struct kvm_memory_slot *memslot, @@ -207,7 +208,7 @@ extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, extern int kvmppc_init_vm_radix(struct kvm *kvm); extern void kvmppc_free_radix(struct kvm *kvm); extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, - unsigned int lpid); + u64 lpid); extern int kvmppc_radix_init(void); extern void kvmppc_radix_exit(void); extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -295,12 +296,13 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {} static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {} #endif +extern unsigned long nested_capabilities; long kvmhv_nested_init(void); void kvmhv_nested_exit(void); void kvmhv_vm_nested_init(struct kvm *kvm); long kvmhv_set_partition_table(struct kvm_vcpu *vcpu); long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu); -void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1); +void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1); void kvmhv_release_all_nested(struct kvm *kvm); long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu); long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu); @@ -316,6 +318,69 @@ long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu); void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + +extern struct static_key_false __kvmhv_is_nestedv2; + +static inline bool kvmhv_is_nestedv2(void) +{ + return static_branch_unlikely(&__kvmhv_is_nestedv2); +} + +static inline bool kvmhv_is_nestedv1(void) +{ + return !static_branch_likely(&__kvmhv_is_nestedv2); +} + +#else + +static inline bool kvmhv_is_nestedv2(void) +{ + return false; +} + +static inline bool kvmhv_is_nestedv1(void) +{ + return false; +} + +#endif + +int __kvmhv_nestedv2_reload_ptregs(struct kvm_vcpu *vcpu, struct pt_regs *regs); +int __kvmhv_nestedv2_mark_dirty_ptregs(struct kvm_vcpu *vcpu, struct pt_regs *regs); +int __kvmhv_nestedv2_mark_dirty(struct kvm_vcpu *vcpu, u16 iden); +int __kvmhv_nestedv2_cached_reload(struct kvm_vcpu *vcpu, u16 iden); + +static inline int kvmhv_nestedv2_reload_ptregs(struct kvm_vcpu *vcpu, + struct pt_regs *regs) +{ + if (kvmhv_is_nestedv2()) + return __kvmhv_nestedv2_reload_ptregs(vcpu, regs); + return 0; +} +static inline int kvmhv_nestedv2_mark_dirty_ptregs(struct kvm_vcpu *vcpu, + struct pt_regs *regs) +{ + if (kvmhv_is_nestedv2()) + return __kvmhv_nestedv2_mark_dirty_ptregs(vcpu, regs); + return 0; +} + +static inline int kvmhv_nestedv2_mark_dirty(struct kvm_vcpu *vcpu, u16 iden) +{ + if (kvmhv_is_nestedv2()) + return __kvmhv_nestedv2_mark_dirty(vcpu, iden); + return 0; +} + +static inline int kvmhv_nestedv2_cached_reload(struct kvm_vcpu *vcpu, u16 iden) +{ + if (kvmhv_is_nestedv2()) + return __kvmhv_nestedv2_cached_reload(vcpu, iden); + return 0; +} + extern int kvm_irq_bypass; static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) @@ -335,60 +400,72 @@ static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { vcpu->arch.regs.gpr[num] = val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_GPR(num)); } static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) { + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_GPR(num)) < 0); return vcpu->arch.regs.gpr[num]; } static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) { vcpu->arch.regs.ccr = val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_CR); } static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) { + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_CR) < 0); return vcpu->arch.regs.ccr; } static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) { vcpu->arch.regs.xer = val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_XER); } static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu) { + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_XER) < 0); return vcpu->arch.regs.xer; } static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) { vcpu->arch.regs.ctr = val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_CTR); } static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) { + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_CTR) < 0); return vcpu->arch.regs.ctr; } static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) { vcpu->arch.regs.link = val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LR); } static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) { + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_LR) < 0); return vcpu->arch.regs.link; } static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) { vcpu->arch.regs.nip = val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_NIA); } static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) { + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_NIA) < 0); return vcpu->arch.regs.nip; } @@ -403,10 +480,141 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) return vcpu->arch.fault_dar; } +static inline u64 kvmppc_get_fpr(struct kvm_vcpu *vcpu, int i) +{ + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_VSRS(i)) < 0); + return vcpu->arch.fp.fpr[i][TS_FPROFFSET]; +} + +static inline void kvmppc_set_fpr(struct kvm_vcpu *vcpu, int i, u64 val) +{ + vcpu->arch.fp.fpr[i][TS_FPROFFSET] = val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_VSRS(i)); +} + +static inline u64 kvmppc_get_fpscr(struct kvm_vcpu *vcpu) +{ + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_FPSCR) < 0); + return vcpu->arch.fp.fpscr; +} + +static inline void kvmppc_set_fpscr(struct kvm_vcpu *vcpu, u64 val) +{ + vcpu->arch.fp.fpscr = val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_FPSCR); +} + + +static inline u64 kvmppc_get_vsx_fpr(struct kvm_vcpu *vcpu, int i, int j) +{ + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_VSRS(i)) < 0); + return vcpu->arch.fp.fpr[i][j]; +} + +static inline void kvmppc_set_vsx_fpr(struct kvm_vcpu *vcpu, int i, int j, + u64 val) +{ + vcpu->arch.fp.fpr[i][j] = val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_VSRS(i)); +} + +#ifdef CONFIG_ALTIVEC +static inline void kvmppc_get_vsx_vr(struct kvm_vcpu *vcpu, int i, vector128 *v) +{ + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_VSRS(32 + i)) < 0); + *v = vcpu->arch.vr.vr[i]; +} + +static inline void kvmppc_set_vsx_vr(struct kvm_vcpu *vcpu, int i, + vector128 *val) +{ + vcpu->arch.vr.vr[i] = *val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_VSRS(32 + i)); +} + +static inline u32 kvmppc_get_vscr(struct kvm_vcpu *vcpu) +{ + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_VSCR) < 0); + return vcpu->arch.vr.vscr.u[3]; +} + +static inline void kvmppc_set_vscr(struct kvm_vcpu *vcpu, u32 val) +{ + vcpu->arch.vr.vscr.u[3] = val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_VSCR); +} +#endif + +#define KVMPPC_BOOK3S_VCPU_ACCESSOR_SET(reg, size, iden) \ +static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ +{ \ + \ + vcpu->arch.reg = val; \ + kvmhv_nestedv2_mark_dirty(vcpu, iden); \ +} + +#define KVMPPC_BOOK3S_VCPU_ACCESSOR_GET(reg, size, iden) \ +static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +{ \ + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, iden) < 0); \ + return vcpu->arch.reg; \ +} + +#define KVMPPC_BOOK3S_VCPU_ACCESSOR(reg, size, iden) \ + KVMPPC_BOOK3S_VCPU_ACCESSOR_SET(reg, size, iden) \ + KVMPPC_BOOK3S_VCPU_ACCESSOR_GET(reg, size, iden) \ + +KVMPPC_BOOK3S_VCPU_ACCESSOR(pid, 32, KVMPPC_GSID_PIDR) +KVMPPC_BOOK3S_VCPU_ACCESSOR(tar, 64, KVMPPC_GSID_TAR) +KVMPPC_BOOK3S_VCPU_ACCESSOR(ebbhr, 64, KVMPPC_GSID_EBBHR) +KVMPPC_BOOK3S_VCPU_ACCESSOR(ebbrr, 64, KVMPPC_GSID_EBBRR) +KVMPPC_BOOK3S_VCPU_ACCESSOR(bescr, 64, KVMPPC_GSID_BESCR) +KVMPPC_BOOK3S_VCPU_ACCESSOR(ic, 64, KVMPPC_GSID_IC) +KVMPPC_BOOK3S_VCPU_ACCESSOR(vrsave, 64, KVMPPC_GSID_VRSAVE) + + +#define KVMPPC_BOOK3S_VCORE_ACCESSOR_SET(reg, size, iden) \ +static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ +{ \ + vcpu->arch.vcore->reg = val; \ + kvmhv_nestedv2_mark_dirty(vcpu, iden); \ +} + +#define KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(reg, size, iden) \ +static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +{ \ + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, iden) < 0); \ + return vcpu->arch.vcore->reg; \ +} + +#define KVMPPC_BOOK3S_VCORE_ACCESSOR(reg, size, iden) \ + KVMPPC_BOOK3S_VCORE_ACCESSOR_SET(reg, size, iden) \ + KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(reg, size, iden) \ + + +KVMPPC_BOOK3S_VCORE_ACCESSOR(vtb, 64, KVMPPC_GSID_VTB) +KVMPPC_BOOK3S_VCORE_ACCESSOR(tb_offset, 64, KVMPPC_GSID_TB_OFFSET) +KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(arch_compat, 32, KVMPPC_GSID_LOGICAL_PVR) +KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(lpcr, 64, KVMPPC_GSID_LPCR) + +static inline u64 kvmppc_get_dec_expires(struct kvm_vcpu *vcpu) +{ + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_TB_OFFSET) < 0); + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_DEC_EXPIRY_TB) < 0); + return vcpu->arch.dec_expires; +} + +static inline void kvmppc_set_dec_expires(struct kvm_vcpu *vcpu, u64 val) +{ + vcpu->arch.dec_expires = val; + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_TB_OFFSET) < 0); + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_DEC_EXPIRY_TB); +} + /* Expiry time of vcpu DEC relative to host TB */ static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu) { - return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset; + return kvmppc_get_dec_expires(vcpu) - kvmppc_get_tb_offset(vcpu); } static inline bool is_kvmppc_resume_guest(int r) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index d49065af08e9..2477021bff54 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -624,7 +624,7 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, unsigned long gpa, unsigned int level, - unsigned long mmu_seq, unsigned int lpid, + unsigned long mmu_seq, u64 lpid, unsigned long *rmapp, struct rmap_nested **n_rmap); extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp, struct rmap_nested **n_rmap); @@ -677,6 +677,12 @@ static inline pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq, extern pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid, unsigned long ea, unsigned *hshift); +int kvmhv_nestedv2_vcpu_create(struct kvm_vcpu *vcpu, struct kvmhv_nestedv2_io *io); +void kvmhv_nestedv2_vcpu_free(struct kvm_vcpu *vcpu, struct kvmhv_nestedv2_io *io); +int kvmhv_nestedv2_flush_vcpu(struct kvm_vcpu *vcpu, u64 time_limit); +int kvmhv_nestedv2_set_ptbl_entry(unsigned long lpid, u64 dw0, u64 dw1); +int kvmhv_nestedv2_parse_output(struct kvm_vcpu *vcpu); + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index 0c3401b2e19e..7c3291aa8922 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -89,6 +89,16 @@ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) return vcpu->arch.regs.nip; } +static inline void kvmppc_set_fpr(struct kvm_vcpu *vcpu, int i, u64 val) +{ + vcpu->arch.fp.fpr[i][TS_FPROFFSET] = val; +} + +static inline u64 kvmppc_get_fpr(struct kvm_vcpu *vcpu, int i) +{ + return vcpu->arch.fp.fpr[i][TS_FPROFFSET]; +} + #ifdef CONFIG_BOOKE static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 14ee0dece853..8799b37be295 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include <asm/cacheflush.h> #include <asm/hvcall.h> #include <asm/mce.h> +#include <asm/guest-state-buffer.h> #define __KVM_HAVE_ARCH_VCPU_DEBUGFS @@ -276,7 +277,7 @@ struct kvm_resize_hpt; #define KVMPPC_SECURE_INIT_ABORT 0x4 /* H_SVM_INIT_ABORT issued */ struct kvm_arch { - unsigned int lpid; + u64 lpid; unsigned int smt_mode; /* # vcpus per virtual core */ unsigned int emul_smt_mode; /* emualted SMT mode, on P9 */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -509,6 +510,23 @@ union xive_tma_w01 { __be64 w01; }; + /* Nestedv2 H_GUEST_RUN_VCPU configuration */ +struct kvmhv_nestedv2_config { + struct kvmppc_gs_buff_info vcpu_run_output_cfg; + struct kvmppc_gs_buff_info vcpu_run_input_cfg; + u64 vcpu_run_output_size; +}; + + /* Nestedv2 L1<->L0 communication state */ +struct kvmhv_nestedv2_io { + struct kvmhv_nestedv2_config cfg; + struct kvmppc_gs_buff *vcpu_run_output; + struct kvmppc_gs_buff *vcpu_run_input; + struct kvmppc_gs_msg *vcpu_message; + struct kvmppc_gs_msg *vcore_message; + struct kvmppc_gs_bitmap valids; +}; + struct kvm_vcpu_arch { ulong host_stack; u32 host_pid; @@ -829,6 +847,8 @@ struct kvm_vcpu_arch { u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */ u32 nested_vcpu_id; gpa_t nested_io_gpr; + /* For nested APIv2 guests*/ + struct kvmhv_nestedv2_io nestedv2_io; #endif #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index b4da8514af43..3281215097cc 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -615,6 +615,42 @@ static inline bool kvmhv_on_pseries(void) { return false; } + +#endif + +#ifndef CONFIG_PPC_BOOK3S + +static inline bool kvmhv_is_nestedv2(void) +{ + return false; +} + +static inline bool kvmhv_is_nestedv1(void) +{ + return false; +} + +static inline int kvmhv_nestedv2_reload_ptregs(struct kvm_vcpu *vcpu, + struct pt_regs *regs) +{ + return 0; +} +static inline int kvmhv_nestedv2_mark_dirty_ptregs(struct kvm_vcpu *vcpu, + struct pt_regs *regs) +{ + return 0; +} + +static inline int kvmhv_nestedv2_mark_dirty(struct kvm_vcpu *vcpu, u16 iden) +{ + return 0; +} + +static inline int kvmhv_nestedv2_cached_reload(struct kvm_vcpu *vcpu, u16 iden) +{ + return 0; +} + #endif #ifdef CONFIG_KVM_XICS @@ -927,79 +963,85 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu) #endif } -#define SPRNG_WRAPPER_GET(reg, bookehv_spr) \ +#define KVMPPC_BOOKE_HV_SPRNG_ACCESSOR_GET(reg, bookehv_spr) \ static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ { \ return mfspr(bookehv_spr); \ } \ -#define SPRNG_WRAPPER_SET(reg, bookehv_spr) \ +#define KVMPPC_BOOKE_HV_SPRNG_ACCESSOR_SET(reg, bookehv_spr) \ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val) \ { \ mtspr(bookehv_spr, val); \ } \ -#define SHARED_WRAPPER_GET(reg, size) \ +#define KVMPPC_VCPU_SHARED_REGS_ACCESSOR_GET(reg, size, iden) \ static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ { \ + if (iden) \ + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, iden) < 0); \ if (kvmppc_shared_big_endian(vcpu)) \ - return be##size##_to_cpu(vcpu->arch.shared->reg); \ + return be##size##_to_cpu((__be##size __force)vcpu->arch.shared->reg); \ else \ - return le##size##_to_cpu(vcpu->arch.shared->reg); \ + return le##size##_to_cpu((__le##size __force)vcpu->arch.shared->reg); \ } \ -#define SHARED_WRAPPER_SET(reg, size) \ +#define KVMPPC_VCPU_SHARED_REGS_ACCESSOR_SET(reg, size, iden) \ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ { \ if (kvmppc_shared_big_endian(vcpu)) \ - vcpu->arch.shared->reg = cpu_to_be##size(val); \ + vcpu->arch.shared->reg = (u##size __force)cpu_to_be##size(val); \ else \ - vcpu->arch.shared->reg = cpu_to_le##size(val); \ + vcpu->arch.shared->reg = (u##size __force)cpu_to_le##size(val); \ + \ + if (iden) \ + kvmhv_nestedv2_mark_dirty(vcpu, iden); \ } \ -#define SHARED_WRAPPER(reg, size) \ - SHARED_WRAPPER_GET(reg, size) \ - SHARED_WRAPPER_SET(reg, size) \ +#define KVMPPC_VCPU_SHARED_REGS_ACCESSOR(reg, size, iden) \ + KVMPPC_VCPU_SHARED_REGS_ACCESSOR_GET(reg, size, iden) \ + KVMPPC_VCPU_SHARED_REGS_ACCESSOR_SET(reg, size, iden) \ -#define SPRNG_WRAPPER(reg, bookehv_spr) \ - SPRNG_WRAPPER_GET(reg, bookehv_spr) \ - SPRNG_WRAPPER_SET(reg, bookehv_spr) \ +#define KVMPPC_BOOKE_HV_SPRNG_ACCESSOR(reg, bookehv_spr) \ + KVMPPC_BOOKE_HV_SPRNG_ACCESSOR_GET(reg, bookehv_spr) \ + KVMPPC_BOOKE_HV_SPRNG_ACCESSOR_SET(reg, bookehv_spr) \ #ifdef CONFIG_KVM_BOOKE_HV -#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ - SPRNG_WRAPPER(reg, bookehv_spr) \ +#define KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(reg, size, bookehv_spr, iden) \ + KVMPPC_BOOKE_HV_SPRNG_ACCESSOR(reg, bookehv_spr) \ #else -#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ - SHARED_WRAPPER(reg, size) \ +#define KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(reg, size, bookehv_spr, iden) \ + KVMPPC_VCPU_SHARED_REGS_ACCESSOR(reg, size, iden) \ #endif -SHARED_WRAPPER(critical, 64) -SHARED_SPRNG_WRAPPER(sprg0, 64, SPRN_GSPRG0) -SHARED_SPRNG_WRAPPER(sprg1, 64, SPRN_GSPRG1) -SHARED_SPRNG_WRAPPER(sprg2, 64, SPRN_GSPRG2) -SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3) -SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) -SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1) -SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR) -SHARED_SPRNG_WRAPPER(esr, 64, SPRN_GESR) -SHARED_WRAPPER_GET(msr, 64) +KVMPPC_VCPU_SHARED_REGS_ACCESSOR(critical, 64, 0) +KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(sprg0, 64, SPRN_GSPRG0, KVMPPC_GSID_SPRG0) +KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(sprg1, 64, SPRN_GSPRG1, KVMPPC_GSID_SPRG1) +KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(sprg2, 64, SPRN_GSPRG2, KVMPPC_GSID_SPRG2) +KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(sprg3, 64, SPRN_GSPRG3, KVMPPC_GSID_SPRG3) +KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(srr0, 64, SPRN_GSRR0, KVMPPC_GSID_SRR0) +KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(srr1, 64, SPRN_GSRR1, KVMPPC_GSID_SRR1) +KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(dar, 64, SPRN_GDEAR, KVMPPC_GSID_DAR) +KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(esr, 64, SPRN_GESR, 0) +KVMPPC_VCPU_SHARED_REGS_ACCESSOR_GET(msr, 64, KVMPPC_GSID_MSR) static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val) { if (kvmppc_shared_big_endian(vcpu)) vcpu->arch.shared->msr = cpu_to_be64(val); else vcpu->arch.shared->msr = cpu_to_le64(val); + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_MSR); } -SHARED_WRAPPER(dsisr, 32) -SHARED_WRAPPER(int_pending, 32) -SHARED_WRAPPER(sprg4, 64) -SHARED_WRAPPER(sprg5, 64) -SHARED_WRAPPER(sprg6, 64) -SHARED_WRAPPER(sprg7, 64) +KVMPPC_VCPU_SHARED_REGS_ACCESSOR(dsisr, 32, KVMPPC_GSID_DSISR) +KVMPPC_VCPU_SHARED_REGS_ACCESSOR(int_pending, 32, 0) +KVMPPC_VCPU_SHARED_REGS_ACCESSOR(sprg4, 64, 0) +KVMPPC_VCPU_SHARED_REGS_ACCESSOR(sprg5, 64, 0) +KVMPPC_VCPU_SHARED_REGS_ACCESSOR(sprg6, 64, 0) +KVMPPC_VCPU_SHARED_REGS_ACCESSOR(sprg7, 64, 0) static inline u32 kvmppc_get_sr(struct kvm_vcpu *vcpu, int nr) { diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 0e93a4728c9e..141d82e249a8 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -188,7 +188,6 @@ typedef struct { } mm_context_t; #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) -#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) /* Page size definitions, common between 32 and 64-bit * diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index f99c53a5f184..9164a9e41b02 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -9,10 +9,6 @@ #include <linux/threads.h> #include <asm/mmu.h> /* For sub-arch specific PPC_PIN_SIZE */ -#ifdef CONFIG_44x -extern int icache_44x_need_flush; -#endif - #endif /* __ASSEMBLY__ */ #define PTE_INDEX_SIZE PTE_SHIFT @@ -55,26 +51,22 @@ extern int icache_44x_need_flush; #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#define pte_ERROR(e) \ - pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ - (unsigned long long)pte_val(e)) #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -#ifndef __ASSEMBLY__ - -int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); -void unmap_kernel_page(unsigned long va); - -#endif /* !__ASSEMBLY__ */ - - /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary * value (for now) on others, from where we can start layout kernel * virtual space that goes below PKMAP and FIXMAP */ -#include <asm/fixmap.h> + +#define FIXADDR_SIZE 0 +#ifdef CONFIG_KASAN +#include <asm/kasan.h> +#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) +#else +#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) +#endif /* * ioremap_bot starts at that address. Early ioremaps move down from there, @@ -151,7 +143,7 @@ void unmap_kernel_page(unsigned long va); * The mask covered by the RPN must be a ULL on 32-bit platforms with * 64-bit PTEs. */ -#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) +#ifdef CONFIG_PTE_64BIT #define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) #define MAX_POSSIBLE_PHYSMEM_BITS 36 #else @@ -159,48 +151,8 @@ void unmap_kernel_page(unsigned long va); #define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif -/* - * _PAGE_CHG_MASK masks of bits that are to be preserved across - * pgprot changes. - */ -#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL) - #ifndef __ASSEMBLY__ -#define pte_clear(mm, addr, ptep) \ - do { pte_update(mm, addr, ptep, ~0, 0, 0); } while (0) - -#ifndef pte_mkwrite_novma -static inline pte_t pte_mkwrite_novma(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_RW); -} -#endif - -static inline pte_t pte_mkdirty(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_DIRTY); -} - -static inline pte_t pte_mkyoung(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_ACCESSED); -} - -#ifndef pte_wrprotect -static inline pte_t pte_wrprotect(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_RW); -} -#endif - -#ifndef pte_mkexec -static inline pte_t pte_mkexec(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_EXEC); -} -#endif - #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD) #define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK) @@ -210,141 +162,6 @@ static inline void pmd_clear(pmd_t *pmdp) } /* - * PTE updates. This function is called whenever an existing - * valid PTE is updated. This does -not- include set_pte_at() - * which nowadays only sets a new PTE. - * - * Depending on the type of MMU, we may need to use atomic updates - * and the PTE may be either 32 or 64 bit wide. In the later case, - * when using atomic updates, only the low part of the PTE is - * accessed atomically. - * - * In addition, on 44x, we also maintain a global flag indicating - * that an executable user mapping was modified, which is needed - * to properly flush the virtually tagged instruction cache of - * those implementations. - * - * On the 8xx, the page tables are a bit special. For 16k pages, we have - * 4 identical entries. For 512k pages, we have 128 entries as if it was - * 4k pages, but they are flagged as 512k pages for the hardware. - * For other page sizes, we have a single entry in the table. - */ -#ifdef CONFIG_PPC_8xx -static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); -static int hugepd_ok(hugepd_t hpd); - -static int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge) -{ - if (!huge) - return PAGE_SIZE / SZ_4K; - else if (hugepd_ok(*((hugepd_t *)pmd))) - return 1; - else if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !(val & _PAGE_HUGE)) - return SZ_16K / SZ_4K; - else - return SZ_512K / SZ_4K; -} - -static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, - unsigned long clr, unsigned long set, int huge) -{ - pte_basic_t *entry = (pte_basic_t *)p; - pte_basic_t old = pte_val(*p); - pte_basic_t new = (old & ~(pte_basic_t)clr) | set; - int num, i; - pmd_t *pmd = pmd_off(mm, addr); - - num = number_of_cells_per_pte(pmd, new, huge); - - for (i = 0; i < num; i += PAGE_SIZE / SZ_4K, new += PAGE_SIZE) { - *entry++ = new; - if (IS_ENABLED(CONFIG_PPC_16K_PAGES) && num != 1) { - *entry++ = new; - *entry++ = new; - *entry++ = new; - } - } - - return old; -} - -#ifdef CONFIG_PPC_16K_PAGES -#define ptep_get ptep_get -static inline pte_t ptep_get(pte_t *ptep) -{ - pte_basic_t val = READ_ONCE(ptep->pte); - pte_t pte = {val, val, val, val}; - - return pte; -} -#endif /* CONFIG_PPC_16K_PAGES */ - -#else -static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, - unsigned long clr, unsigned long set, int huge) -{ - pte_basic_t old = pte_val(*p); - pte_basic_t new = (old & ~(pte_basic_t)clr) | set; - - *p = __pte(new); - -#ifdef CONFIG_44x - if ((old & _PAGE_USER) && (old & _PAGE_EXEC)) - icache_44x_need_flush = 1; -#endif - return old; -} -#endif - -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - unsigned long old; - old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); - return (old & _PAGE_ACCESSED) != 0; -} -#define ptep_test_and_clear_young(__vma, __addr, __ptep) \ - __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep) - -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - return __pte(pte_update(mm, addr, ptep, ~0, 0, 0)); -} - -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -#ifndef ptep_set_wrprotect -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); -} -#endif - -#ifndef __ptep_set_access_flags -static inline void __ptep_set_access_flags(struct vm_area_struct *vma, - pte_t *ptep, pte_t entry, - unsigned long address, - int psize) -{ - unsigned long set = pte_val(entry) & - (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - int huge = psize > mmu_virtual_psize ? 1 : 0; - - pte_update(vma->vm_mm, address, ptep, 0, set, huge); - - flush_tlb_page(vma, address); -} -#endif - -static inline int pte_young(pte_t pte) -{ - return pte_val(pte) & _PAGE_ACCESSED; -} - -/* * Note that on Book E processors, the pmd contains the kernel virtual * (lowmem) address of the pte page. The physical address is less useful * because everything runs with translation enabled (even the TLB miss diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h index 6fe46e754556..d759cfd74754 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-40x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h @@ -42,10 +42,10 @@ #define _PAGE_PRESENT 0x002 /* software: PTE contains a translation */ #define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */ #define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */ -#define _PAGE_USER 0x010 /* matches one of the zone permission bits */ +#define _PAGE_READ 0x010 /* software: read permission */ #define _PAGE_SPECIAL 0x020 /* software: Special page */ #define _PAGE_DIRTY 0x080 /* software: dirty page */ -#define _PAGE_RW 0x100 /* hardware: WR, anded with dirty in exception */ +#define _PAGE_WRITE 0x100 /* hardware: WR, anded with dirty in exception */ #define _PAGE_EXEC 0x200 /* hardware: EX permission */ #define _PAGE_ACCESSED 0x400 /* software: R: page referenced */ @@ -55,11 +55,6 @@ /* cache related flags non existing on 40x */ #define _PAGE_COHERENT 0 -#define _PAGE_KERNEL_RO 0 -#define _PAGE_KERNEL_ROX _PAGE_EXEC -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) - #define _PMD_PRESENT 0x400 /* PMD points to page of PTEs */ #define _PMD_PRESENT_MASK _PMD_PRESENT #define _PMD_BAD 0x802 @@ -69,20 +64,10 @@ #define _PTE_NONE_MASK 0 -/* Until my rework is finished, 40x still needs atomic PTE updates */ -#define PTE_ATOMIC_UPDATES 1 - #define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) #define _PAGE_BASE (_PAGE_BASE_NC) -/* Permission masks used to generate the __P and __S table */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#include <asm/pgtable-masks.h> #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_40x_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h index b7ed13cee137..851813725237 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-44x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h @@ -63,12 +63,12 @@ */ #define _PAGE_PRESENT 0x00000001 /* S: PTE valid */ -#define _PAGE_RW 0x00000002 /* S: Write permission */ +#define _PAGE_WRITE 0x00000002 /* S: Write permission */ #define _PAGE_EXEC 0x00000004 /* H: Execute permission */ -#define _PAGE_ACCESSED 0x00000008 /* S: Page referenced */ +#define _PAGE_READ 0x00000008 /* S: Read permission */ #define _PAGE_DIRTY 0x00000010 /* S: Page dirty */ #define _PAGE_SPECIAL 0x00000020 /* S: Special page */ -#define _PAGE_USER 0x00000040 /* S: User page */ +#define _PAGE_ACCESSED 0x00000040 /* S: Page referenced */ #define _PAGE_ENDIAN 0x00000080 /* H: E bit */ #define _PAGE_GUARDED 0x00000100 /* H: G bit */ #define _PAGE_COHERENT 0x00000200 /* H: M bit */ @@ -78,11 +78,6 @@ /* No page size encoding in the linux PTE */ #define _PAGE_PSIZE 0 -#define _PAGE_KERNEL_RO 0 -#define _PAGE_KERNEL_ROX _PAGE_EXEC -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) - /* TODO: Add large page lowmem mapping support */ #define _PMD_PRESENT 0 #define _PMD_PRESENT_MASK (PAGE_MASK) @@ -105,14 +100,7 @@ #define _PAGE_BASE (_PAGE_BASE_NC) #endif -/* Permission masks used to generate the __P and __S table */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#include <asm/pgtable-masks.h> #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_44x_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pte-85xx.h b/arch/powerpc/include/asm/nohash/32/pte-85xx.h index 16451df5ddb0..653a342d3b25 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-85xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-85xx.h @@ -17,9 +17,9 @@ */ /* Definitions for FSL Book-E Cores */ -#define _PAGE_PRESENT 0x00001 /* S: PTE contains a translation */ -#define _PAGE_USER 0x00002 /* S: User page (maps to UR) */ -#define _PAGE_RW 0x00004 /* S: Write permission (SW) */ +#define _PAGE_READ 0x00001 /* H: Read permission (SR) */ +#define _PAGE_PRESENT 0x00002 /* S: PTE contains a translation */ +#define _PAGE_WRITE 0x00004 /* S: Write permission (SW) */ #define _PAGE_DIRTY 0x00008 /* S: Page dirty */ #define _PAGE_EXEC 0x00010 /* H: SX permission */ #define _PAGE_ACCESSED 0x00020 /* S: Page referenced */ @@ -31,11 +31,6 @@ #define _PAGE_WRITETHRU 0x00400 /* H: W bit */ #define _PAGE_SPECIAL 0x00800 /* S: Special page */ -#define _PAGE_KERNEL_RO 0 -#define _PAGE_KERNEL_ROX _PAGE_EXEC -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) - /* No page size encoding in the linux PTE */ #define _PAGE_PSIZE 0 @@ -61,14 +56,7 @@ #define _PAGE_BASE (_PAGE_BASE_NC) #endif -/* Permission masks used to generate the __P and __S table */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#include <asm/pgtable-masks.h> #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_FSL_85xx_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index e6fe1d5731f2..137dc3c84e45 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -48,6 +48,11 @@ #define _PAGE_HUGE 0x0800 /* Copied to L1 PS bit 29 */ +#define _PAGE_NAX (_PAGE_NA | _PAGE_EXEC) +#define _PAGE_ROX (_PAGE_RO | _PAGE_EXEC) +#define _PAGE_RW 0 +#define _PAGE_RWX _PAGE_EXEC + /* cache related flags non existing on 8xx */ #define _PAGE_COHERENT 0 #define _PAGE_WRITETHRU 0 @@ -77,14 +82,7 @@ #define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE) #define _PAGE_BASE (_PAGE_BASE_NC) -/* Permission masks used to generate the __P and __S table */ -#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA) -#define PAGE_SHARED __pgprot(_PAGE_BASE) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_RO) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_RO) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC) +#include <asm/pgtable-masks.h> #ifndef __ASSEMBLY__ static inline pte_t pte_wrprotect(pte_t pte) @@ -115,27 +113,6 @@ static inline pte_t pte_mkwrite_novma(pte_t pte) #define pte_mkwrite_novma pte_mkwrite_novma -static inline bool pte_user(pte_t pte) -{ - return !(pte_val(pte) & _PAGE_SH); -} - -#define pte_user pte_user - -static inline pte_t pte_mkprivileged(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_SH); -} - -#define pte_mkprivileged pte_mkprivileged - -static inline pte_t pte_mkuser(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_SH); -} - -#define pte_mkuser pte_mkuser - static inline pte_t pte_mkhuge(pte_t pte) { return __pte(pte_val(pte) | _PAGE_SPS | _PAGE_HUGE); @@ -187,6 +164,63 @@ static inline unsigned long pte_leaf_size(pte_t pte) #define pte_leaf_size pte_leaf_size +/* + * On the 8xx, the page tables are a bit special. For 16k pages, we have + * 4 identical entries. For 512k pages, we have 128 entries as if it was + * 4k pages, but they are flagged as 512k pages for the hardware. + * For other page sizes, we have a single entry in the table. + */ +static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); +static int hugepd_ok(hugepd_t hpd); + +static inline int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge) +{ + if (!huge) + return PAGE_SIZE / SZ_4K; + else if (hugepd_ok(*((hugepd_t *)pmd))) + return 1; + else if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !(val & _PAGE_HUGE)) + return SZ_16K / SZ_4K; + else + return SZ_512K / SZ_4K; +} + +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) +{ + pte_basic_t *entry = (pte_basic_t *)p; + pte_basic_t old = pte_val(*p); + pte_basic_t new = (old & ~(pte_basic_t)clr) | set; + int num, i; + pmd_t *pmd = pmd_off(mm, addr); + + num = number_of_cells_per_pte(pmd, new, huge); + + for (i = 0; i < num; i += PAGE_SIZE / SZ_4K, new += PAGE_SIZE) { + *entry++ = new; + if (IS_ENABLED(CONFIG_PPC_16K_PAGES) && num != 1) { + *entry++ = new; + *entry++ = new; + *entry++ = new; + } + } + + return old; +} + +#define pte_update pte_update + +#ifdef CONFIG_PPC_16K_PAGES +#define ptep_get ptep_get +static inline pte_t ptep_get(pte_t *ptep) +{ + pte_basic_t val = READ_ONCE(ptep->pte); + pte_t pte = {val, val, val, val}; + + return pte; +} +#endif /* CONFIG_PPC_16K_PAGES */ + #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index eb6891e34cbd..2202c78730e8 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -57,6 +57,7 @@ #define IOREMAP_START (ioremap_bot) #define IOREMAP_END (KERN_IO_START + KERN_IO_SIZE - FIXADDR_SIZE) #define FIXADDR_SIZE SZ_32M +#define FIXADDR_TOP (IOREMAP_END + FIXADDR_SIZE) /* * Defines the address of the vmemap area, in its own region on @@ -74,37 +75,11 @@ #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) -/* - * _PAGE_CHG_MASK masks of bits that are to be preserved across - * pgprot changes. - */ -#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL) - #define H_PAGE_4K_PFN 0 #ifndef __ASSEMBLY__ /* pte_clear moved to later in this file */ -static inline pte_t pte_mkwrite_novma(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_RW); -} - -static inline pte_t pte_mkdirty(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_DIRTY); -} - -static inline pte_t pte_mkyoung(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_ACCESSED); -} - -static inline pte_t pte_wrprotect(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_RW); -} - #define PMD_BAD_BITS (PTE_TABLE_SIZE-1) #define PUD_BAD_BITS (PMD_TABLE_SIZE-1) @@ -170,107 +145,20 @@ static inline void p4d_set(p4d_t *p4dp, unsigned long val) *p4dp = __p4d(val); } -/* Atomic PTE updates */ -static inline unsigned long pte_update(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, unsigned long clr, - unsigned long set, - int huge) -{ - unsigned long old = pte_val(*ptep); - *ptep = __pte((old & ~clr) | set); - - /* huge pages use the old page table lock */ - if (!huge) - assert_pte_locked(mm, addr); - - return old; -} - -static inline int pte_young(pte_t pte) -{ - return pte_val(pte) & _PAGE_ACCESSED; -} - -static inline int __ptep_test_and_clear_young(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - unsigned long old; - - if (!pte_young(*ptep)) - return 0; - old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); - return (old & _PAGE_ACCESSED) != 0; -} -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define ptep_test_and_clear_young(__vma, __addr, __ptep) \ -({ \ - int __r; \ - __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \ - __r; \ -}) - -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - - if ((pte_val(*ptep) & _PAGE_RW) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); -} - #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_val(*ptep) & _PAGE_RW) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_RW, 0, 1); + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH #define ptep_clear_flush_young(__vma, __address, __ptep) \ ({ \ - int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \ - __ptep); \ + int __young = ptep_test_and_clear_young(__vma, __address, __ptep);\ __young; \ }) -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0); - return __pte(old); -} - -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t * ptep) -{ - pte_update(mm, addr, ptep, ~0UL, 0, 0); -} - - -/* Set the dirty and/or accessed bits atomically in a linux PTE */ -static inline void __ptep_set_access_flags(struct vm_area_struct *vma, - pte_t *ptep, pte_t entry, - unsigned long address, - int psize) -{ - unsigned long bits = pte_val(entry) & - (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - - unsigned long old = pte_val(*ptep); - *ptep = __pte(old | bits); - - flush_tlb_page(vma, address); -} - -#define pte_ERROR(e) \ - pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ @@ -310,8 +198,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, /* We borrow MSB 56 (LSB 7) to store the exclusive marker in swap PTEs. */ #define _PAGE_SWP_EXCLUSIVE 0x80 -int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot); -void unmap_kernel_page(unsigned long va); extern int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index c721478c5934..427db14292c9 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -2,12 +2,23 @@ #ifndef _ASM_POWERPC_NOHASH_PGTABLE_H #define _ASM_POWERPC_NOHASH_PGTABLE_H +#ifndef __ASSEMBLY__ +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge); +#endif + #if defined(CONFIG_PPC64) #include <asm/nohash/64/pgtable.h> #else #include <asm/nohash/32/pgtable.h> #endif +/* + * _PAGE_CHG_MASK masks of bits that are to be preserved across + * pgprot changes. + */ +#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL) + /* Permission masks used for kernel mappings */ #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) #define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE) @@ -18,16 +29,136 @@ #ifndef __ASSEMBLY__ +extern int icache_44x_need_flush; + +/* + * PTE updates. This function is called whenever an existing + * valid PTE is updated. This does -not- include set_pte_at() + * which nowadays only sets a new PTE. + * + * Depending on the type of MMU, we may need to use atomic updates + * and the PTE may be either 32 or 64 bit wide. In the later case, + * when using atomic updates, only the low part of the PTE is + * accessed atomically. + * + * In addition, on 44x, we also maintain a global flag indicating + * that an executable user mapping was modified, which is needed + * to properly flush the virtually tagged instruction cache of + * those implementations. + */ +#ifndef pte_update +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) +{ + pte_basic_t old = pte_val(*p); + pte_basic_t new = (old & ~(pte_basic_t)clr) | set; + + if (new == old) + return old; + + *p = __pte(new); + + if (IS_ENABLED(CONFIG_44x) && !is_kernel_addr(addr) && (old & _PAGE_EXEC)) + icache_44x_need_flush = 1; + + /* huge pages use the old page table lock */ + if (!huge) + assert_pte_locked(mm, addr); + + return old; +} +#endif + +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + unsigned long old; + + old = pte_update(vma->vm_mm, addr, ptep, _PAGE_ACCESSED, 0, 0); + + return (old & _PAGE_ACCESSED) != 0; +} +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG + +#ifndef ptep_set_wrprotect +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); +} +#endif +#define __HAVE_ARCH_PTEP_SET_WRPROTECT + +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0)); +} +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_update(mm, addr, ptep, ~0UL, 0, 0); +} + +/* Set the dirty and/or accessed bits atomically in a linux PTE */ +#ifndef __ptep_set_access_flags +static inline void __ptep_set_access_flags(struct vm_area_struct *vma, + pte_t *ptep, pte_t entry, + unsigned long address, + int psize) +{ + unsigned long set = pte_val(entry) & + (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); + int huge = psize > mmu_virtual_psize ? 1 : 0; + + pte_update(vma->vm_mm, address, ptep, 0, set, huge); + + flush_tlb_page(vma, address); +} +#endif + /* Generic accessors to PTE bits */ +#ifndef pte_mkwrite_novma +static inline pte_t pte_mkwrite_novma(pte_t pte) +{ + /* + * write implies read, hence set both + */ + return __pte(pte_val(pte) | _PAGE_RW); +} +#endif + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_DIRTY); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_ACCESSED); +} + +#ifndef pte_wrprotect +static inline pte_t pte_wrprotect(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_WRITE); +} +#endif + +#ifndef pte_mkexec +static inline pte_t pte_mkexec(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_EXEC); +} +#endif + #ifndef pte_write static inline int pte_write(pte_t pte) { - return pte_val(pte) & _PAGE_RW; + return pte_val(pte) & _PAGE_WRITE; } #endif -#ifndef pte_read -static inline int pte_read(pte_t pte) { return 1; } -#endif static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } @@ -35,23 +166,6 @@ static inline bool pte_hashpte(pte_t pte) { return false; } static inline bool pte_ci(pte_t pte) { return pte_val(pte) & _PAGE_NO_CACHE; } static inline bool pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } -#ifdef CONFIG_NUMA_BALANCING -/* - * These work without NUMA balancing but the kernel does not care. See the - * comment in include/linux/pgtable.h . On powerpc, this will only - * work for user pages and always return true for kernel pages. - */ -static inline int pte_protnone(pte_t pte) -{ - return pte_present(pte) && !pte_user(pte); -} - -static inline int pmd_protnone(pmd_t pmd) -{ - return pte_protnone(pmd_pte(pmd)); -} -#endif /* CONFIG_NUMA_BALANCING */ - static inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_PRESENT; @@ -62,15 +176,20 @@ static inline bool pte_hw_valid(pte_t pte) return pte_val(pte) & _PAGE_PRESENT; } +static inline int pte_young(pte_t pte) +{ + return pte_val(pte) & _PAGE_ACCESSED; +} + /* - * Don't just check for any non zero bits in __PAGE_USER, since for book3e + * Don't just check for any non zero bits in __PAGE_READ, since for book3e * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in - * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too. + * _PAGE_READ. Need to explicitly match _PAGE_BAP_UR bit in that case too. */ -#ifndef pte_user -static inline bool pte_user(pte_t pte) +#ifndef pte_read +static inline bool pte_read(pte_t pte) { - return (pte_val(pte) & _PAGE_USER) == _PAGE_USER; + return (pte_val(pte) & _PAGE_READ) == _PAGE_READ; } #endif @@ -82,10 +201,10 @@ static inline bool pte_user(pte_t pte) static inline bool pte_access_permitted(pte_t pte, bool write) { /* - * A read-only access is controlled by _PAGE_USER bit. - * We have _PAGE_READ set for WRITE and EXECUTE + * A read-only access is controlled by _PAGE_READ bit. + * We have _PAGE_READ set for WRITE */ - if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte)) + if (!pte_present(pte) || !pte_read(pte)) return false; if (write && !pte_write(pte)) @@ -132,20 +251,6 @@ static inline pte_t pte_mkhuge(pte_t pte) } #endif -#ifndef pte_mkprivileged -static inline pte_t pte_mkprivileged(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_USER); -} -#endif - -#ifndef pte_mkuser -static inline pte_t pte_mkuser(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_USER); -} -#endif - static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); @@ -207,11 +312,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, mb(); } - -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, - pte_t *ptep, pte_t entry, int dirty); - /* * Macro to mark a page protection value as "uncacheable". */ @@ -240,11 +340,6 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addre #define pgprot_writecombine pgprot_noncached_wc -struct file; -extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); -#define __HAVE_PHYS_MEM_ACCESS_PROT - #ifdef CONFIG_HUGETLB_PAGE static inline int hugepd_ok(hugepd_t hpd) { @@ -269,20 +364,8 @@ static inline int pud_huge(pud_t pud) #define is_hugepd(hpd) (hugepd_ok(hpd)) #endif -/* - * This gets called at the end of handling a page fault, when - * the kernel has put a new PTE into the page table for the process. - * We use it to ensure coherency between the i-cache and d-cache - * for the page which has just been mapped in. - */ -#if defined(CONFIG_PPC_E500) && defined(CONFIG_HUGETLB_PAGE) -void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, unsigned int nr); -#else -static inline void update_mmu_cache_range(struct vm_fault *vmf, - struct vm_area_struct *vma, unsigned long address, - pte_t *ptep, unsigned int nr) {} -#endif +int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/nohash/pte-e500.h b/arch/powerpc/include/asm/nohash/pte-e500.h index d8924cbd61e4..f516f0b5b7a8 100644 --- a/arch/powerpc/include/asm/nohash/pte-e500.h +++ b/arch/powerpc/include/asm/nohash/pte-e500.h @@ -48,13 +48,20 @@ /* "Higher level" linux bit combinations */ #define _PAGE_EXEC (_PAGE_BAP_SX | _PAGE_BAP_UX) /* .. and was cache cleaned */ -#define _PAGE_RW (_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */ +#define _PAGE_READ (_PAGE_BAP_SR | _PAGE_BAP_UR) /* User read permission */ +#define _PAGE_WRITE (_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */ + #define _PAGE_KERNEL_RW (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY) #define _PAGE_KERNEL_RO (_PAGE_BAP_SR) #define _PAGE_KERNEL_RWX (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX) #define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX) -#define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */ -#define _PAGE_PRIVILEGED (_PAGE_BAP_SR) + +#define _PAGE_NA 0 +#define _PAGE_NAX _PAGE_BAP_UX +#define _PAGE_RO _PAGE_READ +#define _PAGE_ROX (_PAGE_READ | _PAGE_BAP_UX) +#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) +#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_BAP_UX) #define _PAGE_SPECIAL _PAGE_SW0 @@ -89,36 +96,12 @@ #define _PAGE_BASE (_PAGE_BASE_NC) #endif -/* Permission masks used to generate the __P and __S table */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_BAP_UX) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_BAP_UX) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_BAP_UX) +#include <asm/pgtable-masks.h> #ifndef __ASSEMBLY__ -static inline pte_t pte_mkprivileged(pte_t pte) -{ - return __pte((pte_val(pte) & ~_PAGE_USER) | _PAGE_PRIVILEGED); -} - -#define pte_mkprivileged pte_mkprivileged - -static inline pte_t pte_mkuser(pte_t pte) -{ - return __pte((pte_val(pte) & ~_PAGE_PRIVILEGED) | _PAGE_USER); -} - -#define pte_mkuser pte_mkuser - static inline pte_t pte_mkexec(pte_t pte) { - if (pte_val(pte) & _PAGE_BAP_UR) - return __pte((pte_val(pte) & ~_PAGE_BAP_SX) | _PAGE_BAP_UX); - else - return __pte((pte_val(pte) & ~_PAGE_BAP_UX) | _PAGE_BAP_SX); + return __pte((pte_val(pte) & ~_PAGE_BAP_SX) | _PAGE_BAP_UX); } #define pte_mkexec pte_mkexec diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a9b31cc258fc..b66b0c615f4f 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -227,7 +227,7 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id, uint64_t data); int64_t opal_pci_poll2(uint64_t id, uint64_t data); -int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll); +int64_t opal_int_get_xirr(__be32 *out_xirr, bool just_poll); int64_t opal_int_set_cppr(uint8_t cppr); int64_t opal_int_eoi(uint32_t xirr); int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr); diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index e08513d73119..ac4279208d63 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -71,6 +71,11 @@ static inline void yield_to_any(void) { plpar_hcall_norets_notrace(H_CONFER, -1, 0); } + +static inline bool is_vcpu_idle(int vcpu) +{ + return lppaca_of(vcpu).idle; +} #else static inline bool is_shared_processor(void) { @@ -100,6 +105,10 @@ static inline void prod_cpu(int cpu) ___bad_prod_cpu(); /* This would be a bug */ } +static inline bool is_vcpu_idle(int vcpu) +{ + return false; +} #endif #define vcpu_is_preempted vcpu_is_preempted @@ -121,9 +130,23 @@ static inline bool vcpu_is_preempted(int cpu) if (!is_shared_processor()) return false; + /* + * If the hypervisor has dispatched the target CPU on a physical + * processor, then the target CPU is definitely not preempted. + */ + if (!(yield_count_of(cpu) & 1)) + return false; + + /* + * If the target CPU has yielded to Hypervisor but OS has not + * requested idle then the target CPU is definitely preempted. + */ + if (!is_vcpu_idle(cpu)) + return true; + #ifdef CONFIG_PPC_SPLPAR if (!is_kvm_guest()) { - int first_cpu; + int first_cpu, i; /* * The result of vcpu_is_preempted() is used in a @@ -149,11 +172,29 @@ static inline bool vcpu_is_preempted(int cpu) */ if (cpu_first_thread_sibling(cpu) == first_cpu) return false; + + /* + * If any of the threads of the target CPU's core are not + * preempted or ceded, then consider target CPU to be + * non-preempted. + */ + first_cpu = cpu_first_thread_sibling(cpu); + for (i = first_cpu; i < first_cpu + threads_per_core; i++) { + if (i == cpu) + continue; + if (!(yield_count_of(i) & 1)) + return false; + if (!is_vcpu_idle(i)) + return true; + } } #endif - if (yield_count_of(cpu) & 1) - return true; + /* + * None of the threads in target CPU's core are running but none of + * them were preempted too. Hence assume the target CPU to be + * non-preempted. + */ return false; } diff --git a/arch/powerpc/include/asm/pgtable-masks.h b/arch/powerpc/include/asm/pgtable-masks.h new file mode 100644 index 000000000000..6e8e2db26a5a --- /dev/null +++ b/arch/powerpc/include/asm/pgtable-masks.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_PGTABLE_MASKS_H +#define _ASM_POWERPC_PGTABLE_MASKS_H + +#ifndef _PAGE_NA +#define _PAGE_NA 0 +#define _PAGE_NAX _PAGE_EXEC +#define _PAGE_RO _PAGE_READ +#define _PAGE_ROX (_PAGE_READ | _PAGE_EXEC) +#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) +#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) +#endif + +/* Permission flags for kernel mappings */ +#ifndef _PAGE_KERNEL_RO +#define _PAGE_KERNEL_RO _PAGE_RO +#define _PAGE_KERNEL_ROX _PAGE_ROX +#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) +#define _PAGE_KERNEL_RWX (_PAGE_RWX | _PAGE_DIRTY) +#endif + +/* Permission masks used to generate the __P and __S table */ +#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA) +#define PAGE_EXECONLY_X __pgprot(_PAGE_BASE | _PAGE_NAX) +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RWX) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_RO) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_ROX) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_RO) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_ROX) + +#endif /* _ASM_POWERPC_PGTABLE_MASKS_H */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index d0ee46de248e..2bfb7dd3b49e 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -71,6 +71,12 @@ static inline pgprot_t pte_pgprot(pte_t pte) return __pgprot(pte_flags); } +static inline pgprot_t pgprot_nx(pgprot_t prot) +{ + return pte_pgprot(pte_exprotect(__pte(pgprot_val(prot)))); +} +#define pgprot_nx pgprot_nx + #ifndef pmd_page_vaddr static inline const void *pmd_page_vaddr(pmd_t pmd) { @@ -110,6 +116,35 @@ void mark_initmem_nx(void); static inline void mark_initmem_nx(void) { } #endif +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, pte_t entry, int dirty); + +struct file; +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +#define __HAVE_PHYS_MEM_ACCESS_PROT + +void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); + +/* + * This gets called at the end of handling a page fault, when + * the kernel has put a new PTE into the page table for the process. + * We use it to ensure coherency between the i-cache and d-cache + * for the page which has just been mapped in. + * On machines which use an MMU hash table, we use this to put a + * corresponding HPTE into the hash table ahead of time, instead of + * waiting for the inevitable extra hash-table miss exception. + */ +static inline void update_mmu_cache_range(struct vm_fault *vmf, + struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, unsigned int nr) +{ + if ((mmu_has_feature(MMU_FTR_HPTE_TABLE) && !radix_enabled()) || + (IS_ENABLED(CONFIG_PPC_E500) && IS_ENABLED(CONFIG_HUGETLB_PAGE))) + __update_mmu_cache(vma, address, ptep); +} + /* * When used, PTE_FRAG_NR is defined in subarch pgtable.h * so we are sure it is included when arriving here. diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index fe3d0ea0058a..b3ee44a40c2f 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -6,6 +6,7 @@ #include <linux/string.h> #include <linux/irqflags.h> +#include <linux/delay.h> #include <asm/hvcall.h> #include <asm/paca.h> @@ -343,6 +344,212 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) return rc; } +static inline long plpar_guest_create(unsigned long flags, unsigned long *guest_id) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + unsigned long token; + long rc; + + token = -1UL; + do { + rc = plpar_hcall(H_GUEST_CREATE, retbuf, flags, token); + if (rc == H_SUCCESS) + *guest_id = retbuf[0]; + + if (rc == H_BUSY) { + token = retbuf[0]; + cond_resched(); + } + + if (H_IS_LONG_BUSY(rc)) { + token = retbuf[0]; + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } + + } while (rc == H_BUSY); + + return rc; +} + +static inline long plpar_guest_create_vcpu(unsigned long flags, + unsigned long guest_id, + unsigned long vcpu_id) +{ + long rc; + + do { + rc = plpar_hcall_norets(H_GUEST_CREATE_VCPU, 0, guest_id, vcpu_id); + + if (rc == H_BUSY) + cond_resched(); + + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } + + } while (rc == H_BUSY); + + return rc; +} + +static inline long plpar_guest_set_state(unsigned long flags, + unsigned long guest_id, + unsigned long vcpu_id, + unsigned long data_buffer, + unsigned long data_size, + unsigned long *failed_index) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + while (true) { + rc = plpar_hcall(H_GUEST_SET_STATE, retbuf, flags, guest_id, + vcpu_id, data_buffer, data_size); + + if (rc == H_BUSY) { + cpu_relax(); + continue; + } + + if (H_IS_LONG_BUSY(rc)) { + mdelay(get_longbusy_msecs(rc)); + continue; + } + + if (rc == H_INVALID_ELEMENT_ID) + *failed_index = retbuf[0]; + else if (rc == H_INVALID_ELEMENT_SIZE) + *failed_index = retbuf[0]; + else if (rc == H_INVALID_ELEMENT_VALUE) + *failed_index = retbuf[0]; + + break; + } + + return rc; +} + +static inline long plpar_guest_get_state(unsigned long flags, + unsigned long guest_id, + unsigned long vcpu_id, + unsigned long data_buffer, + unsigned long data_size, + unsigned long *failed_index) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + while (true) { + rc = plpar_hcall(H_GUEST_GET_STATE, retbuf, flags, guest_id, + vcpu_id, data_buffer, data_size); + + if (rc == H_BUSY) { + cpu_relax(); + continue; + } + + if (H_IS_LONG_BUSY(rc)) { + mdelay(get_longbusy_msecs(rc)); + continue; + } + + if (rc == H_INVALID_ELEMENT_ID) + *failed_index = retbuf[0]; + else if (rc == H_INVALID_ELEMENT_SIZE) + *failed_index = retbuf[0]; + else if (rc == H_INVALID_ELEMENT_VALUE) + *failed_index = retbuf[0]; + + break; + } + + return rc; +} + +static inline long plpar_guest_run_vcpu(unsigned long flags, unsigned long guest_id, + unsigned long vcpu_id, int *trap, + unsigned long *failed_index) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_GUEST_RUN_VCPU, retbuf, flags, guest_id, vcpu_id); + if (rc == H_SUCCESS) + *trap = retbuf[0]; + else if (rc == H_INVALID_ELEMENT_ID) + *failed_index = retbuf[0]; + else if (rc == H_INVALID_ELEMENT_SIZE) + *failed_index = retbuf[0]; + else if (rc == H_INVALID_ELEMENT_VALUE) + *failed_index = retbuf[0]; + + return rc; +} + +static inline long plpar_guest_delete(unsigned long flags, u64 guest_id) +{ + long rc; + + do { + rc = plpar_hcall_norets(H_GUEST_DELETE, flags, guest_id); + if (rc == H_BUSY) + cond_resched(); + + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } + + } while (rc == H_BUSY); + + return rc; +} + +static inline long plpar_guest_set_capabilities(unsigned long flags, + unsigned long capabilities) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + do { + rc = plpar_hcall(H_GUEST_SET_CAPABILITIES, retbuf, flags, capabilities); + if (rc == H_BUSY) + cond_resched(); + + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } + } while (rc == H_BUSY); + + return rc; +} + +static inline long plpar_guest_get_capabilities(unsigned long flags, + unsigned long *capabilities) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + do { + rc = plpar_hcall(H_GUEST_GET_CAPABILITIES, retbuf, flags); + if (rc == H_BUSY) + cond_resched(); + + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + *capabilities = retbuf[0]; + + return rc; +} + /* * Wrapper to H_RPT_INVALIDATE hcall that handles return values appropriately * @@ -355,7 +562,7 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) * error recovery of killing the process/guest will be eventually * needed. */ -static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type, +static inline long pseries_rpt_invalidate(u64 pid, u64 target, u64 type, u64 page_sizes, u64 start, u64 end) { long rc; @@ -401,12 +608,68 @@ static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex, return 0; } -static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type, +static inline long pseries_rpt_invalidate(u64 pid, u64 target, u64 type, u64 page_sizes, u64 start, u64 end) { return 0; } +static inline long plpar_guest_create_vcpu(unsigned long flags, + unsigned long guest_id, + unsigned long vcpu_id) +{ + return 0; +} + +static inline long plpar_guest_get_state(unsigned long flags, + unsigned long guest_id, + unsigned long vcpu_id, + unsigned long data_buffer, + unsigned long data_size, + unsigned long *failed_index) +{ + return 0; +} + +static inline long plpar_guest_set_state(unsigned long flags, + unsigned long guest_id, + unsigned long vcpu_id, + unsigned long data_buffer, + unsigned long data_size, + unsigned long *failed_index) +{ + return 0; +} + +static inline long plpar_guest_run_vcpu(unsigned long flags, unsigned long guest_id, + unsigned long vcpu_id, int *trap, + unsigned long *failed_index) +{ + return 0; +} + +static inline long plpar_guest_create(unsigned long flags, unsigned long *guest_id) +{ + return 0; +} + +static inline long plpar_guest_delete(unsigned long flags, u64 guest_id) +{ + return 0; +} + +static inline long plpar_guest_get_capabilities(unsigned long flags, + unsigned long *capabilities) +{ + return 0; +} + +static inline long plpar_guest_set_capabilities(unsigned long flags, + unsigned long capabilities) +{ + return 0; +} + #endif /* CONFIG_PPC_PSERIES */ #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 9db8b16567e2..ea8f91fbc62f 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -397,6 +397,23 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, return 0; } +/** + * regs_get_kernel_argument() - get Nth function argument in kernel + * @regs: pt_regs of that context + * @n: function argument number (start from 0) + * + * We support up to 8 arguments and assume they are sent in through the GPRs. + * This will fail for fp/vector arguments, but those aren't usually found in + * kernel code. This is expected to be called from kprobes or ftrace with regs. + */ +static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, unsigned int n) +{ +#define NR_REG_ARGUMENTS 8 + if (n < NR_REG_ARGUMENTS) + return regs_get_register(regs, offsetof(struct pt_regs, gpr[3 + n])); + return 0; +} + #endif /* __ASSEMBLY__ */ #ifndef __powerpc64__ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index fb725ec77926..f1f9890f50d3 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -374,7 +374,7 @@ copy_mc_to_user(void __user *to, const void *from, unsigned long n) if (check_copy_size(from, n, true)) { if (access_ok(to, n)) { allow_write_to_user(to, n); - n = copy_mc_generic((void *)to, from, n); + n = copy_mc_generic((void __force *)to, from, n); prevent_write_to_user(to, n); } } diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 19e46fd623b0..7f63f1cdc6c3 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -8,6 +8,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/font.h> #include <linux/memblock.h> #include <linux/pgtable.h> #include <linux/of.h> @@ -41,10 +42,6 @@ static unsigned char *logicalDisplayBase __force_data; unsigned long disp_BAT[2] __initdata = {0, 0}; -#define cmapsz (16*256) - -static unsigned char vga_font[cmapsz]; - static int boot_text_mapped __force_data; extern void rmci_on(void); @@ -407,7 +404,7 @@ static unsigned int expand_bits_16[4] = { }; -static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) +static void draw_byte_32(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0xFFFFFFFFUL; @@ -428,7 +425,7 @@ static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) } } -static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb) +static inline void draw_byte_16(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0xFFFFFFFFUL; @@ -446,7 +443,7 @@ static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb) } } -static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb) +static inline void draw_byte_8(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0x0F0F0F0FUL; @@ -465,7 +462,8 @@ static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb) static noinline void draw_byte(unsigned char c, long locX, long locY) { unsigned char *base = calc_base(locX << 3, locY << 4); - unsigned char *font = &vga_font[((unsigned int)c) * 16]; + unsigned int font_index = c * 16; + const unsigned char *font = font_sun_8x16.data + font_index; int rb = dispDeviceRowBytes; rmci_maybe_on(); @@ -583,349 +581,3 @@ void __init udbg_init_btext(void) */ udbg_putc = btext_drawchar; } - -static unsigned char vga_font[cmapsz] = { -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, -0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff, -0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, -0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, -0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, -0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, -0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, -0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e, -0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, -0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63, -0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, -0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e, -0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, -0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb, -0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, -0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, -0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, -0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, -0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, -0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, -0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, -0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, -0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, -0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, -0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18, -0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, -0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, -0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, -0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, -0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, -0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, -0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, -0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde, -0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, -0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, -0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c, -0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, -0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, -0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c, -0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, -0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7, -0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, -0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, -0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, -0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, -0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, -0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, -0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, -0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, -0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, -0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, -0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, -0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, -0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, -0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30, -0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, -0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18, -0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, -0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, -0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00, -0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, -0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, -0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, -0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, -0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, -0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, -0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, -0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66, -0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, -0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00, -0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, -0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c, -0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00, -0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, -0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, -0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00, -0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, -0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, -0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, -0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00, -0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, -0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, -0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, -0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, -0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, -0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, -0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, -0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, -0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, -0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, -0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44, -0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, -0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, -0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, -0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, -0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, -0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0, -0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, -0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, -0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, -0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, -0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, -0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, -0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, -0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, -0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, -0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, -0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, -0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, -0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c, -0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00, -0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, -}; - diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 9a3b85bfc83f..2086fa6cdc25 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -19,6 +19,7 @@ #include <linux/uio.h> #include <asm/rtas.h> #include <asm/inst.h> +#include <asm/fadump.h> #ifdef DEBUG #include <asm/udbg.h> @@ -92,6 +93,17 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, return csize; } +/* + * Return true only when kexec based kernel dump capturing method is used. + * This ensures all restritions applied for kdump case are not automatically + * applied for fadump case. + */ +bool is_kdump_kernel(void) +{ + return !is_fadump_active() && elfcorehdr_addr != ELFCORE_ADDR_MAX; +} +EXPORT_SYMBOL_GPL(is_kdump_kernel); + #ifdef CONFIG_PPC_RTAS /* * The crashkernel region will almost always overlap the RTAS region, so diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 438568a472d0..48773d2d9be3 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -39,7 +39,7 @@ static int eeh_result_priority(enum pci_ers_result result) case PCI_ERS_RESULT_NEED_RESET: return 6; default: - WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result); + WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", result); return 0; } }; @@ -60,7 +60,7 @@ static const char *pci_ers_result_name(enum pci_ers_result result) case PCI_ERS_RESULT_NO_AER_DRIVER: return "no AER driver"; default: - WARN_ONCE(1, "Unknown result type: %d\n", (int)result); + WARN_ONCE(1, "Unknown result type: %d\n", result); return "unknown"; } }; diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index b32e7b2ebdcf..9fc90410b385 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -312,13 +312,13 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ lwz r11, 0(r11) /* Get Linux PTE */ - li r9, _PAGE_PRESENT | _PAGE_ACCESSED + li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ andc. r9, r9, r11 /* Check permission */ bne 5f - rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ - and r9, r9, r11 /* hwwrite = dirty & rw */ - rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ + rlwinm r9, r11, 1, _PAGE_WRITE /* dirty => w */ + and r9, r9, r11 /* hwwrite = dirty & w */ + rlwimi r11, r9, 0, _PAGE_WRITE /* replace w by hwwrite */ /* Create TLB tag. This is the faulting address plus a static * set of bits. These are size, valid, E, U0. @@ -400,9 +400,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) andc. r9, r9, r11 /* Check permission */ bne 5f - rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ - and r9, r9, r11 /* hwwrite = dirty & rw */ - rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ + rlwinm r9, r11, 1, _PAGE_WRITE /* dirty => w */ + and r9, r9, r11 /* hwwrite = dirty & w */ + rlwimi r11, r9, 0, _PAGE_WRITE /* replace w by hwwrite */ /* Create TLB tag. This is the faulting address plus a static * set of bits. These are size, valid, E, U0. @@ -561,10 +561,11 @@ finish_tlb_load: /* * Clear out the software-only bits in the PTE to generate the * TLB_DATA value. These are the bottom 2 bits of the RPM, the - * top 3 bits of the zone field, and M. + * 4 bits of the zone field, and M. */ - li r9, 0x0ce2 + li r9, 0x0cf2 andc r11, r11, r9 + rlwimi r11, r10, 8, 24, 27 /* Copy 4 upper address bit into zone */ /* load the next available TLB index. */ lwz r9, tlb_4xx_index@l(0) diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index a3197c9f721c..25642e802ed3 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -314,8 +314,8 @@ interrupt_base: * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f + cmplw cr7, r10, r11 + blt+ cr7, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -342,7 +342,7 @@ interrupt_base: mtspr SPRN_MMUCR,r12 /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write + * do copy ESR:ST to _PAGE_WRITE position as trying to write * to an RO page is pretty common, we don't do it with * _PAGE_DIRTY. We could do it, but it's a fairly rare * event so I'd rather take the overhead when it happens @@ -355,7 +355,7 @@ interrupt_base: * place or can we save a couple of instructions here ? */ mfspr r12,SPRN_ESR - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ rlwimi r13,r12,10,30,30 /* Load the PTE */ @@ -428,8 +428,8 @@ interrupt_base: * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f + cmplw cr7, r10, r11 + blt+ cr7, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -515,6 +515,7 @@ interrupt_base: * r11 - PTE high word value * r12 - PTE low word value * r13 - TLB index + * cr7 - Result of comparison with PAGE_OFFSET * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ @@ -533,11 +534,10 @@ finish_tlb_load_44x: tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */ /* And WS 2 */ - li r10,0xf85 /* Mask to apply from PTE */ - rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + li r10,0xf84 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */ and r11,r12,r10 /* Mask PTE bits to keep */ - andi. r10,r12,_PAGE_USER /* User page ? */ - beq 1f /* nope, leave U bits empty */ + bge cr7,1f /* User page ? no, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ @@ -568,8 +568,8 @@ finish_tlb_load_44x: * kernel page tables. */ lis r11,PAGE_OFFSET@h - cmplw cr0,r10,r11 - blt+ 3f + cmplw cr7,r10,r11 + blt+ cr7,3f lis r11,swapper_pg_dir@h ori r11,r11, swapper_pg_dir@l li r12,0 /* MMUCR = 0 */ @@ -586,7 +586,7 @@ finish_tlb_load_44x: 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write + * do copy ESR:ST to _PAGE_WRITE position as trying to write * to an RO page is pretty common, we don't do it with * _PAGE_DIRTY. We could do it, but it's a fairly rare * event so I'd rather take the overhead when it happens @@ -599,7 +599,7 @@ finish_tlb_load_44x: * place or can we save a couple of instructions here ? */ mfspr r12,SPRN_ESR - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ rlwimi r13,r12,10,30,30 /* Load the PTE */ @@ -669,8 +669,8 @@ finish_tlb_load_44x: * kernel page tables. */ lis r11,PAGE_OFFSET@h - cmplw cr0,r10,r11 - blt+ 3f + cmplw cr7,r10,r11 + blt+ cr7,3f lis r11,swapper_pg_dir@h ori r11,r11, swapper_pg_dir@l li r12,0 /* MMUCR = 0 */ @@ -744,6 +744,7 @@ finish_tlb_load_44x: * r11 - PTE high word value * r12 - PTE low word value * r13 - free to use + * cr7 - Result of comparison with PAGE_OFFSET * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ @@ -753,11 +754,10 @@ finish_tlb_load_47x: tlbwe r11,r13,1 /* And make up word 2 */ - li r10,0xf85 /* Mask to apply from PTE */ - rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + li r10,0xf84 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */ and r11,r12,r10 /* Mask PTE bits to keep */ - andi. r10,r12,_PAGE_USER /* User page ? */ - beq 1f /* nope, leave U bits empty */ + bge cr7,1f /* User page ? no, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ 1: tlbwe r11,r13,2 diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 0f1641a31250..39724ff5ae1f 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -471,7 +471,7 @@ END_BTB_FLUSH_SECTION 4: /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write + * do copy ESR:ST to _PAGE_WRITE position as trying to write * to an RO page is pretty common, we don't do it with * _PAGE_DIRTY. We could do it, but it's a fairly rare * event so I'd rather take the overhead when it happens @@ -485,10 +485,10 @@ END_BTB_FLUSH_SECTION */ mfspr r12,SPRN_ESR #ifdef CONFIG_PTE_64BIT - li r13,_PAGE_PRESENT + li r13,_PAGE_PRESENT|_PAGE_BAP_SR oris r13,r13,_PAGE_ACCESSED@h #else - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_READ|_PAGE_ACCESSED #endif rlwimi r13,r12,11,29,29 @@ -783,15 +783,15 @@ BEGIN_MMU_FTR_SECTION mtspr SPRN_MAS7, r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) #else - li r10, (_PAGE_EXEC | _PAGE_PRESENT) + li r10, (_PAGE_EXEC | _PAGE_READ) mr r13, r11 rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ and r12, r11, r10 - andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ + mcrf cr0, cr5 /* Test for user page */ slwi r10, r12, 1 or r10, r10, r12 rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */ - iseleq r12, r12, r10 + isellt r12, r10, r12 rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ mtspr SPRN_MAS3, r13 #endif diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 6764b98ca360..c1d89764dd22 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -412,10 +412,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) . = INTERRUPT_INST_TLB_MISS_603 InstructionTLBMiss: /* - * r0: scratch + * r0: userspace flag (later scratch) * r1: linux style pte ( later becomes ppc hardware pte ) * r2: ptr to linux-style pte - * r3: scratch + * r3: fault address */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS @@ -424,12 +424,13 @@ InstructionTLBMiss: cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC rlwinm r2, r2, 28, 0xfffff000 #ifdef CONFIG_MODULES + li r0, 3 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC + li r0, 0 addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -437,13 +438,15 @@ InstructionTLBMiss: rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- InstructionAddressInvalid /* return if no mapping */ rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + lwz r2,0(r2) /* get linux-style pte */ + andc. r1,r1,r2 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ +#ifdef CONFIG_MODULES + rlwimi r2, r0, 0, 31, 31 /* userspace ? -> PP lsb */ +#endif ori r1, r1, 0xe06 /* clear out reserved bits */ - andc r1, r0, r1 /* PP = user? 1 : 0 */ + andc r1, r2, r1 /* PP = user? 1 : 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -478,38 +481,38 @@ InstructionAddressInvalid: . = INTERRUPT_DATA_LOAD_TLB_MISS_603 DataLoadTLBMiss: /* - * r0: scratch + * r0: userspace flag (later scratch) * r1: linux style pte ( later becomes ppc hardware pte ) * r2: ptr to linux-style pte - * r3: scratch + * r3: fault address */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SDR1 - li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER + li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ rlwinm r2, r2, 28, 0xfffff000 + li r0, 3 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r1, _PAGE_PRESENT | _PAGE_ACCESSED + li r0, 0 addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- DataAddressInvalid /* return if no mapping */ rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + lwz r2,0(r2) /* get linux-style pte */ + andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r1,r0,32-3,24,24 /* _PAGE_RW -> _PAGE_DIRTY */ - rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + rlwinm r1,r2,32-9,30,30 /* _PAGE_WRITE -> PP msb */ + rlwimi r2,r0,0,30,31 /* userspace ? -> PP */ + rlwimi r1,r2,32-3,24,24 /* _PAGE_WRITE -> _PAGE_DIRTY */ xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ - andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ + andc r1,r2,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -558,34 +561,35 @@ DataAddressInvalid: . = INTERRUPT_DATA_STORE_TLB_MISS_603 DataStoreTLBMiss: /* - * r0: scratch + * r0: userspace flag (later scratch) * r1: linux style pte ( later becomes ppc hardware pte ) * r2: ptr to linux-style pte - * r3: scratch + * r3: fault address */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SDR1 - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED rlwinm r2, r2, 28, 0xfffff000 + li r0, 3 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + li r0, 0 addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- DataAddressInvalid /* return if no mapping */ rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + lwz r2,0(r2) /* get linux-style pte */ + andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + rlwimi r2,r0,0,31,31 /* userspace ? -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ - andc r1,r0,r1 /* PP = user? 1: 0 */ + andc r1,r2,r1 /* PP = user? 1: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -689,7 +693,8 @@ hash_page_dsi: mfdar r4 mfsrr0 r5 mfsrr1 r9 - rlwinm r3, r3, 32 - 15, _PAGE_RW /* DSISR_STORE -> _PAGE_RW */ + rlwinm r3, r3, 32 - 15, _PAGE_WRITE /* DSISR_STORE -> _PAGE_WRITE */ + ori r3, r3, _PAGE_PRESENT | _PAGE_READ bl hash_page mfspr r10, SPRN_SPRG_THREAD restore_regs_thread r10 @@ -699,7 +704,7 @@ hash_page_isi: mr r11, r10 mfspr r10, SPRN_SPRG_THREAD save_regs_thread r10 - li r3, 0 + li r3, _PAGE_PRESENT | _PAGE_EXEC lwz r4, SRR0(r10) lwz r9, SRR1(r10) bl hash_page diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index 2f29b7d432de..6af535905984 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -33,7 +33,7 @@ void _insb(const volatile u8 __iomem *port, void *buf, long count) return; asm volatile("sync"); do { - tmp = *port; + tmp = *(const volatile u8 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); @@ -49,7 +49,7 @@ void _outsb(volatile u8 __iomem *port, const void *buf, long count) return; asm volatile("sync"); do { - *port = *tbuf++; + *(volatile u8 __force *)port = *tbuf++; } while (--count != 0); asm volatile("sync"); } @@ -64,7 +64,7 @@ void _insw_ns(const volatile u16 __iomem *port, void *buf, long count) return; asm volatile("sync"); do { - tmp = *port; + tmp = *(const volatile u16 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); @@ -80,7 +80,7 @@ void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count) return; asm volatile("sync"); do { - *port = *tbuf++; + *(volatile u16 __force *)port = *tbuf++; } while (--count != 0); asm volatile("sync"); } @@ -95,7 +95,7 @@ void _insl_ns(const volatile u32 __iomem *port, void *buf, long count) return; asm volatile("sync"); do { - tmp = *port; + tmp = *(const volatile u32 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); @@ -111,7 +111,7 @@ void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count) return; asm volatile("sync"); do { - *port = *tbuf++; + *(volatile u32 __force *)port = *tbuf++; } while (--count != 0); asm volatile("sync"); } diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 14251bc5219e..ebe259bdd462 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1074,10 +1074,10 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa) } EXPORT_SYMBOL_GPL(iommu_tce_check_gpa); -extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, - struct iommu_table *tbl, - unsigned long entry, unsigned long *hpa, - enum dma_data_direction *direction) +long iommu_tce_xchg_no_kill(struct mm_struct *mm, + struct iommu_table *tbl, + unsigned long entry, unsigned long *hpa, + enum dma_data_direction *direction) { long ret; unsigned long size = 0; @@ -1280,13 +1280,19 @@ struct iommu_table_group_ops spapr_tce_table_group_ops = { /* * A simple iommu_ops to allow less cruft in generic VFIO code. */ -static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom, - struct device *dev) +static int +spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, + struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_group *grp = iommu_group_get(dev); struct iommu_table_group *table_group; int ret = -EINVAL; + /* At first attach the ownership is already set */ + if (!domain) + return 0; + if (!grp) return -ENODEV; @@ -1297,17 +1303,22 @@ static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom, return ret; } -static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev) -{ - struct iommu_group *grp = iommu_group_get(dev); - struct iommu_table_group *table_group; +static const struct iommu_domain_ops spapr_tce_platform_domain_ops = { + .attach_dev = spapr_tce_platform_iommu_attach_dev, +}; - table_group = iommu_group_get_iommudata(grp); - table_group->ops->release_ownership(table_group); -} +static struct iommu_domain spapr_tce_platform_domain = { + .type = IOMMU_DOMAIN_PLATFORM, + .ops = &spapr_tce_platform_domain_ops, +}; -static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = { - .attach_dev = spapr_tce_blocking_iommu_attach_dev, +static struct iommu_domain spapr_tce_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + /* + * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain + * also sets the dma_api ops + */ + .ops = &spapr_tce_platform_domain_ops, }; static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) @@ -1322,22 +1333,6 @@ static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } -static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type) -{ - struct iommu_domain *dom; - - if (type != IOMMU_DOMAIN_BLOCKED) - return NULL; - - dom = kzalloc(sizeof(*dom), GFP_KERNEL); - if (!dom) - return NULL; - - dom->ops = &spapr_tce_blocking_domain_ops; - - return dom; -} - static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) { struct pci_dev *pdev; @@ -1371,12 +1366,12 @@ static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) } static const struct iommu_ops spapr_tce_iommu_ops = { + .default_domain = &spapr_tce_platform_domain, + .blocked_domain = &spapr_tce_blocked_domain, .capable = spapr_tce_iommu_capable, - .domain_alloc = spapr_tce_iommu_domain_alloc, .probe_device = spapr_tce_iommu_probe_device, .release_device = spapr_tce_iommu_release_device, .device_group = spapr_tce_iommu_device_group, - .set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma, }; static struct attribute *spapr_tce_iommu_attrs[] = { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b68898ac07e1..392404688cec 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2258,6 +2258,22 @@ unsigned long __get_wchan(struct task_struct *p) return ret; } +static bool empty_user_regs(struct pt_regs *regs, struct task_struct *tsk) +{ + unsigned long stack_page; + + // A non-empty pt_regs should never have a zero MSR or TRAP value. + if (regs->msr || regs->trap) + return false; + + // Check it sits at the very base of the stack + stack_page = (unsigned long)task_stack_page(tsk); + if ((unsigned long)(regs + 1) != stack_page + THREAD_SIZE) + return false; + + return true; +} + static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; void __no_sanitize_address show_stack(struct task_struct *tsk, @@ -2322,9 +2338,13 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, lr = regs->link; printk("%s--- interrupt: %lx at %pS\n", loglvl, regs->trap, (void *)regs->nip); - __show_regs(regs); - printk("%s--- interrupt: %lx\n", - loglvl, regs->trap); + + // Detect the case of an empty pt_regs at the very base + // of the stack and suppress showing it in full. + if (!empty_user_regs(regs, tsk)) { + __show_regs(regs); + printk("%s--- interrupt: %lx\n", loglvl, regs->trap); + } firstframe = 1; } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index d464ba412084..e67effdba85c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -947,7 +947,7 @@ struct option_vector7 { } __packed; struct ibm_arch_vec { - struct { u32 mask, val; } pvrs[14]; + struct { __be32 mask, val; } pvrs[14]; u8 num_vectors; diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 5d7a72b41ae7..727ed4a14545 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * PowerPC version * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) @@ -9,10 +10,6 @@ * * Modified by Cort Dougan (cort@hq.fsmlabs.com) * and Paul Mackerras (paulus@samba.org). - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. */ #include <linux/regset.h> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 20f72cd1d813..9b142b9d5187 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -22,7 +22,6 @@ #include <linux/seq_file.h> #include <linux/ioport.h> #include <linux/console.h> -#include <linux/screen_info.h> #include <linux/root_dev.h> #include <linux/cpu.h> #include <linux/unistd.h> @@ -98,21 +97,6 @@ int boot_cpu_hwid = -1; int dcache_bsize; int icache_bsize; -/* - * This still seems to be needed... -- paulus - */ -struct screen_info screen_info = { - .orig_x = 0, - .orig_y = 25, - .orig_video_cols = 80, - .orig_video_lines = 25, - .orig_video_isVGA = 1, - .orig_video_points = 16 -}; -#if defined(CONFIG_FB_VGA16_MODULE) -EXPORT_SYMBOL(screen_info); -#endif - /* Variables required to store legacy IO irq routing */ int of_i8042_kbd_irq; EXPORT_SYMBOL_GPL(of_i8042_kbd_irq); @@ -601,7 +585,6 @@ struct seq_buf ppc_hw_desc __initdata = { .buffer = ppc_hw_desc_buf, .size = sizeof(ppc_hw_desc_buf), .len = 0, - .readpos = 0, }; static __init void probe_machine(void) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 246201d0d879..2f19d5e94485 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -364,7 +364,7 @@ void __init early_setup(unsigned long dt_ptr) */ initialise_paca(&boot_paca, 0); fixup_boot_paca(&boot_paca); - WARN_ON(local_paca != 0); + WARN_ON(local_paca); setup_paca(&boot_paca); /* install the paca into registers */ /* -------- printk is now safe to use ------- */ diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 68a91e553e14..aa17e62f3754 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Common signal handling code for both 32 and 64 bits * * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. */ #include <linux/resume_user_mode.h> diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index a429c57ed433..58ecea1cdc27 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -1,10 +1,7 @@ -/* +/* SPDX-License-Identifier: GPL-2.0-or-later + * * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. */ #ifndef _POWERPC_ARCH_SIGNAL_H diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 64ff37721fd0..5ea2014aff90 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -157,7 +157,7 @@ static int die_owner = -1; static unsigned int die_nest_count; static int die_counter; -extern void panic_flush_kmsg_start(void) +void panic_flush_kmsg_start(void) { /* * These are mostly taken from kernel/panic.c, but tries to do @@ -170,7 +170,7 @@ extern void panic_flush_kmsg_start(void) bust_spinlocks(1); } -extern void panic_flush_kmsg_end(void) +void panic_flush_kmsg_end(void) { kmsg_dump(KMSG_DUMP_PANIC); bust_spinlocks(0); @@ -1164,6 +1164,7 @@ void emulate_single_step(struct pt_regs *regs) __single_step_exception(regs); } +#ifdef CONFIG_PPC_FPU_REGS static inline int __parse_fpscr(unsigned long fpscr) { int ret = FPE_FLTUNK; @@ -1190,6 +1191,7 @@ static inline int __parse_fpscr(unsigned long fpscr) return ret; } +#endif static void parse_fpe(struct pt_regs *regs) { diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 9346c960b296..85846cadb9b5 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -74,6 +74,9 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_STRUCT_SIZE(mmu_psize_def); VMCOREINFO_OFFSET(mmu_psize_def, shift); #endif + VMCOREINFO_SYMBOL(cur_cpu_spec); + VMCOREINFO_OFFSET(cpu_spec, mmu_features); + vmcoreinfo_append_str("NUMBER(RADIX_MMU)=%d\n", early_radix_enabled()); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); } diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index a79e28c91e2b..0bee7ca9a77c 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -379,8 +379,8 @@ void default_machine_kexec(struct kimage *image) #ifdef CONFIG_PPC_64S_HASH_MMU /* Values we need to export to the second kernel via the device tree. */ -static unsigned long htab_base; -static unsigned long htab_size; +static __be64 htab_base; +static __be64 htab_size; static struct property htab_base_prop = { .name = "linux,htab-base", diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index a3de5369d22c..961a6dd67365 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -32,7 +32,7 @@ #include <asm/plpks.h> struct umem_info { - u64 *buf; /* data buffer for usable-memory property */ + __be64 *buf; /* data buffer for usable-memory property */ u32 size; /* size allocated for the data buffer */ u32 max_entries; /* maximum no. of entries */ u32 idx; /* index of current entry */ @@ -443,10 +443,10 @@ static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, * * Returns buffer on success, NULL on error. */ -static u64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt) +static __be64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt) { u32 new_size; - u64 *tbuf; + __be64 *tbuf; if ((um_info->idx + cnt) <= um_info->max_entries) return um_info->buf; @@ -1138,11 +1138,15 @@ static int update_pci_dma_nodes(void *fdt, const char *dmapropname) continue; ret = copy_property(fdt, pci_offset, dn, "ibm,dma-window"); - if (ret < 0) + if (ret < 0) { + of_node_put(dn); break; + } ret = copy_property(fdt, pci_offset, dn, dmapropname); - if (ret < 0) + if (ret < 0) { + of_node_put(dn); break; + } } return ret; diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 5319d889b184..4bd9d1230869 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -87,8 +87,12 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_ras.o \ book3s_hv_builtin.o \ book3s_hv_p9_perf.o \ + book3s_hv_nestedv2.o \ + guest-state-buffer.o \ $(kvm-book3s_64-builtin-tm-objs-y) \ $(kvm-book3s_64-builtin-xics-objs-y) + +obj-$(CONFIG_GUEST_STATE_BUFFER_TEST) += test-guest-state-buffer.o endif kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 686d8d9eda3e..6cd20ab9e94e 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -565,7 +565,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->msr = kvmppc_get_msr(vcpu); regs->srr0 = kvmppc_get_srr0(vcpu); regs->srr1 = kvmppc_get_srr1(vcpu); - regs->pid = vcpu->arch.pid; + regs->pid = kvmppc_get_pid(vcpu); regs->sprg0 = kvmppc_get_sprg0(vcpu); regs->sprg1 = kvmppc_get_sprg1(vcpu); regs->sprg2 = kvmppc_get_sprg2(vcpu); @@ -636,17 +636,17 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: i = id - KVM_REG_PPC_FPR0; - *val = get_reg_val(id, VCPU_FPR(vcpu, i)); + *val = get_reg_val(id, kvmppc_get_fpr(vcpu, i)); break; case KVM_REG_PPC_FPSCR: - *val = get_reg_val(id, vcpu->arch.fp.fpscr); + *val = get_reg_val(id, kvmppc_get_fpscr(vcpu)); break; #ifdef CONFIG_VSX case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { i = id - KVM_REG_PPC_VSR0; - val->vsxval[0] = vcpu->arch.fp.fpr[i][0]; - val->vsxval[1] = vcpu->arch.fp.fpr[i][1]; + val->vsxval[0] = kvmppc_get_vsx_fpr(vcpu, i, 0); + val->vsxval[1] = kvmppc_get_vsx_fpr(vcpu, i, 1); } else { r = -ENXIO; } @@ -683,19 +683,19 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.fscr); break; case KVM_REG_PPC_TAR: - *val = get_reg_val(id, vcpu->arch.tar); + *val = get_reg_val(id, kvmppc_get_tar(vcpu)); break; case KVM_REG_PPC_EBBHR: - *val = get_reg_val(id, vcpu->arch.ebbhr); + *val = get_reg_val(id, kvmppc_get_ebbhr(vcpu)); break; case KVM_REG_PPC_EBBRR: - *val = get_reg_val(id, vcpu->arch.ebbrr); + *val = get_reg_val(id, kvmppc_get_ebbrr(vcpu)); break; case KVM_REG_PPC_BESCR: - *val = get_reg_val(id, vcpu->arch.bescr); + *val = get_reg_val(id, kvmppc_get_bescr(vcpu)); break; case KVM_REG_PPC_IC: - *val = get_reg_val(id, vcpu->arch.ic); + *val = get_reg_val(id, kvmppc_get_ic(vcpu)); break; default: r = -EINVAL; @@ -724,7 +724,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: i = id - KVM_REG_PPC_FPR0; - VCPU_FPR(vcpu, i) = set_reg_val(id, *val); + kvmppc_set_fpr(vcpu, i, set_reg_val(id, *val)); break; case KVM_REG_PPC_FPSCR: vcpu->arch.fp.fpscr = set_reg_val(id, *val); @@ -733,8 +733,8 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { i = id - KVM_REG_PPC_VSR0; - vcpu->arch.fp.fpr[i][0] = val->vsxval[0]; - vcpu->arch.fp.fpr[i][1] = val->vsxval[1]; + kvmppc_set_vsx_fpr(vcpu, i, 0, val->vsxval[0]); + kvmppc_set_vsx_fpr(vcpu, i, 1, val->vsxval[1]); } else { r = -ENXIO; } @@ -765,22 +765,22 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, break; #endif /* CONFIG_KVM_XIVE */ case KVM_REG_PPC_FSCR: - vcpu->arch.fscr = set_reg_val(id, *val); + kvmppc_set_fpscr(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_TAR: - vcpu->arch.tar = set_reg_val(id, *val); + kvmppc_set_tar(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_EBBHR: - vcpu->arch.ebbhr = set_reg_val(id, *val); + kvmppc_set_ebbhr(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_EBBRR: - vcpu->arch.ebbrr = set_reg_val(id, *val); + kvmppc_set_ebbrr(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_BESCR: - vcpu->arch.bescr = set_reg_val(id, *val); + kvmppc_set_bescr(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_IC: - vcpu->arch.ic = set_reg_val(id, *val); + kvmppc_set_ic(vcpu, set_reg_val(id, *val)); break; default: r = -EINVAL; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index efd0ebf70a5e..2b1f0cdd8c18 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -28,6 +28,7 @@ #include <asm/pte-walk.h> #include "book3s.h" +#include "book3s_hv.h" #include "trace_hv.h" //#define DEBUG_RESIZE_HPT 1 @@ -120,7 +121,7 @@ void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info) kvm->arch.hpt = *info; kvm->arch.sdr1 = __pa(info->virt) | (info->order - 18); - pr_debug("KVM guest htab at %lx (order %ld), LPID %x\n", + pr_debug("KVM guest htab at %lx (order %ld), LPID %llx\n", info->virt, (long)info->order, kvm->arch.lpid); } @@ -347,7 +348,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned long v, orig_v, gr; __be64 *hptep; long int index; - int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); + int virtmode = __kvmppc_get_msr_hv(vcpu) & (data ? MSR_DR : MSR_IR); if (kvm_is_radix(vcpu->kvm)) return kvmppc_mmu_radix_xlate(vcpu, eaddr, gpte, data, iswrite); @@ -385,7 +386,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, /* Get PP bits and key for permission check */ pp = gr & (HPTE_R_PP0 | HPTE_R_PP); - key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; + key = (__kvmppc_get_msr_hv(vcpu) & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; key &= slb_v; /* Calculate permissions */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 572707858d65..175a8eb2681f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -15,6 +15,7 @@ #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> +#include "book3s_hv.h" #include <asm/page.h> #include <asm/mmu.h> #include <asm/pgalloc.h> @@ -96,7 +97,7 @@ static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, void *from, unsigned long n) { int lpid = vcpu->kvm->arch.lpid; - int pid = vcpu->arch.pid; + int pid = kvmppc_get_pid(vcpu); /* This would cause a data segment intr so don't allow the access */ if (eaddr & (0x3FFUL << 52)) @@ -270,7 +271,7 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, /* Work out effective PID */ switch (eaddr >> 62) { case 0: - pid = vcpu->arch.pid; + pid = kvmppc_get_pid(vcpu); break; case 3: pid = 0; @@ -294,9 +295,9 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, } else { if (!(pte & _PAGE_PRIVILEGED)) { /* Check AMR/IAMR to see if strict mode is in force */ - if (vcpu->arch.amr & (1ul << 62)) + if (kvmppc_get_amr_hv(vcpu) & (1ul << 62)) gpte->may_read = 0; - if (vcpu->arch.amr & (1ul << 63)) + if (kvmppc_get_amr_hv(vcpu) & (1ul << 63)) gpte->may_write = 0; if (vcpu->arch.iamr & (1ul << 62)) gpte->may_execute = 0; @@ -307,7 +308,7 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, } void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, - unsigned int pshift, unsigned int lpid) + unsigned int pshift, u64 lpid) { unsigned long psize = PAGE_SIZE; int psi; @@ -344,7 +345,7 @@ void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc); } -static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid) +static void kvmppc_radix_flush_pwc(struct kvm *kvm, u64 lpid) { long rc; @@ -417,7 +418,7 @@ static void kvmppc_pmd_free(pmd_t *pmdp) void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, unsigned int shift, const struct kvm_memory_slot *memslot, - unsigned int lpid) + u64 lpid) { unsigned long old; @@ -468,7 +469,7 @@ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, * (or 4kB) mappings (of sub-pages of the same 2MB page). */ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full, - unsigned int lpid) + u64 lpid) { if (full) { memset(pte, 0, sizeof(long) << RADIX_PTE_INDEX_SIZE); @@ -489,7 +490,7 @@ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full, } static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full, - unsigned int lpid) + u64 lpid) { unsigned long im; pmd_t *p = pmd; @@ -518,7 +519,7 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full, } static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud, - unsigned int lpid) + u64 lpid) { unsigned long iu; pud_t *p = pud; @@ -539,7 +540,7 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud, pud_free(kvm->mm, pud); } -void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid) +void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, u64 lpid) { unsigned long ig; @@ -566,7 +567,7 @@ void kvmppc_free_radix(struct kvm *kvm) } static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd, - unsigned long gpa, unsigned int lpid) + unsigned long gpa, u64 lpid) { pte_t *pte = pte_offset_kernel(pmd, 0); @@ -582,7 +583,7 @@ static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd, } static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud, - unsigned long gpa, unsigned int lpid) + unsigned long gpa, u64 lpid) { pmd_t *pmd = pmd_offset(pud, 0); @@ -608,7 +609,7 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud, int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, unsigned long gpa, unsigned int level, - unsigned long mmu_seq, unsigned int lpid, + unsigned long mmu_seq, u64 lpid, unsigned long *rmapp, struct rmap_nested **n_rmap) { pgd_t *pgd; @@ -785,7 +786,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, } bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing, - unsigned long gpa, unsigned int lpid) + unsigned long gpa, u64 lpid) { unsigned long pgflags; unsigned int shift; diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 93b695b289e9..14c6d7e318da 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -77,8 +77,8 @@ static void kvm_spapr_tce_liobn_put(struct kref *kref) call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free); } -extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, - struct iommu_group *grp) +void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, + struct iommu_group *grp) { int i; struct kvmppc_spapr_tce_table *stt; @@ -105,8 +105,8 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, rcu_read_unlock(); } -extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, - struct iommu_group *grp) +long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, + struct iommu_group *grp) { struct kvmppc_spapr_tce_table *stt = NULL; bool found = false; @@ -786,12 +786,12 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, idx = (ioba >> stt->page_shift) - stt->offset; page = stt->pages[idx / TCES_PER_PAGE]; if (!page) { - vcpu->arch.regs.gpr[4] = 0; + kvmppc_set_gpr(vcpu, 4, 0); return H_SUCCESS; } tbl = (u64 *)page_address(page); - vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE]; + kvmppc_set_gpr(vcpu, 4, tbl[idx % TCES_PER_PAGE]); return H_SUCCESS; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 130bafdb1430..1ed6ec140701 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -393,7 +393,7 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { - unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; + unsigned long host_pcr_bit = 0, guest_pcr_bit = 0, cap = 0; struct kvmppc_vcore *vc = vcpu->arch.vcore; /* We can (emulate) our own architecture version and anything older */ @@ -424,9 +424,11 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) break; case PVR_ARCH_300: guest_pcr_bit = PCR_ARCH_300; + cap = H_GUEST_CAP_POWER9; break; case PVR_ARCH_31: guest_pcr_bit = PCR_ARCH_31; + cap = H_GUEST_CAP_POWER10; break; default: return -EINVAL; @@ -437,8 +439,14 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) if (guest_pcr_bit > host_pcr_bit) return -EINVAL; + if (kvmhv_on_pseries() && kvmhv_is_nestedv2()) { + if (!(cap & nested_capabilities)) + return -EINVAL; + } + spin_lock(&vc->lock); vc->arch_compat = arch_compat; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LOGICAL_PVR); /* * Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit * Also set all reserved PCR bits @@ -794,7 +802,7 @@ static void kvmppc_update_vpa_dispatch(struct kvm_vcpu *vcpu, vpa->enqueue_dispatch_tb = cpu_to_be64(be64_to_cpu(vpa->enqueue_dispatch_tb) + stolen); - __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now + vc->tb_offset, stolen); + __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now + kvmppc_get_tb_offset(vcpu), stolen); vcpu->arch.vpa.dirty = true; } @@ -845,9 +853,9 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu) { - if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207) + if (kvmppc_get_arch_compat(vcpu) >= PVR_ARCH_207) return true; - if ((!vcpu->arch.vcore->arch_compat) && + if ((!kvmppc_get_arch_compat(vcpu)) && cpu_has_feature(CPU_FTR_ARCH_207S)) return true; return false; @@ -868,7 +876,7 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, /* Guests can't breakpoint the hypervisor */ if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER) return H_P3; - vcpu->arch.ciabr = value1; + kvmppc_set_ciabr_hv(vcpu, value1); return H_SUCCESS; case H_SET_MODE_RESOURCE_SET_DAWR0: if (!kvmppc_power8_compatible(vcpu)) @@ -879,8 +887,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, return H_UNSUPPORTED_FLAG_START; if (value2 & DABRX_HYP) return H_P4; - vcpu->arch.dawr0 = value1; - vcpu->arch.dawrx0 = value2; + kvmppc_set_dawr0_hv(vcpu, value1); + kvmppc_set_dawrx0_hv(vcpu, value2); return H_SUCCESS; case H_SET_MODE_RESOURCE_SET_DAWR1: if (!kvmppc_power8_compatible(vcpu)) @@ -895,8 +903,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, return H_UNSUPPORTED_FLAG_START; if (value2 & DABRX_HYP) return H_P4; - vcpu->arch.dawr1 = value1; - vcpu->arch.dawrx1 = value2; + kvmppc_set_dawr1_hv(vcpu, value1); + kvmppc_set_dawrx1_hv(vcpu, value2); return H_SUCCESS; case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: /* @@ -1267,10 +1275,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_HOST; break; #endif - case H_RANDOM: - if (!arch_get_random_seed_longs(&vcpu->arch.regs.gpr[4], 1)) + case H_RANDOM: { + unsigned long rand; + + if (!arch_get_random_seed_longs(&rand, 1)) ret = H_HARDWARE; + kvmppc_set_gpr(vcpu, 4, rand); break; + } case H_RPT_INVALIDATE: ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4), kvmppc_get_gpr(vcpu, 5), @@ -1370,7 +1382,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) */ static void kvmppc_cede(struct kvm_vcpu *vcpu) { - vcpu->arch.shregs.msr |= MSR_EE; + __kvmppc_set_msr_hv(vcpu, __kvmppc_get_msr_hv(vcpu) | MSR_EE); vcpu->arch.ceded = 1; smp_mb(); if (vcpu->arch.prodded) { @@ -1544,7 +1556,7 @@ static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu) if (!(vcpu->arch.hfscr_permitted & HFSCR_PM)) return EMULATE_FAIL; - vcpu->arch.hfscr |= HFSCR_PM; + kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_PM); return RESUME_GUEST; } @@ -1554,7 +1566,7 @@ static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu) if (!(vcpu->arch.hfscr_permitted & HFSCR_EBB)) return EMULATE_FAIL; - vcpu->arch.hfscr |= HFSCR_EBB; + kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_EBB); return RESUME_GUEST; } @@ -1564,7 +1576,7 @@ static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu) if (!(vcpu->arch.hfscr_permitted & HFSCR_TM)) return EMULATE_FAIL; - vcpu->arch.hfscr |= HFSCR_TM; + kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_TM); return RESUME_GUEST; } @@ -1585,7 +1597,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, * That can happen due to a bug, or due to a machine check * occurring at just the wrong time. */ - if (vcpu->arch.shregs.msr & MSR_HV) { + if (__kvmppc_get_msr_hv(vcpu) & MSR_HV) { printk(KERN_EMERG "KVM trap in HV mode!\n"); printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", vcpu->arch.trap, kvmppc_get_pc(vcpu), @@ -1636,7 +1648,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, * so that it knows that the machine check occurred. */ if (!vcpu->kvm->arch.fwnmi_enabled) { - ulong flags = (vcpu->arch.shregs.msr & 0x083c0000) | + ulong flags = (__kvmppc_get_msr_hv(vcpu) & 0x083c0000) | (kvmppc_get_msr(vcpu) & SRR1_PREFIXED); kvmppc_core_queue_machine_check(vcpu, flags); r = RESUME_GUEST; @@ -1666,7 +1678,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, * as a result of a hypervisor emulation interrupt * (e40) getting turned into a 700 by BML RTAS. */ - flags = (vcpu->arch.shregs.msr & 0x1f0000ull) | + flags = (__kvmppc_get_msr_hv(vcpu) & 0x1f0000ull) | (kvmppc_get_msr(vcpu) & SRR1_PREFIXED); kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; @@ -1676,7 +1688,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, { int i; - if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) { + if (unlikely(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) { /* * Guest userspace executed sc 1. This can only be * reached by the P9 path because the old path @@ -1754,7 +1766,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, break; } - if (!(vcpu->arch.shregs.msr & MSR_DR)) + if (!(__kvmppc_get_msr_hv(vcpu) & MSR_DR)) vsid = vcpu->kvm->arch.vrma_slb_v; else vsid = vcpu->arch.fault_gpa; @@ -1778,7 +1790,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, long err; vcpu->arch.fault_dar = kvmppc_get_pc(vcpu); - vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr & + vcpu->arch.fault_dsisr = __kvmppc_get_msr_hv(vcpu) & DSISR_SRR1_MATCH_64S; if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) { /* @@ -1787,7 +1799,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, * hash fault handling below is v3 only (it uses ASDR * via fault_gpa). */ - if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE) + if (__kvmppc_get_msr_hv(vcpu) & HSRR1_HISI_WRITE) vcpu->arch.fault_dsisr |= DSISR_ISSTORE; r = RESUME_PAGE_FAULT; break; @@ -1801,7 +1813,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, break; } - if (!(vcpu->arch.shregs.msr & MSR_IR)) + if (!(__kvmppc_get_msr_hv(vcpu) & MSR_IR)) vsid = vcpu->kvm->arch.vrma_slb_v; else vsid = vcpu->arch.fault_gpa; @@ -1863,7 +1875,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, * Otherwise, we just generate a program interrupt to the guest. */ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: { - u64 cause = vcpu->arch.hfscr >> 56; + u64 cause = kvmppc_get_hfscr_hv(vcpu) >> 56; r = EMULATE_FAIL; if (cpu_has_feature(CPU_FTR_ARCH_300)) { @@ -1891,7 +1903,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, kvmppc_dump_regs(vcpu); printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", vcpu->arch.trap, kvmppc_get_pc(vcpu), - vcpu->arch.shregs.msr); + __kvmppc_get_msr_hv(vcpu)); run->hw.hardware_exit_reason = vcpu->arch.trap; r = RESUME_HOST; break; @@ -1915,11 +1927,11 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) * That can happen due to a bug, or due to a machine check * occurring at just the wrong time. */ - if (vcpu->arch.shregs.msr & MSR_HV) { + if (__kvmppc_get_msr_hv(vcpu) & MSR_HV) { pr_emerg("KVM trap in HV mode while nested!\n"); pr_emerg("trap=0x%x | pc=0x%lx | msr=0x%llx\n", vcpu->arch.trap, kvmppc_get_pc(vcpu), - vcpu->arch.shregs.msr); + __kvmppc_get_msr_hv(vcpu)); kvmppc_dump_regs(vcpu); return RESUME_HOST; } @@ -1976,7 +1988,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) vcpu->arch.fault_dar = kvmppc_get_pc(vcpu); vcpu->arch.fault_dsisr = kvmppc_get_msr(vcpu) & DSISR_SRR1_MATCH_64S; - if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE) + if (__kvmppc_get_msr_hv(vcpu) & HSRR1_HISI_WRITE) vcpu->arch.fault_dsisr |= DSISR_ISSTORE; srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvmhv_nested_page_fault(vcpu); @@ -2183,6 +2195,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, } vc->lpcr = new_lpcr; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LPCR); spin_unlock(&vc->lock); } @@ -2207,64 +2220,64 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.dabrx); break; case KVM_REG_PPC_DSCR: - *val = get_reg_val(id, vcpu->arch.dscr); + *val = get_reg_val(id, kvmppc_get_dscr_hv(vcpu)); break; case KVM_REG_PPC_PURR: - *val = get_reg_val(id, vcpu->arch.purr); + *val = get_reg_val(id, kvmppc_get_purr_hv(vcpu)); break; case KVM_REG_PPC_SPURR: - *val = get_reg_val(id, vcpu->arch.spurr); + *val = get_reg_val(id, kvmppc_get_spurr_hv(vcpu)); break; case KVM_REG_PPC_AMR: - *val = get_reg_val(id, vcpu->arch.amr); + *val = get_reg_val(id, kvmppc_get_amr_hv(vcpu)); break; case KVM_REG_PPC_UAMOR: - *val = get_reg_val(id, vcpu->arch.uamor); + *val = get_reg_val(id, kvmppc_get_uamor_hv(vcpu)); break; case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1: i = id - KVM_REG_PPC_MMCR0; - *val = get_reg_val(id, vcpu->arch.mmcr[i]); + *val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, i)); break; case KVM_REG_PPC_MMCR2: - *val = get_reg_val(id, vcpu->arch.mmcr[2]); + *val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, 2)); break; case KVM_REG_PPC_MMCRA: - *val = get_reg_val(id, vcpu->arch.mmcra); + *val = get_reg_val(id, kvmppc_get_mmcra_hv(vcpu)); break; case KVM_REG_PPC_MMCRS: *val = get_reg_val(id, vcpu->arch.mmcrs); break; case KVM_REG_PPC_MMCR3: - *val = get_reg_val(id, vcpu->arch.mmcr[3]); + *val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, 3)); break; case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: i = id - KVM_REG_PPC_PMC1; - *val = get_reg_val(id, vcpu->arch.pmc[i]); + *val = get_reg_val(id, kvmppc_get_pmc_hv(vcpu, i)); break; case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2: i = id - KVM_REG_PPC_SPMC1; *val = get_reg_val(id, vcpu->arch.spmc[i]); break; case KVM_REG_PPC_SIAR: - *val = get_reg_val(id, vcpu->arch.siar); + *val = get_reg_val(id, kvmppc_get_siar_hv(vcpu)); break; case KVM_REG_PPC_SDAR: - *val = get_reg_val(id, vcpu->arch.sdar); + *val = get_reg_val(id, kvmppc_get_siar_hv(vcpu)); break; case KVM_REG_PPC_SIER: - *val = get_reg_val(id, vcpu->arch.sier[0]); + *val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 0)); break; case KVM_REG_PPC_SIER2: - *val = get_reg_val(id, vcpu->arch.sier[1]); + *val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 1)); break; case KVM_REG_PPC_SIER3: - *val = get_reg_val(id, vcpu->arch.sier[2]); + *val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 2)); break; case KVM_REG_PPC_IAMR: - *val = get_reg_val(id, vcpu->arch.iamr); + *val = get_reg_val(id, kvmppc_get_iamr_hv(vcpu)); break; case KVM_REG_PPC_PSPB: - *val = get_reg_val(id, vcpu->arch.pspb); + *val = get_reg_val(id, kvmppc_get_pspb_hv(vcpu)); break; case KVM_REG_PPC_DPDES: /* @@ -2279,22 +2292,22 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.vcore->dpdes); break; case KVM_REG_PPC_VTB: - *val = get_reg_val(id, vcpu->arch.vcore->vtb); + *val = get_reg_val(id, kvmppc_get_vtb(vcpu)); break; case KVM_REG_PPC_DAWR: - *val = get_reg_val(id, vcpu->arch.dawr0); + *val = get_reg_val(id, kvmppc_get_dawr0_hv(vcpu)); break; case KVM_REG_PPC_DAWRX: - *val = get_reg_val(id, vcpu->arch.dawrx0); + *val = get_reg_val(id, kvmppc_get_dawrx0_hv(vcpu)); break; case KVM_REG_PPC_DAWR1: - *val = get_reg_val(id, vcpu->arch.dawr1); + *val = get_reg_val(id, kvmppc_get_dawr1_hv(vcpu)); break; case KVM_REG_PPC_DAWRX1: - *val = get_reg_val(id, vcpu->arch.dawrx1); + *val = get_reg_val(id, kvmppc_get_dawrx1_hv(vcpu)); break; case KVM_REG_PPC_CIABR: - *val = get_reg_val(id, vcpu->arch.ciabr); + *val = get_reg_val(id, kvmppc_get_ciabr_hv(vcpu)); break; case KVM_REG_PPC_CSIGR: *val = get_reg_val(id, vcpu->arch.csigr); @@ -2306,13 +2319,13 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.tcscr); break; case KVM_REG_PPC_PID: - *val = get_reg_val(id, vcpu->arch.pid); + *val = get_reg_val(id, kvmppc_get_pid(vcpu)); break; case KVM_REG_PPC_ACOP: *val = get_reg_val(id, vcpu->arch.acop); break; case KVM_REG_PPC_WORT: - *val = get_reg_val(id, vcpu->arch.wort); + *val = get_reg_val(id, kvmppc_get_wort_hv(vcpu)); break; case KVM_REG_PPC_TIDR: *val = get_reg_val(id, vcpu->arch.tid); @@ -2338,14 +2351,14 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, spin_unlock(&vcpu->arch.vpa_update_lock); break; case KVM_REG_PPC_TB_OFFSET: - *val = get_reg_val(id, vcpu->arch.vcore->tb_offset); + *val = get_reg_val(id, kvmppc_get_tb_offset(vcpu)); break; case KVM_REG_PPC_LPCR: case KVM_REG_PPC_LPCR_64: - *val = get_reg_val(id, vcpu->arch.vcore->lpcr); + *val = get_reg_val(id, kvmppc_get_lpcr(vcpu)); break; case KVM_REG_PPC_PPR: - *val = get_reg_val(id, vcpu->arch.ppr); + *val = get_reg_val(id, kvmppc_get_ppr_hv(vcpu)); break; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM case KVM_REG_PPC_TFHAR: @@ -2414,10 +2427,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, break; #endif case KVM_REG_PPC_ARCH_COMPAT: - *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); + *val = get_reg_val(id, kvmppc_get_arch_compat(vcpu)); break; case KVM_REG_PPC_DEC_EXPIRY: - *val = get_reg_val(id, vcpu->arch.dec_expires); + *val = get_reg_val(id, kvmppc_get_dec_expires(vcpu)); break; case KVM_REG_PPC_ONLINE: *val = get_reg_val(id, vcpu->arch.online); @@ -2425,6 +2438,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_PTCR: *val = get_reg_val(id, vcpu->kvm->arch.l1_ptcr); break; + case KVM_REG_PPC_FSCR: + *val = get_reg_val(id, kvmppc_get_fscr_hv(vcpu)); + break; default: r = -EINVAL; break; @@ -2453,29 +2469,29 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP; break; case KVM_REG_PPC_DSCR: - vcpu->arch.dscr = set_reg_val(id, *val); + kvmppc_set_dscr_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_PURR: - vcpu->arch.purr = set_reg_val(id, *val); + kvmppc_set_purr_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_SPURR: - vcpu->arch.spurr = set_reg_val(id, *val); + kvmppc_set_spurr_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_AMR: - vcpu->arch.amr = set_reg_val(id, *val); + kvmppc_set_amr_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_UAMOR: - vcpu->arch.uamor = set_reg_val(id, *val); + kvmppc_set_uamor_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1: i = id - KVM_REG_PPC_MMCR0; - vcpu->arch.mmcr[i] = set_reg_val(id, *val); + kvmppc_set_mmcr_hv(vcpu, i, set_reg_val(id, *val)); break; case KVM_REG_PPC_MMCR2: - vcpu->arch.mmcr[2] = set_reg_val(id, *val); + kvmppc_set_mmcr_hv(vcpu, 2, set_reg_val(id, *val)); break; case KVM_REG_PPC_MMCRA: - vcpu->arch.mmcra = set_reg_val(id, *val); + kvmppc_set_mmcra_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_MMCRS: vcpu->arch.mmcrs = set_reg_val(id, *val); @@ -2485,32 +2501,32 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, break; case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: i = id - KVM_REG_PPC_PMC1; - vcpu->arch.pmc[i] = set_reg_val(id, *val); + kvmppc_set_pmc_hv(vcpu, i, set_reg_val(id, *val)); break; case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2: i = id - KVM_REG_PPC_SPMC1; vcpu->arch.spmc[i] = set_reg_val(id, *val); break; case KVM_REG_PPC_SIAR: - vcpu->arch.siar = set_reg_val(id, *val); + kvmppc_set_siar_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_SDAR: - vcpu->arch.sdar = set_reg_val(id, *val); + kvmppc_set_sdar_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_SIER: - vcpu->arch.sier[0] = set_reg_val(id, *val); + kvmppc_set_sier_hv(vcpu, 0, set_reg_val(id, *val)); break; case KVM_REG_PPC_SIER2: - vcpu->arch.sier[1] = set_reg_val(id, *val); + kvmppc_set_sier_hv(vcpu, 1, set_reg_val(id, *val)); break; case KVM_REG_PPC_SIER3: - vcpu->arch.sier[2] = set_reg_val(id, *val); + kvmppc_set_sier_hv(vcpu, 2, set_reg_val(id, *val)); break; case KVM_REG_PPC_IAMR: - vcpu->arch.iamr = set_reg_val(id, *val); + kvmppc_set_iamr_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_PSPB: - vcpu->arch.pspb = set_reg_val(id, *val); + kvmppc_set_pspb_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DPDES: if (cpu_has_feature(CPU_FTR_ARCH_300)) @@ -2519,25 +2535,25 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.vcore->dpdes = set_reg_val(id, *val); break; case KVM_REG_PPC_VTB: - vcpu->arch.vcore->vtb = set_reg_val(id, *val); + kvmppc_set_vtb(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DAWR: - vcpu->arch.dawr0 = set_reg_val(id, *val); + kvmppc_set_dawr0_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DAWRX: - vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP; + kvmppc_set_dawrx0_hv(vcpu, set_reg_val(id, *val) & ~DAWRX_HYP); break; case KVM_REG_PPC_DAWR1: - vcpu->arch.dawr1 = set_reg_val(id, *val); + kvmppc_set_dawr1_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DAWRX1: - vcpu->arch.dawrx1 = set_reg_val(id, *val) & ~DAWRX_HYP; + kvmppc_set_dawrx1_hv(vcpu, set_reg_val(id, *val) & ~DAWRX_HYP); break; case KVM_REG_PPC_CIABR: - vcpu->arch.ciabr = set_reg_val(id, *val); + kvmppc_set_ciabr_hv(vcpu, set_reg_val(id, *val)); /* Don't allow setting breakpoints in hypervisor code */ - if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) - vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */ + if ((kvmppc_get_ciabr_hv(vcpu) & CIABR_PRIV) == CIABR_PRIV_HYPER) + kvmppc_set_ciabr_hv(vcpu, kvmppc_get_ciabr_hv(vcpu) & ~CIABR_PRIV); break; case KVM_REG_PPC_CSIGR: vcpu->arch.csigr = set_reg_val(id, *val); @@ -2549,13 +2565,13 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.tcscr = set_reg_val(id, *val); break; case KVM_REG_PPC_PID: - vcpu->arch.pid = set_reg_val(id, *val); + kvmppc_set_pid(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_ACOP: vcpu->arch.acop = set_reg_val(id, *val); break; case KVM_REG_PPC_WORT: - vcpu->arch.wort = set_reg_val(id, *val); + kvmppc_set_wort_hv(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_TIDR: vcpu->arch.tid = set_reg_val(id, *val); @@ -2602,10 +2618,11 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, * decrementer, which is better than a large one that * causes a hang. */ - if (!vcpu->arch.dec_expires && tb_offset) - vcpu->arch.dec_expires = get_tb() + tb_offset; + kvmppc_set_tb_offset(vcpu, tb_offset); + if (!kvmppc_get_dec_expires(vcpu) && tb_offset) + kvmppc_set_dec_expires(vcpu, get_tb() + tb_offset); - vcpu->arch.vcore->tb_offset = tb_offset; + kvmppc_set_tb_offset(vcpu, tb_offset); break; } case KVM_REG_PPC_LPCR: @@ -2615,7 +2632,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false); break; case KVM_REG_PPC_PPR: - vcpu->arch.ppr = set_reg_val(id, *val); + kvmppc_set_ppr_hv(vcpu, set_reg_val(id, *val)); break; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM case KVM_REG_PPC_TFHAR: @@ -2686,7 +2703,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DEC_EXPIRY: - vcpu->arch.dec_expires = set_reg_val(id, *val); + kvmppc_set_dec_expires(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_ONLINE: i = set_reg_val(id, *val); @@ -2699,6 +2716,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_PTCR: vcpu->kvm->arch.l1_ptcr = set_reg_val(id, *val); break; + case KVM_REG_PPC_FSCR: + kvmppc_set_fscr_hv(vcpu, set_reg_val(id, *val)); + break; default: r = -EINVAL; break; @@ -2916,19 +2936,26 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) vcpu->arch.shared_big_endian = false; #endif #endif - vcpu->arch.mmcr[0] = MMCR0_FC; + + if (kvmhv_is_nestedv2()) { + err = kvmhv_nestedv2_vcpu_create(vcpu, &vcpu->arch.nestedv2_io); + if (err < 0) + return err; + } + + kvmppc_set_mmcr_hv(vcpu, 0, MMCR0_FC); if (cpu_has_feature(CPU_FTR_ARCH_31)) { - vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT; - vcpu->arch.mmcra = MMCRA_BHRB_DISABLE; + kvmppc_set_mmcr_hv(vcpu, 0, kvmppc_get_mmcr_hv(vcpu, 0) | MMCR0_PMCCEXT); + kvmppc_set_mmcra_hv(vcpu, MMCRA_BHRB_DISABLE); } - vcpu->arch.ctrl = CTRL_RUNLATCH; + kvmppc_set_ctrl_hv(vcpu, CTRL_RUNLATCH); /* default to host PVR, since we can't spoof it */ kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR)); spin_lock_init(&vcpu->arch.vpa_update_lock); spin_lock_init(&vcpu->arch.tbacct_lock); vcpu->arch.busy_preempt = TB_NIL; - vcpu->arch.shregs.msr = MSR_ME; + __kvmppc_set_msr_hv(vcpu, MSR_ME); vcpu->arch.intr_msr = MSR_SF | MSR_ME; /* @@ -2938,29 +2965,30 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) * don't set the HFSCR_MSGP bit, and that causes those instructions * to trap and then we emulate them. */ - vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB | - HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP; + kvmppc_set_hfscr_hv(vcpu, HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB | + HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP); /* On POWER10 and later, allow prefixed instructions */ if (cpu_has_feature(CPU_FTR_ARCH_31)) - vcpu->arch.hfscr |= HFSCR_PREFIX; + kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_PREFIX); if (cpu_has_feature(CPU_FTR_HVMODE)) { - vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); + kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) & mfspr(SPRN_HFSCR)); + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) - vcpu->arch.hfscr |= HFSCR_TM; + kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_TM); #endif } if (cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr |= HFSCR_TM; - vcpu->arch.hfscr_permitted = vcpu->arch.hfscr; + vcpu->arch.hfscr_permitted = kvmppc_get_hfscr_hv(vcpu); /* * PM, EBB, TM are demand-faulted so start with it clear. */ - vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM); + kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) & ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM)); kvmppc_mmu_book3s_hv_init(vcpu); @@ -3071,6 +3099,8 @@ static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu) unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow); unpin_vpa(vcpu->kvm, &vcpu->arch.vpa); spin_unlock(&vcpu->arch.vpa_update_lock); + if (kvmhv_is_nestedv2()) + kvmhv_nestedv2_vcpu_free(vcpu, &vcpu->arch.nestedv2_io); } static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu) @@ -4035,10 +4065,58 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu) } } +static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit, + unsigned long lpcr, u64 *tb) +{ + struct kvmhv_nestedv2_io *io; + unsigned long msr, i; + int trap; + long rc; + + io = &vcpu->arch.nestedv2_io; + + msr = mfmsr(); + kvmppc_msr_hard_disable_set_facilities(vcpu, msr); + if (lazy_irq_pending()) + return 0; + + rc = kvmhv_nestedv2_flush_vcpu(vcpu, time_limit); + if (rc < 0) + return -EINVAL; + + accumulate_time(vcpu, &vcpu->arch.in_guest); + rc = plpar_guest_run_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id, + &trap, &i); + + if (rc != H_SUCCESS) { + pr_err("KVM Guest Run VCPU hcall failed\n"); + if (rc == H_INVALID_ELEMENT_ID) + pr_err("KVM: Guest Run VCPU invalid element id at %ld\n", i); + else if (rc == H_INVALID_ELEMENT_SIZE) + pr_err("KVM: Guest Run VCPU invalid element size at %ld\n", i); + else if (rc == H_INVALID_ELEMENT_VALUE) + pr_err("KVM: Guest Run VCPU invalid element value at %ld\n", i); + return -EINVAL; + } + accumulate_time(vcpu, &vcpu->arch.guest_exit); + + *tb = mftb(); + kvmppc_gsm_reset(io->vcpu_message); + kvmppc_gsm_reset(io->vcore_message); + kvmppc_gsbm_zero(&io->valids); + + rc = kvmhv_nestedv2_parse_output(vcpu); + if (rc < 0) + return -EINVAL; + + timer_rearm_host_dec(*tb); + + return trap; +} + /* call our hypervisor to load up HV regs and go */ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb) { - struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long host_psscr; unsigned long msr; struct hv_guest_state hvregs; @@ -4118,7 +4196,7 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ dec = (s32) dec; *tb = mftb(); - vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset); + vcpu->arch.dec_expires = dec + (*tb + kvmppc_get_tb_offset(vcpu)); timer_rearm_host_dec(*tb); @@ -4153,7 +4231,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu_vpa_increment_dispatch(vcpu); if (kvmhv_on_pseries()) { - trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb); + if (kvmhv_is_nestedv1()) + trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb); + else + trap = kvmhv_vcpu_entry_nestedv2(vcpu, time_limit, lpcr, tb); /* H_CEDE has to be handled now, not later */ if (trap == BOOK3S_INTERRUPT_SYSCALL && !nested && @@ -4176,7 +4257,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, __this_cpu_write(cpu_in_guest, NULL); if (trap == BOOK3S_INTERRUPT_SYSCALL && - !(vcpu->arch.shregs.msr & MSR_PR)) { + !(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) { unsigned long req = kvmppc_get_gpr(vcpu, 3); /* @@ -4655,7 +4736,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, if (!nested) { kvmppc_core_prepare_to_enter(vcpu); - if (vcpu->arch.shregs.msr & MSR_EE) { + if (__kvmppc_get_msr_hv(vcpu) & MSR_EE) { if (xive_interrupt_pending(vcpu)) kvmppc_inject_interrupt_hv(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0); @@ -4677,7 +4758,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, tb = mftb(); - kvmppc_update_vpa_dispatch_p9(vcpu, vc, tb + vc->tb_offset); + kvmppc_update_vpa_dispatch_p9(vcpu, vc, tb + kvmppc_get_tb_offset(vcpu)); trace_kvm_guest_enter(vcpu); @@ -4844,7 +4925,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) msr |= MSR_VSX; if ((cpu_has_feature(CPU_FTR_TM) || cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) && - (vcpu->arch.hfscr & HFSCR_TM)) + (kvmppc_get_hfscr_hv(vcpu) & HFSCR_TM)) msr |= MSR_TM; msr = msr_check_and_set(msr); @@ -4868,7 +4949,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_PAPR_HCALL) { accumulate_time(vcpu, &vcpu->arch.hcall); - if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) { + if (WARN_ON_ONCE(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) { /* * These should have been caught reflected * into the guest by now. Final sanity check: @@ -5133,6 +5214,14 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) if (++cores_done >= kvm->arch.online_vcores) break; } + + if (kvmhv_is_nestedv2()) { + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LPCR); + } + } } void kvmppc_setup_partition_table(struct kvm *kvm) @@ -5399,15 +5488,43 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) /* Allocate the guest's logical partition ID */ - lpid = kvmppc_alloc_lpid(); - if ((long)lpid < 0) - return -ENOMEM; - kvm->arch.lpid = lpid; + if (!kvmhv_is_nestedv2()) { + lpid = kvmppc_alloc_lpid(); + if ((long)lpid < 0) + return -ENOMEM; + kvm->arch.lpid = lpid; + } kvmppc_alloc_host_rm_ops(); kvmhv_vm_nested_init(kvm); + if (kvmhv_is_nestedv2()) { + long rc; + unsigned long guest_id; + + rc = plpar_guest_create(0, &guest_id); + + if (rc != H_SUCCESS) + pr_err("KVM: Create Guest hcall failed, rc=%ld\n", rc); + + switch (rc) { + case H_PARAMETER: + case H_FUNCTION: + case H_STATE: + return -EINVAL; + case H_NOT_ENOUGH_RESOURCES: + case H_ABORTED: + return -ENOMEM; + case H_AUTHORITY: + return -EPERM; + case H_NOT_AVAILABLE: + return -EBUSY; + } + kvm->arch.lpid = guest_id; + } + + /* * Since we don't flush the TLB when tearing down a VM, * and this lpid might have previously been used, @@ -5477,7 +5594,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) lpcr |= LPCR_HAIL; ret = kvmppc_init_vm_radix(kvm); if (ret) { - kvmppc_free_lpid(kvm->arch.lpid); + if (kvmhv_is_nestedv2()) + plpar_guest_delete(0, kvm->arch.lpid); + else + kvmppc_free_lpid(kvm->arch.lpid); return ret; } kvmppc_setup_partition_table(kvm); @@ -5567,10 +5687,14 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) kvm->arch.process_table = 0; if (kvm->arch.secure_guest) uv_svm_terminate(kvm->arch.lpid); - kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0); + if (!kvmhv_is_nestedv2()) + kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0); } - kvmppc_free_lpid(kvm->arch.lpid); + if (kvmhv_is_nestedv2()) + plpar_guest_delete(0, kvm->arch.lpid); + else + kvmppc_free_lpid(kvm->arch.lpid); kvmppc_free_pimap(kvm); } @@ -5982,6 +6106,8 @@ static int kvmhv_enable_nested(struct kvm *kvm) return -ENODEV; if (!radix_enabled()) return -ENODEV; + if (kvmhv_is_nestedv2()) + return -ENODEV; /* kvm == NULL means the caller is testing if the capability exists */ if (kvm) diff --git a/arch/powerpc/kvm/book3s_hv.h b/arch/powerpc/kvm/book3s_hv.h index 2f2e59d7d433..47b2c815641e 100644 --- a/arch/powerpc/kvm/book3s_hv.h +++ b/arch/powerpc/kvm/book3s_hv.h @@ -3,6 +3,8 @@ /* * Privileged (non-hypervisor) host registers to save. */ +#include "asm/guest-state-buffer.h" + struct p9_host_os_sprs { unsigned long iamr; unsigned long amr; @@ -50,3 +52,77 @@ void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next); #define start_timing(vcpu, next) do {} while (0) #define end_timing(vcpu) do {} while (0) #endif + +static inline void __kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 val) +{ + vcpu->arch.shregs.msr = val; + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_MSR); +} + +static inline u64 __kvmppc_get_msr_hv(struct kvm_vcpu *vcpu) +{ + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_MSR) < 0); + return vcpu->arch.shregs.msr; +} + +#define KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_SET(reg, size, iden) \ +static inline void kvmppc_set_##reg ##_hv(struct kvm_vcpu *vcpu, u##size val) \ +{ \ + vcpu->arch.reg = val; \ + kvmhv_nestedv2_mark_dirty(vcpu, iden); \ +} + +#define KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_GET(reg, size, iden) \ +static inline u##size kvmppc_get_##reg ##_hv(struct kvm_vcpu *vcpu) \ +{ \ + kvmhv_nestedv2_cached_reload(vcpu, iden); \ + return vcpu->arch.reg; \ +} + +#define KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(reg, size, iden) \ + KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_SET(reg, size, iden) \ + KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_GET(reg, size, iden) \ + +#define KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_SET(reg, size, iden) \ +static inline void kvmppc_set_##reg ##_hv(struct kvm_vcpu *vcpu, int i, u##size val) \ +{ \ + vcpu->arch.reg[i] = val; \ + kvmhv_nestedv2_mark_dirty(vcpu, iden(i)); \ +} + +#define KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_GET(reg, size, iden) \ +static inline u##size kvmppc_get_##reg ##_hv(struct kvm_vcpu *vcpu, int i) \ +{ \ + WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, iden(i)) < 0); \ + return vcpu->arch.reg[i]; \ +} + +#define KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(reg, size, iden) \ + KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_SET(reg, size, iden) \ + KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_GET(reg, size, iden) \ + +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(mmcra, 64, KVMPPC_GSID_MMCRA) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(hfscr, 64, KVMPPC_GSID_HFSCR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(fscr, 64, KVMPPC_GSID_FSCR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dscr, 64, KVMPPC_GSID_DSCR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(purr, 64, KVMPPC_GSID_PURR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(spurr, 64, KVMPPC_GSID_SPURR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(amr, 64, KVMPPC_GSID_AMR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(uamor, 64, KVMPPC_GSID_UAMOR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(siar, 64, KVMPPC_GSID_SIAR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(sdar, 64, KVMPPC_GSID_SDAR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(iamr, 64, KVMPPC_GSID_IAMR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawr0, 64, KVMPPC_GSID_DAWR0) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawr1, 64, KVMPPC_GSID_DAWR1) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawrx0, 64, KVMPPC_GSID_DAWRX0) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawrx1, 64, KVMPPC_GSID_DAWRX1) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(ciabr, 64, KVMPPC_GSID_CIABR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(wort, 64, KVMPPC_GSID_WORT) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(ppr, 64, KVMPPC_GSID_PPR) +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(ctrl, 64, KVMPPC_GSID_CTRL); + +KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(mmcr, 64, KVMPPC_GSID_MMCR) +KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(sier, 64, KVMPPC_GSID_SIER) +KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(pmc, 32, KVMPPC_GSID_PMC) + +KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(pspb, 32, KVMPPC_GSID_PSPB) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 0f5b021fa559..fa0e3a22cac0 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -32,6 +32,7 @@ #include "book3s_xics.h" #include "book3s_xive.h" +#include "book3s_hv.h" /* * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) @@ -182,9 +183,13 @@ EXPORT_SYMBOL_GPL(kvmppc_hwrng_present); long kvmppc_rm_h_random(struct kvm_vcpu *vcpu) { + unsigned long rand; + if (ppc_md.get_random_seed && - ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4])) + ppc_md.get_random_seed(&rand)) { + kvmppc_set_gpr(vcpu, 4, rand); return H_SUCCESS; + } return H_HARDWARE; } @@ -510,7 +515,7 @@ void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) */ if ((msr & MSR_TS_MASK) == MSR_TS_MASK) msr &= ~MSR_TS_MASK; - vcpu->arch.shregs.msr = msr; + __kvmppc_set_msr_hv(vcpu, msr); kvmppc_end_cede(vcpu); } EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv); @@ -548,7 +553,7 @@ static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) kvmppc_set_srr0(vcpu, pc); kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); kvmppc_set_pc(vcpu, new_pc); - vcpu->arch.shregs.msr = new_msr; + __kvmppc_set_msr_hv(vcpu, new_msr); } void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 377d0b4a05ee..3b658b8696bc 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -428,10 +428,12 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) return vcpu->arch.trap; } +unsigned long nested_capabilities; + long kvmhv_nested_init(void) { long int ptb_order; - unsigned long ptcr; + unsigned long ptcr, host_capabilities; long rc; if (!kvmhv_on_pseries()) @@ -439,6 +441,29 @@ long kvmhv_nested_init(void) if (!radix_enabled()) return -ENODEV; + rc = plpar_guest_get_capabilities(0, &host_capabilities); + if (rc == H_SUCCESS) { + unsigned long capabilities = 0; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + capabilities |= H_GUEST_CAP_POWER10; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + capabilities |= H_GUEST_CAP_POWER9; + + nested_capabilities = capabilities & host_capabilities; + rc = plpar_guest_set_capabilities(0, nested_capabilities); + if (rc != H_SUCCESS) { + pr_err("kvm-hv: Could not configure parent hypervisor capabilities (rc=%ld)", + rc); + return -ENODEV; + } + + static_branch_enable(&__kvmhv_is_nestedv2); + return 0; + } + + pr_info("kvm-hv: nestedv2 get capabilities hcall failed, falling back to nestedv1 (rc=%ld)\n", + rc); /* Partition table entry is 1<<4 bytes in size, hence the 4. */ ptb_order = KVM_MAX_NESTED_GUESTS_SHIFT + 4; /* Minimum partition table size is 1<<12 bytes */ @@ -478,7 +503,7 @@ void kvmhv_nested_exit(void) } } -static void kvmhv_flush_lpid(unsigned int lpid) +static void kvmhv_flush_lpid(u64 lpid) { long rc; @@ -500,17 +525,22 @@ static void kvmhv_flush_lpid(unsigned int lpid) pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc); } -void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1) +void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1) { if (!kvmhv_on_pseries()) { mmu_partition_table_set_entry(lpid, dw0, dw1, true); return; } - pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0); - pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1); - /* L0 will do the necessary barriers */ - kvmhv_flush_lpid(lpid); + if (kvmhv_is_nestedv1()) { + pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0); + pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1); + /* L0 will do the necessary barriers */ + kvmhv_flush_lpid(lpid); + } + + if (kvmhv_is_nestedv2()) + kvmhv_nestedv2_set_ptbl_entry(lpid, dw0, dw1); } static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp) diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c new file mode 100644 index 000000000000..fd3c4f2d9480 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c @@ -0,0 +1,994 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2023 Jordan Niethe, IBM Corp. <jniethe5@gmail.com> + * + * Authors: + * Jordan Niethe <jniethe5@gmail.com> + * + * Description: KVM functions specific to running on Book 3S + * processors as a NESTEDv2 guest. + * + */ + +#include "linux/blk-mq.h" +#include "linux/console.h" +#include "linux/gfp_types.h" +#include "linux/signal.h" +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/pgtable.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/hvcall.h> +#include <asm/pgalloc.h> +#include <asm/reg.h> +#include <asm/plpar_wrappers.h> +#include <asm/guest-state-buffer.h> +#include "trace_hv.h" + +struct static_key_false __kvmhv_is_nestedv2 __read_mostly; +EXPORT_SYMBOL_GPL(__kvmhv_is_nestedv2); + + +static size_t +gs_msg_ops_kvmhv_nestedv2_config_get_size(struct kvmppc_gs_msg *gsm) +{ + u16 ids[] = { + KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE, + KVMPPC_GSID_RUN_INPUT, + KVMPPC_GSID_RUN_OUTPUT, + + }; + size_t size = 0; + + for (int i = 0; i < ARRAY_SIZE(ids); i++) + size += kvmppc_gse_total_size(kvmppc_gsid_size(ids[i])); + return size; +} + +static int +gs_msg_ops_kvmhv_nestedv2_config_fill_info(struct kvmppc_gs_buff *gsb, + struct kvmppc_gs_msg *gsm) +{ + struct kvmhv_nestedv2_config *cfg; + int rc; + + cfg = gsm->data; + + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE)) { + rc = kvmppc_gse_put_u64(gsb, KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE, + cfg->vcpu_run_output_size); + if (rc < 0) + return rc; + } + + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_RUN_INPUT)) { + rc = kvmppc_gse_put_buff_info(gsb, KVMPPC_GSID_RUN_INPUT, + cfg->vcpu_run_input_cfg); + if (rc < 0) + return rc; + } + + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_RUN_OUTPUT)) { + kvmppc_gse_put_buff_info(gsb, KVMPPC_GSID_RUN_OUTPUT, + cfg->vcpu_run_output_cfg); + if (rc < 0) + return rc; + } + + return 0; +} + +static int +gs_msg_ops_kvmhv_nestedv2_config_refresh_info(struct kvmppc_gs_msg *gsm, + struct kvmppc_gs_buff *gsb) +{ + struct kvmhv_nestedv2_config *cfg; + struct kvmppc_gs_parser gsp = { 0 }; + struct kvmppc_gs_elem *gse; + int rc; + + cfg = gsm->data; + + rc = kvmppc_gse_parse(&gsp, gsb); + if (rc < 0) + return rc; + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE); + if (gse) + cfg->vcpu_run_output_size = kvmppc_gse_get_u64(gse); + return 0; +} + +static struct kvmppc_gs_msg_ops config_msg_ops = { + .get_size = gs_msg_ops_kvmhv_nestedv2_config_get_size, + .fill_info = gs_msg_ops_kvmhv_nestedv2_config_fill_info, + .refresh_info = gs_msg_ops_kvmhv_nestedv2_config_refresh_info, +}; + +static size_t gs_msg_ops_vcpu_get_size(struct kvmppc_gs_msg *gsm) +{ + struct kvmppc_gs_bitmap gsbm = { 0 }; + size_t size = 0; + u16 iden; + + kvmppc_gsbm_fill(&gsbm); + kvmppc_gsbm_for_each(&gsbm, iden) + { + switch (iden) { + case KVMPPC_GSID_HOST_STATE_SIZE: + case KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE: + case KVMPPC_GSID_PARTITION_TABLE: + case KVMPPC_GSID_PROCESS_TABLE: + case KVMPPC_GSID_RUN_INPUT: + case KVMPPC_GSID_RUN_OUTPUT: + break; + default: + size += kvmppc_gse_total_size(kvmppc_gsid_size(iden)); + } + } + return size; +} + +static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb, + struct kvmppc_gs_msg *gsm) +{ + struct kvm_vcpu *vcpu; + vector128 v; + int rc, i; + u16 iden; + + vcpu = gsm->data; + + kvmppc_gsm_for_each(gsm, iden) + { + rc = 0; + + if ((gsm->flags & KVMPPC_GS_FLAGS_WIDE) != + (kvmppc_gsid_flags(iden) & KVMPPC_GS_FLAGS_WIDE)) + continue; + + switch (iden) { + case KVMPPC_GSID_DSCR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.dscr); + break; + case KVMPPC_GSID_MMCRA: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.mmcra); + break; + case KVMPPC_GSID_HFSCR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.hfscr); + break; + case KVMPPC_GSID_PURR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.purr); + break; + case KVMPPC_GSID_SPURR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.spurr); + break; + case KVMPPC_GSID_AMR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.amr); + break; + case KVMPPC_GSID_UAMOR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.uamor); + break; + case KVMPPC_GSID_SIAR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.siar); + break; + case KVMPPC_GSID_SDAR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.sdar); + break; + case KVMPPC_GSID_IAMR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.iamr); + break; + case KVMPPC_GSID_DAWR0: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.dawr0); + break; + case KVMPPC_GSID_DAWR1: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.dawr1); + break; + case KVMPPC_GSID_DAWRX0: + rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.dawrx0); + break; + case KVMPPC_GSID_DAWRX1: + rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.dawrx1); + break; + case KVMPPC_GSID_CIABR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ciabr); + break; + case KVMPPC_GSID_WORT: + rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.wort); + break; + case KVMPPC_GSID_PPR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ppr); + break; + case KVMPPC_GSID_PSPB: + rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.pspb); + break; + case KVMPPC_GSID_TAR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.tar); + break; + case KVMPPC_GSID_FSCR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.fscr); + break; + case KVMPPC_GSID_EBBHR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ebbhr); + break; + case KVMPPC_GSID_EBBRR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ebbrr); + break; + case KVMPPC_GSID_BESCR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.bescr); + break; + case KVMPPC_GSID_IC: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ic); + break; + case KVMPPC_GSID_CTRL: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ctrl); + break; + case KVMPPC_GSID_PIDR: + rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.pid); + break; + case KVMPPC_GSID_AMOR: { + u64 amor = ~0; + + rc = kvmppc_gse_put_u64(gsb, iden, amor); + break; + } + case KVMPPC_GSID_VRSAVE: + rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.vrsave); + break; + case KVMPPC_GSID_MMCR(0)... KVMPPC_GSID_MMCR(3): + i = iden - KVMPPC_GSID_MMCR(0); + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.mmcr[i]); + break; + case KVMPPC_GSID_SIER(0)... KVMPPC_GSID_SIER(2): + i = iden - KVMPPC_GSID_SIER(0); + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.sier[i]); + break; + case KVMPPC_GSID_PMC(0)... KVMPPC_GSID_PMC(5): + i = iden - KVMPPC_GSID_PMC(0); + rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.pmc[i]); + break; + case KVMPPC_GSID_GPR(0)... KVMPPC_GSID_GPR(31): + i = iden - KVMPPC_GSID_GPR(0); + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.regs.gpr[i]); + break; + case KVMPPC_GSID_CR: + rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.regs.ccr); + break; + case KVMPPC_GSID_XER: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.regs.xer); + break; + case KVMPPC_GSID_CTR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.regs.ctr); + break; + case KVMPPC_GSID_LR: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.regs.link); + break; + case KVMPPC_GSID_NIA: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.regs.nip); + break; + case KVMPPC_GSID_SRR0: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.shregs.srr0); + break; + case KVMPPC_GSID_SRR1: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.shregs.srr1); + break; + case KVMPPC_GSID_SPRG0: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.shregs.sprg0); + break; + case KVMPPC_GSID_SPRG1: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.shregs.sprg1); + break; + case KVMPPC_GSID_SPRG2: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.shregs.sprg2); + break; + case KVMPPC_GSID_SPRG3: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.shregs.sprg3); + break; + case KVMPPC_GSID_DAR: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.shregs.dar); + break; + case KVMPPC_GSID_DSISR: + rc = kvmppc_gse_put_u32(gsb, iden, + vcpu->arch.shregs.dsisr); + break; + case KVMPPC_GSID_MSR: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.shregs.msr); + break; + case KVMPPC_GSID_VTB: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.vcore->vtb); + break; + case KVMPPC_GSID_LPCR: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.vcore->lpcr); + break; + case KVMPPC_GSID_TB_OFFSET: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.vcore->tb_offset); + break; + case KVMPPC_GSID_FPSCR: + rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.fp.fpscr); + break; + case KVMPPC_GSID_VSRS(0)... KVMPPC_GSID_VSRS(31): + i = iden - KVMPPC_GSID_VSRS(0); + memcpy(&v, &vcpu->arch.fp.fpr[i], + sizeof(vcpu->arch.fp.fpr[i])); + rc = kvmppc_gse_put_vector128(gsb, iden, &v); + break; +#ifdef CONFIG_VSX + case KVMPPC_GSID_VSCR: + rc = kvmppc_gse_put_u32(gsb, iden, + vcpu->arch.vr.vscr.u[3]); + break; + case KVMPPC_GSID_VSRS(32)... KVMPPC_GSID_VSRS(63): + i = iden - KVMPPC_GSID_VSRS(32); + rc = kvmppc_gse_put_vector128(gsb, iden, + &vcpu->arch.vr.vr[i]); + break; +#endif + case KVMPPC_GSID_DEC_EXPIRY_TB: { + u64 dw; + + dw = vcpu->arch.dec_expires - + vcpu->arch.vcore->tb_offset; + rc = kvmppc_gse_put_u64(gsb, iden, dw); + break; + } + case KVMPPC_GSID_LOGICAL_PVR: + rc = kvmppc_gse_put_u32(gsb, iden, + vcpu->arch.vcore->arch_compat); + break; + } + + if (rc < 0) + return rc; + } + + return 0; +} + +static int gs_msg_ops_vcpu_refresh_info(struct kvmppc_gs_msg *gsm, + struct kvmppc_gs_buff *gsb) +{ + struct kvmppc_gs_parser gsp = { 0 }; + struct kvmhv_nestedv2_io *io; + struct kvmppc_gs_bitmap *valids; + struct kvm_vcpu *vcpu; + struct kvmppc_gs_elem *gse; + vector128 v; + int rc, i; + u16 iden; + + vcpu = gsm->data; + + rc = kvmppc_gse_parse(&gsp, gsb); + if (rc < 0) + return rc; + + io = &vcpu->arch.nestedv2_io; + valids = &io->valids; + + kvmppc_gsp_for_each(&gsp, iden, gse) + { + switch (iden) { + case KVMPPC_GSID_DSCR: + vcpu->arch.dscr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_MMCRA: + vcpu->arch.mmcra = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_HFSCR: + vcpu->arch.hfscr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_PURR: + vcpu->arch.purr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_SPURR: + vcpu->arch.spurr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_AMR: + vcpu->arch.amr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_UAMOR: + vcpu->arch.uamor = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_SIAR: + vcpu->arch.siar = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_SDAR: + vcpu->arch.sdar = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_IAMR: + vcpu->arch.iamr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_DAWR0: + vcpu->arch.dawr0 = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_DAWR1: + vcpu->arch.dawr1 = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_DAWRX0: + vcpu->arch.dawrx0 = kvmppc_gse_get_u32(gse); + break; + case KVMPPC_GSID_DAWRX1: + vcpu->arch.dawrx1 = kvmppc_gse_get_u32(gse); + break; + case KVMPPC_GSID_CIABR: + vcpu->arch.ciabr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_WORT: + vcpu->arch.wort = kvmppc_gse_get_u32(gse); + break; + case KVMPPC_GSID_PPR: + vcpu->arch.ppr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_PSPB: + vcpu->arch.pspb = kvmppc_gse_get_u32(gse); + break; + case KVMPPC_GSID_TAR: + vcpu->arch.tar = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_FSCR: + vcpu->arch.fscr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_EBBHR: + vcpu->arch.ebbhr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_EBBRR: + vcpu->arch.ebbrr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_BESCR: + vcpu->arch.bescr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_IC: + vcpu->arch.ic = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_CTRL: + vcpu->arch.ctrl = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_PIDR: + vcpu->arch.pid = kvmppc_gse_get_u32(gse); + break; + case KVMPPC_GSID_AMOR: + break; + case KVMPPC_GSID_VRSAVE: + vcpu->arch.vrsave = kvmppc_gse_get_u32(gse); + break; + case KVMPPC_GSID_MMCR(0)... KVMPPC_GSID_MMCR(3): + i = iden - KVMPPC_GSID_MMCR(0); + vcpu->arch.mmcr[i] = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_SIER(0)... KVMPPC_GSID_SIER(2): + i = iden - KVMPPC_GSID_SIER(0); + vcpu->arch.sier[i] = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_PMC(0)... KVMPPC_GSID_PMC(5): + i = iden - KVMPPC_GSID_PMC(0); + vcpu->arch.pmc[i] = kvmppc_gse_get_u32(gse); + break; + case KVMPPC_GSID_GPR(0)... KVMPPC_GSID_GPR(31): + i = iden - KVMPPC_GSID_GPR(0); + vcpu->arch.regs.gpr[i] = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_CR: + vcpu->arch.regs.ccr = kvmppc_gse_get_u32(gse); + break; + case KVMPPC_GSID_XER: + vcpu->arch.regs.xer = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_CTR: + vcpu->arch.regs.ctr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_LR: + vcpu->arch.regs.link = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_NIA: + vcpu->arch.regs.nip = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_SRR0: + vcpu->arch.shregs.srr0 = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_SRR1: + vcpu->arch.shregs.srr1 = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_SPRG0: + vcpu->arch.shregs.sprg0 = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_SPRG1: + vcpu->arch.shregs.sprg1 = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_SPRG2: + vcpu->arch.shregs.sprg2 = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_SPRG3: + vcpu->arch.shregs.sprg3 = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_DAR: + vcpu->arch.shregs.dar = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_DSISR: + vcpu->arch.shregs.dsisr = kvmppc_gse_get_u32(gse); + break; + case KVMPPC_GSID_MSR: + vcpu->arch.shregs.msr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_VTB: + vcpu->arch.vcore->vtb = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_LPCR: + vcpu->arch.vcore->lpcr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_TB_OFFSET: + vcpu->arch.vcore->tb_offset = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_FPSCR: + vcpu->arch.fp.fpscr = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_VSRS(0)... KVMPPC_GSID_VSRS(31): + kvmppc_gse_get_vector128(gse, &v); + i = iden - KVMPPC_GSID_VSRS(0); + memcpy(&vcpu->arch.fp.fpr[i], &v, + sizeof(vcpu->arch.fp.fpr[i])); + break; +#ifdef CONFIG_VSX + case KVMPPC_GSID_VSCR: + vcpu->arch.vr.vscr.u[3] = kvmppc_gse_get_u32(gse); + break; + case KVMPPC_GSID_VSRS(32)... KVMPPC_GSID_VSRS(63): + i = iden - KVMPPC_GSID_VSRS(32); + kvmppc_gse_get_vector128(gse, &vcpu->arch.vr.vr[i]); + break; +#endif + case KVMPPC_GSID_HDAR: + vcpu->arch.fault_dar = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_HDSISR: + vcpu->arch.fault_dsisr = kvmppc_gse_get_u32(gse); + break; + case KVMPPC_GSID_ASDR: + vcpu->arch.fault_gpa = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_HEIR: + vcpu->arch.emul_inst = kvmppc_gse_get_u64(gse); + break; + case KVMPPC_GSID_DEC_EXPIRY_TB: { + u64 dw; + + dw = kvmppc_gse_get_u64(gse); + vcpu->arch.dec_expires = + dw + vcpu->arch.vcore->tb_offset; + break; + } + case KVMPPC_GSID_LOGICAL_PVR: + vcpu->arch.vcore->arch_compat = kvmppc_gse_get_u32(gse); + break; + default: + continue; + } + kvmppc_gsbm_set(valids, iden); + } + + return 0; +} + +static struct kvmppc_gs_msg_ops vcpu_message_ops = { + .get_size = gs_msg_ops_vcpu_get_size, + .fill_info = gs_msg_ops_vcpu_fill_info, + .refresh_info = gs_msg_ops_vcpu_refresh_info, +}; + +static int kvmhv_nestedv2_host_create(struct kvm_vcpu *vcpu, + struct kvmhv_nestedv2_io *io) +{ + struct kvmhv_nestedv2_config *cfg; + struct kvmppc_gs_buff *gsb, *vcpu_run_output, *vcpu_run_input; + unsigned long guest_id, vcpu_id; + struct kvmppc_gs_msg *gsm, *vcpu_message, *vcore_message; + int rc; + + cfg = &io->cfg; + guest_id = vcpu->kvm->arch.lpid; + vcpu_id = vcpu->vcpu_id; + + gsm = kvmppc_gsm_new(&config_msg_ops, cfg, KVMPPC_GS_FLAGS_WIDE, + GFP_KERNEL); + if (!gsm) { + rc = -ENOMEM; + goto err; + } + + gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), guest_id, vcpu_id, + GFP_KERNEL); + if (!gsb) { + rc = -ENOMEM; + goto free_gsm; + } + + rc = kvmppc_gsb_receive_datum(gsb, gsm, + KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE); + if (rc < 0) { + pr_err("KVM-NESTEDv2: couldn't get vcpu run output buffer minimum size\n"); + goto free_gsb; + } + + vcpu_run_output = kvmppc_gsb_new(cfg->vcpu_run_output_size, guest_id, + vcpu_id, GFP_KERNEL); + if (!vcpu_run_output) { + rc = -ENOMEM; + goto free_gsb; + } + + cfg->vcpu_run_output_cfg.address = kvmppc_gsb_paddress(vcpu_run_output); + cfg->vcpu_run_output_cfg.size = kvmppc_gsb_capacity(vcpu_run_output); + io->vcpu_run_output = vcpu_run_output; + + gsm->flags = 0; + rc = kvmppc_gsb_send_datum(gsb, gsm, KVMPPC_GSID_RUN_OUTPUT); + if (rc < 0) { + pr_err("KVM-NESTEDv2: couldn't set vcpu run output buffer\n"); + goto free_gs_out; + } + + vcpu_message = kvmppc_gsm_new(&vcpu_message_ops, vcpu, 0, GFP_KERNEL); + if (!vcpu_message) { + rc = -ENOMEM; + goto free_gs_out; + } + kvmppc_gsm_include_all(vcpu_message); + + io->vcpu_message = vcpu_message; + + vcpu_run_input = kvmppc_gsb_new(kvmppc_gsm_size(vcpu_message), guest_id, + vcpu_id, GFP_KERNEL); + if (!vcpu_run_input) { + rc = -ENOMEM; + goto free_vcpu_message; + } + + io->vcpu_run_input = vcpu_run_input; + cfg->vcpu_run_input_cfg.address = kvmppc_gsb_paddress(vcpu_run_input); + cfg->vcpu_run_input_cfg.size = kvmppc_gsb_capacity(vcpu_run_input); + rc = kvmppc_gsb_send_datum(gsb, gsm, KVMPPC_GSID_RUN_INPUT); + if (rc < 0) { + pr_err("KVM-NESTEDv2: couldn't set vcpu run input buffer\n"); + goto free_vcpu_run_input; + } + + vcore_message = kvmppc_gsm_new(&vcpu_message_ops, vcpu, + KVMPPC_GS_FLAGS_WIDE, GFP_KERNEL); + if (!vcore_message) { + rc = -ENOMEM; + goto free_vcpu_run_input; + } + + kvmppc_gsm_include_all(vcore_message); + kvmppc_gsbm_clear(&vcore_message->bitmap, KVMPPC_GSID_LOGICAL_PVR); + io->vcore_message = vcore_message; + + kvmppc_gsbm_fill(&io->valids); + kvmppc_gsm_free(gsm); + kvmppc_gsb_free(gsb); + return 0; + +free_vcpu_run_input: + kvmppc_gsb_free(vcpu_run_input); +free_vcpu_message: + kvmppc_gsm_free(vcpu_message); +free_gs_out: + kvmppc_gsb_free(vcpu_run_output); +free_gsb: + kvmppc_gsb_free(gsb); +free_gsm: + kvmppc_gsm_free(gsm); +err: + return rc; +} + +/** + * __kvmhv_nestedv2_mark_dirty() - mark a Guest State ID to be sent to the host + * @vcpu: vcpu + * @iden: guest state ID + * + * Mark a guest state ID as having been changed by the L1 host and thus + * the new value must be sent to the L0 hypervisor. See kvmhv_nestedv2_flush_vcpu() + */ +int __kvmhv_nestedv2_mark_dirty(struct kvm_vcpu *vcpu, u16 iden) +{ + struct kvmhv_nestedv2_io *io; + struct kvmppc_gs_bitmap *valids; + struct kvmppc_gs_msg *gsm; + + if (!iden) + return 0; + + io = &vcpu->arch.nestedv2_io; + valids = &io->valids; + gsm = io->vcpu_message; + kvmppc_gsm_include(gsm, iden); + gsm = io->vcore_message; + kvmppc_gsm_include(gsm, iden); + kvmppc_gsbm_set(valids, iden); + return 0; +} +EXPORT_SYMBOL_GPL(__kvmhv_nestedv2_mark_dirty); + +/** + * __kvmhv_nestedv2_cached_reload() - reload a Guest State ID from the host + * @vcpu: vcpu + * @iden: guest state ID + * + * Reload the value for the guest state ID from the L0 host into the L1 host. + * This is cached so that going out to the L0 host only happens if necessary. + */ +int __kvmhv_nestedv2_cached_reload(struct kvm_vcpu *vcpu, u16 iden) +{ + struct kvmhv_nestedv2_io *io; + struct kvmppc_gs_bitmap *valids; + struct kvmppc_gs_buff *gsb; + struct kvmppc_gs_msg gsm; + int rc; + + if (!iden) + return 0; + + io = &vcpu->arch.nestedv2_io; + valids = &io->valids; + if (kvmppc_gsbm_test(valids, iden)) + return 0; + + gsb = io->vcpu_run_input; + kvmppc_gsm_init(&gsm, &vcpu_message_ops, vcpu, kvmppc_gsid_flags(iden)); + rc = kvmppc_gsb_receive_datum(gsb, &gsm, iden); + if (rc < 0) { + pr_err("KVM-NESTEDv2: couldn't get GSID: 0x%x\n", iden); + return rc; + } + return 0; +} +EXPORT_SYMBOL_GPL(__kvmhv_nestedv2_cached_reload); + +/** + * kvmhv_nestedv2_flush_vcpu() - send modified Guest State IDs to the host + * @vcpu: vcpu + * @time_limit: hdec expiry tb + * + * Send the values marked by __kvmhv_nestedv2_mark_dirty() to the L0 host. + * Thread wide values are copied to the H_GUEST_RUN_VCPU input buffer. Guest + * wide values need to be sent with H_GUEST_SET first. + * + * The hdec tb offset is always sent to L0 host. + */ +int kvmhv_nestedv2_flush_vcpu(struct kvm_vcpu *vcpu, u64 time_limit) +{ + struct kvmhv_nestedv2_io *io; + struct kvmppc_gs_buff *gsb; + struct kvmppc_gs_msg *gsm; + int rc; + + io = &vcpu->arch.nestedv2_io; + gsb = io->vcpu_run_input; + gsm = io->vcore_message; + rc = kvmppc_gsb_send_data(gsb, gsm); + if (rc < 0) { + pr_err("KVM-NESTEDv2: couldn't set guest wide elements\n"); + return rc; + } + + gsm = io->vcpu_message; + kvmppc_gsb_reset(gsb); + rc = kvmppc_gsm_fill_info(gsm, gsb); + if (rc < 0) { + pr_err("KVM-NESTEDv2: couldn't fill vcpu run input buffer\n"); + return rc; + } + + rc = kvmppc_gse_put_u64(gsb, KVMPPC_GSID_HDEC_EXPIRY_TB, time_limit); + if (rc < 0) + return rc; + return 0; +} +EXPORT_SYMBOL_GPL(kvmhv_nestedv2_flush_vcpu); + +/** + * kvmhv_nestedv2_set_ptbl_entry() - send partition and process table state to + * L0 host + * @lpid: guest id + * @dw0: partition table double word + * @dw1: process table double word + */ +int kvmhv_nestedv2_set_ptbl_entry(unsigned long lpid, u64 dw0, u64 dw1) +{ + struct kvmppc_gs_part_table patbl; + struct kvmppc_gs_proc_table prtbl; + struct kvmppc_gs_buff *gsb; + size_t size; + int rc; + + size = kvmppc_gse_total_size( + kvmppc_gsid_size(KVMPPC_GSID_PARTITION_TABLE)) + + kvmppc_gse_total_size( + kvmppc_gsid_size(KVMPPC_GSID_PROCESS_TABLE)) + + sizeof(struct kvmppc_gs_header); + gsb = kvmppc_gsb_new(size, lpid, 0, GFP_KERNEL); + if (!gsb) + return -ENOMEM; + + patbl.address = dw0 & RPDB_MASK; + patbl.ea_bits = ((((dw0 & RTS1_MASK) >> (RTS1_SHIFT - 3)) | + ((dw0 & RTS2_MASK) >> RTS2_SHIFT)) + + 31); + patbl.gpd_size = 1ul << ((dw0 & RPDS_MASK) + 3); + rc = kvmppc_gse_put_part_table(gsb, KVMPPC_GSID_PARTITION_TABLE, patbl); + if (rc < 0) + goto free_gsb; + + prtbl.address = dw1 & PRTB_MASK; + prtbl.gpd_size = 1ul << ((dw1 & PRTS_MASK) + 12); + rc = kvmppc_gse_put_proc_table(gsb, KVMPPC_GSID_PROCESS_TABLE, prtbl); + if (rc < 0) + goto free_gsb; + + rc = kvmppc_gsb_send(gsb, KVMPPC_GS_FLAGS_WIDE); + if (rc < 0) { + pr_err("KVM-NESTEDv2: couldn't set the PATE\n"); + goto free_gsb; + } + + kvmppc_gsb_free(gsb); + return 0; + +free_gsb: + kvmppc_gsb_free(gsb); + return rc; +} +EXPORT_SYMBOL_GPL(kvmhv_nestedv2_set_ptbl_entry); + +/** + * kvmhv_nestedv2_parse_output() - receive values from H_GUEST_RUN_VCPU output + * @vcpu: vcpu + * + * Parse the output buffer from H_GUEST_RUN_VCPU to update vcpu. + */ +int kvmhv_nestedv2_parse_output(struct kvm_vcpu *vcpu) +{ + struct kvmhv_nestedv2_io *io; + struct kvmppc_gs_buff *gsb; + struct kvmppc_gs_msg gsm; + + io = &vcpu->arch.nestedv2_io; + gsb = io->vcpu_run_output; + + vcpu->arch.fault_dar = 0; + vcpu->arch.fault_dsisr = 0; + vcpu->arch.fault_gpa = 0; + vcpu->arch.emul_inst = KVM_INST_FETCH_FAILED; + + kvmppc_gsm_init(&gsm, &vcpu_message_ops, vcpu, 0); + return kvmppc_gsm_refresh_info(&gsm, gsb); +} +EXPORT_SYMBOL_GPL(kvmhv_nestedv2_parse_output); + +static void kvmhv_nestedv2_host_free(struct kvm_vcpu *vcpu, + struct kvmhv_nestedv2_io *io) +{ + kvmppc_gsm_free(io->vcpu_message); + kvmppc_gsm_free(io->vcore_message); + kvmppc_gsb_free(io->vcpu_run_input); + kvmppc_gsb_free(io->vcpu_run_output); +} + +int __kvmhv_nestedv2_reload_ptregs(struct kvm_vcpu *vcpu, struct pt_regs *regs) +{ + struct kvmhv_nestedv2_io *io; + struct kvmppc_gs_bitmap *valids; + struct kvmppc_gs_buff *gsb; + struct kvmppc_gs_msg gsm; + int rc = 0; + + + io = &vcpu->arch.nestedv2_io; + valids = &io->valids; + + gsb = io->vcpu_run_input; + kvmppc_gsm_init(&gsm, &vcpu_message_ops, vcpu, 0); + + for (int i = 0; i < 32; i++) { + if (!kvmppc_gsbm_test(valids, KVMPPC_GSID_GPR(i))) + kvmppc_gsm_include(&gsm, KVMPPC_GSID_GPR(i)); + } + + if (!kvmppc_gsbm_test(valids, KVMPPC_GSID_CR)) + kvmppc_gsm_include(&gsm, KVMPPC_GSID_CR); + + if (!kvmppc_gsbm_test(valids, KVMPPC_GSID_XER)) + kvmppc_gsm_include(&gsm, KVMPPC_GSID_XER); + + if (!kvmppc_gsbm_test(valids, KVMPPC_GSID_CTR)) + kvmppc_gsm_include(&gsm, KVMPPC_GSID_CTR); + + if (!kvmppc_gsbm_test(valids, KVMPPC_GSID_LR)) + kvmppc_gsm_include(&gsm, KVMPPC_GSID_LR); + + if (!kvmppc_gsbm_test(valids, KVMPPC_GSID_NIA)) + kvmppc_gsm_include(&gsm, KVMPPC_GSID_NIA); + + rc = kvmppc_gsb_receive_data(gsb, &gsm); + if (rc < 0) + pr_err("KVM-NESTEDv2: couldn't reload ptregs\n"); + + return rc; +} +EXPORT_SYMBOL_GPL(__kvmhv_nestedv2_reload_ptregs); + +int __kvmhv_nestedv2_mark_dirty_ptregs(struct kvm_vcpu *vcpu, + struct pt_regs *regs) +{ + for (int i = 0; i < 32; i++) + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_GPR(i)); + + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_CR); + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_XER); + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_CTR); + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LR); + kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_NIA); + + return 0; +} +EXPORT_SYMBOL_GPL(__kvmhv_nestedv2_mark_dirty_ptregs); + +/** + * kvmhv_nestedv2_vcpu_create() - create nested vcpu for the NESTEDv2 API + * @vcpu: vcpu + * @io: NESTEDv2 nested io state + * + * Parse the output buffer from H_GUEST_RUN_VCPU to update vcpu. + */ +int kvmhv_nestedv2_vcpu_create(struct kvm_vcpu *vcpu, + struct kvmhv_nestedv2_io *io) +{ + long rc; + + rc = plpar_guest_create_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id); + + if (rc != H_SUCCESS) { + pr_err("KVM: Create Guest vcpu hcall failed, rc=%ld\n", rc); + switch (rc) { + case H_NOT_ENOUGH_RESOURCES: + case H_ABORTED: + return -ENOMEM; + case H_AUTHORITY: + return -EPERM; + default: + return -EINVAL; + } + } + + rc = kvmhv_nestedv2_host_create(vcpu, io); + + return rc; +} +EXPORT_SYMBOL_GPL(kvmhv_nestedv2_vcpu_create); + +/** + * kvmhv_nestedv2_vcpu_free() - free the NESTEDv2 state + * @vcpu: vcpu + * @io: NESTEDv2 nested io state + */ +void kvmhv_nestedv2_vcpu_free(struct kvm_vcpu *vcpu, + struct kvmhv_nestedv2_io *io) +{ + kvmhv_nestedv2_host_free(vcpu, io); +} +EXPORT_SYMBOL_GPL(kvmhv_nestedv2_vcpu_free); diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 34f1db212824..34bc0a8a1288 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -305,7 +305,7 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u6 u32 pid; lpid = nested ? nested->shadow_lpid : kvm->arch.lpid; - pid = vcpu->arch.pid; + pid = kvmppc_get_pid(vcpu); /* * Prior memory accesses to host PID Q3 must be completed before we @@ -330,7 +330,7 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 int i; lpid = kvm->arch.lpid; - pid = vcpu->arch.pid; + pid = kvmppc_get_pid(vcpu); /* * See switch_mmu_to_guest_radix. ptesync should not be required here diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 82be6d87514b..9012acadbca8 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -174,14 +174,14 @@ long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu) ppc_md.hmi_exception_early(NULL); out: - if (vc->tb_offset) { + if (kvmppc_get_tb_offset(vcpu)) { u64 new_tb = mftb() + vc->tb_offset; mtspr(SPRN_TBU40, new_tb); if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { new_tb += 0x1000000; mtspr(SPRN_TBU40, new_tb); } - vc->tb_offset_applied = vc->tb_offset; + vc->tb_offset_applied = kvmppc_get_tb_offset(vcpu); } return ret; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 9182324dbef9..17cb75a127b0 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -776,8 +776,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); r &= ~HPTE_GR_RESERVED; } - vcpu->arch.regs.gpr[4 + i * 2] = v; - vcpu->arch.regs.gpr[5 + i * 2] = r; + kvmppc_set_gpr(vcpu, 4 + i * 2, v); + kvmppc_set_gpr(vcpu, 5 + i * 2, r); } return H_SUCCESS; } @@ -824,7 +824,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, } } } - vcpu->arch.regs.gpr[4] = gr; + kvmppc_set_gpr(vcpu, 4, gr); ret = H_SUCCESS; out: unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); @@ -872,7 +872,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, kvmppc_set_dirty_from_hpte(kvm, v, gr); } } - vcpu->arch.regs.gpr[4] = gr; + kvmppc_set_gpr(vcpu, 4, gr); ret = H_SUCCESS; out: unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index e165bfa842bf..e42984878503 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -481,7 +481,7 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu) { - vcpu->arch.regs.gpr[5] = get_tb(); + kvmppc_set_gpr(vcpu, 5, get_tb()); return xics_rm_h_xirr(vcpu); } @@ -518,7 +518,7 @@ unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu) } while (!icp_rm_try_update(icp, old_state, new_state)); /* Return the result in GPR4 */ - vcpu->arch.regs.gpr[4] = xirr; + kvmppc_set_gpr(vcpu, 4, xirr); return check_too_hard(xics, icp); } diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index e2d6f9327f77..92f33115144b 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -858,7 +858,7 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) } kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; - pr_info("LPID %d went secure\n", kvm->arch.lpid); + pr_info("LPID %lld went secure\n", kvm->arch.lpid); out: srcu_read_unlock(&kvm->srcu, srcu_idx); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f4115819e738..29a382249770 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -328,7 +328,7 @@ static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu) */ /* Return interrupt and old CPPR in GPR4 */ - vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24); + kvmppc_set_gpr(vcpu, 4, hirq | (old_cppr << 24)); return H_SUCCESS; } @@ -364,7 +364,7 @@ static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server hirq = xive_vm_scan_interrupts(xc, pending, scan_poll); /* Return interrupt and old CPPR in GPR4 */ - vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24); + kvmppc_set_gpr(vcpu, 4, hirq | (xc->cppr << 24)); return H_SUCCESS; } @@ -884,10 +884,10 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, } if (single_escalation) - name = kasprintf(GFP_KERNEL, "kvm-%d-%d", + name = kasprintf(GFP_KERNEL, "kvm-%lld-%d", vcpu->kvm->arch.lpid, xc->server_num); else - name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d", + name = kasprintf(GFP_KERNEL, "kvm-%lld-%d-%d", vcpu->kvm->arch.lpid, xc->server_num, prio); if (!name) { pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n", @@ -2779,8 +2779,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req) { - struct kvmppc_vcore *vc = vcpu->arch.vcore; - /* The VM should have configured XICS mode before doing XICS hcalls. */ if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; @@ -2799,7 +2797,7 @@ int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req) return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4)); case H_XIRR_X: xive_vm_h_xirr(vcpu); - kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset); + kvmppc_set_gpr(vcpu, 5, get_tb() + kvmppc_get_tb_offset(vcpu)); return H_SUCCESS; } diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 712ab91ced39..6e2ebbd8aaac 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -567,7 +567,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, u8 priority; struct kvm_ppc_xive_eq kvm_eq; int rc; - __be32 *qaddr = 0; + __be32 *qaddr = NULL; struct page *page; struct xive_q *q; gfn_t gfn; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 059c08ae0340..077fd88a0b68 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -92,7 +92,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) vcpu->arch.mmio_host_swabbed = 0; emulated = EMULATE_FAIL; - vcpu->arch.regs.msr = vcpu->arch.shared->msr; + vcpu->arch.regs.msr = kvmppc_get_msr(vcpu); + kvmhv_nestedv2_reload_ptregs(vcpu, &vcpu->arch.regs); if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { int type = op.type & INSTR_TYPE_MASK; int size = GETSIZE(op.type); @@ -250,7 +251,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) vcpu->arch.mmio_sp64_extend = 1; emulated = kvmppc_handle_store(vcpu, - VCPU_FPR(vcpu, op.reg), size, 1); + kvmppc_get_fpr(vcpu, op.reg), size, 1); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) kvmppc_set_gpr(vcpu, op.update_reg, op.ea); @@ -357,6 +358,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } trace_kvm_ppc_instr(ppc_inst_val(inst), kvmppc_get_pc(vcpu), emulated); + kvmhv_nestedv2_mark_dirty_ptregs(vcpu, &vcpu->arch.regs); /* Advance past emulated instruction. */ if (emulated != EMULATE_FAIL) diff --git a/arch/powerpc/kvm/guest-state-buffer.c b/arch/powerpc/kvm/guest-state-buffer.c new file mode 100644 index 000000000000..b80dbc58621f --- /dev/null +++ b/arch/powerpc/kvm/guest-state-buffer.c @@ -0,0 +1,621 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "asm/hvcall.h" +#include <linux/log2.h> +#include <asm/pgalloc.h> +#include <asm/guest-state-buffer.h> + +static const u16 kvmppc_gse_iden_len[__KVMPPC_GSE_TYPE_MAX] = { + [KVMPPC_GSE_BE32] = sizeof(__be32), + [KVMPPC_GSE_BE64] = sizeof(__be64), + [KVMPPC_GSE_VEC128] = sizeof(vector128), + [KVMPPC_GSE_PARTITION_TABLE] = sizeof(struct kvmppc_gs_part_table), + [KVMPPC_GSE_PROCESS_TABLE] = sizeof(struct kvmppc_gs_proc_table), + [KVMPPC_GSE_BUFFER] = sizeof(struct kvmppc_gs_buff_info), +}; + +/** + * kvmppc_gsb_new() - create a new guest state buffer + * @size: total size of the guest state buffer (includes header) + * @guest_id: guest_id + * @vcpu_id: vcpu_id + * @flags: GFP flags + * + * Returns a guest state buffer. + */ +struct kvmppc_gs_buff *kvmppc_gsb_new(size_t size, unsigned long guest_id, + unsigned long vcpu_id, gfp_t flags) +{ + struct kvmppc_gs_buff *gsb; + + gsb = kzalloc(sizeof(*gsb), flags); + if (!gsb) + return NULL; + + size = roundup_pow_of_two(size); + gsb->hdr = kzalloc(size, GFP_KERNEL); + if (!gsb->hdr) + goto free; + + gsb->capacity = size; + gsb->len = sizeof(struct kvmppc_gs_header); + gsb->vcpu_id = vcpu_id; + gsb->guest_id = guest_id; + + gsb->hdr->nelems = cpu_to_be32(0); + + return gsb; + +free: + kfree(gsb); + return NULL; +} +EXPORT_SYMBOL_GPL(kvmppc_gsb_new); + +/** + * kvmppc_gsb_free() - free a guest state buffer + * @gsb: guest state buffer + */ +void kvmppc_gsb_free(struct kvmppc_gs_buff *gsb) +{ + kfree(gsb->hdr); + kfree(gsb); +} +EXPORT_SYMBOL_GPL(kvmppc_gsb_free); + +/** + * kvmppc_gsb_put() - allocate space in a guest state buffer + * @gsb: buffer to allocate in + * @size: amount of space to allocate + * + * Returns a pointer to the amount of space requested within the buffer and + * increments the count of elements in the buffer. + * + * Does not check if there is enough space in the buffer. + */ +void *kvmppc_gsb_put(struct kvmppc_gs_buff *gsb, size_t size) +{ + u32 nelems = kvmppc_gsb_nelems(gsb); + void *p; + + p = (void *)kvmppc_gsb_header(gsb) + kvmppc_gsb_len(gsb); + gsb->len += size; + + kvmppc_gsb_header(gsb)->nelems = cpu_to_be32(nelems + 1); + return p; +} +EXPORT_SYMBOL_GPL(kvmppc_gsb_put); + +static int kvmppc_gsid_class(u16 iden) +{ + if ((iden >= KVMPPC_GSE_GUESTWIDE_START) && + (iden <= KVMPPC_GSE_GUESTWIDE_END)) + return KVMPPC_GS_CLASS_GUESTWIDE; + + if ((iden >= KVMPPC_GSE_META_START) && (iden <= KVMPPC_GSE_META_END)) + return KVMPPC_GS_CLASS_META; + + if ((iden >= KVMPPC_GSE_DW_REGS_START) && + (iden <= KVMPPC_GSE_DW_REGS_END)) + return KVMPPC_GS_CLASS_DWORD_REG; + + if ((iden >= KVMPPC_GSE_W_REGS_START) && + (iden <= KVMPPC_GSE_W_REGS_END)) + return KVMPPC_GS_CLASS_WORD_REG; + + if ((iden >= KVMPPC_GSE_VSRS_START) && (iden <= KVMPPC_GSE_VSRS_END)) + return KVMPPC_GS_CLASS_VECTOR; + + if ((iden >= KVMPPC_GSE_INTR_REGS_START) && + (iden <= KVMPPC_GSE_INTR_REGS_END)) + return KVMPPC_GS_CLASS_INTR; + + return -1; +} + +static int kvmppc_gsid_type(u16 iden) +{ + int type = -1; + + switch (kvmppc_gsid_class(iden)) { + case KVMPPC_GS_CLASS_GUESTWIDE: + switch (iden) { + case KVMPPC_GSID_HOST_STATE_SIZE: + case KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE: + case KVMPPC_GSID_TB_OFFSET: + type = KVMPPC_GSE_BE64; + break; + case KVMPPC_GSID_PARTITION_TABLE: + type = KVMPPC_GSE_PARTITION_TABLE; + break; + case KVMPPC_GSID_PROCESS_TABLE: + type = KVMPPC_GSE_PROCESS_TABLE; + break; + case KVMPPC_GSID_LOGICAL_PVR: + type = KVMPPC_GSE_BE32; + break; + } + break; + case KVMPPC_GS_CLASS_META: + switch (iden) { + case KVMPPC_GSID_RUN_INPUT: + case KVMPPC_GSID_RUN_OUTPUT: + type = KVMPPC_GSE_BUFFER; + break; + case KVMPPC_GSID_VPA: + type = KVMPPC_GSE_BE64; + break; + } + break; + case KVMPPC_GS_CLASS_DWORD_REG: + type = KVMPPC_GSE_BE64; + break; + case KVMPPC_GS_CLASS_WORD_REG: + type = KVMPPC_GSE_BE32; + break; + case KVMPPC_GS_CLASS_VECTOR: + type = KVMPPC_GSE_VEC128; + break; + case KVMPPC_GS_CLASS_INTR: + switch (iden) { + case KVMPPC_GSID_HDAR: + case KVMPPC_GSID_ASDR: + case KVMPPC_GSID_HEIR: + type = KVMPPC_GSE_BE64; + break; + case KVMPPC_GSID_HDSISR: + type = KVMPPC_GSE_BE32; + break; + } + break; + } + + return type; +} + +/** + * kvmppc_gsid_flags() - the flags for a guest state ID + * @iden: guest state ID + * + * Returns any flags for the guest state ID. + */ +unsigned long kvmppc_gsid_flags(u16 iden) +{ + unsigned long flags = 0; + + switch (kvmppc_gsid_class(iden)) { + case KVMPPC_GS_CLASS_GUESTWIDE: + flags = KVMPPC_GS_FLAGS_WIDE; + break; + case KVMPPC_GS_CLASS_META: + case KVMPPC_GS_CLASS_DWORD_REG: + case KVMPPC_GS_CLASS_WORD_REG: + case KVMPPC_GS_CLASS_VECTOR: + case KVMPPC_GS_CLASS_INTR: + break; + } + + return flags; +} +EXPORT_SYMBOL_GPL(kvmppc_gsid_flags); + +/** + * kvmppc_gsid_size() - the size of a guest state ID + * @iden: guest state ID + * + * Returns the size of guest state ID. + */ +u16 kvmppc_gsid_size(u16 iden) +{ + int type; + + type = kvmppc_gsid_type(iden); + if (type == -1) + return 0; + + if (type >= __KVMPPC_GSE_TYPE_MAX) + return 0; + + return kvmppc_gse_iden_len[type]; +} +EXPORT_SYMBOL_GPL(kvmppc_gsid_size); + +/** + * kvmppc_gsid_mask() - the settable bits of a guest state ID + * @iden: guest state ID + * + * Returns a mask of settable bits for a guest state ID. + */ +u64 kvmppc_gsid_mask(u16 iden) +{ + u64 mask = ~0ull; + + switch (iden) { + case KVMPPC_GSID_LPCR: + mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER | + LPCR_GTSE; + break; + case KVMPPC_GSID_MSR: + mask = ~(MSR_HV | MSR_S | MSR_ME); + break; + } + + return mask; +} +EXPORT_SYMBOL_GPL(kvmppc_gsid_mask); + +/** + * __kvmppc_gse_put() - add a guest state element to a buffer + * @gsb: buffer to the element to + * @iden: guest state ID + * @size: length of data + * @data: pointer to data + */ +int __kvmppc_gse_put(struct kvmppc_gs_buff *gsb, u16 iden, u16 size, + const void *data) +{ + struct kvmppc_gs_elem *gse; + u16 total_size; + + total_size = sizeof(*gse) + size; + if (total_size + kvmppc_gsb_len(gsb) > kvmppc_gsb_capacity(gsb)) + return -ENOMEM; + + if (kvmppc_gsid_size(iden) != size) + return -EINVAL; + + gse = kvmppc_gsb_put(gsb, total_size); + gse->iden = cpu_to_be16(iden); + gse->len = cpu_to_be16(size); + memcpy(gse->data, data, size); + + return 0; +} +EXPORT_SYMBOL_GPL(__kvmppc_gse_put); + +/** + * kvmppc_gse_parse() - create a parse map from a guest state buffer + * @gsp: guest state parser + * @gsb: guest state buffer + */ +int kvmppc_gse_parse(struct kvmppc_gs_parser *gsp, struct kvmppc_gs_buff *gsb) +{ + struct kvmppc_gs_elem *curr; + int rem, i; + + kvmppc_gsb_for_each_elem(i, curr, gsb, rem) { + if (kvmppc_gse_len(curr) != + kvmppc_gsid_size(kvmppc_gse_iden(curr))) + return -EINVAL; + kvmppc_gsp_insert(gsp, kvmppc_gse_iden(curr), curr); + } + + if (kvmppc_gsb_nelems(gsb) != i) + return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(kvmppc_gse_parse); + +static inline int kvmppc_gse_flatten_iden(u16 iden) +{ + int bit = 0; + int class; + + class = kvmppc_gsid_class(iden); + + if (class == KVMPPC_GS_CLASS_GUESTWIDE) { + bit += iden - KVMPPC_GSE_GUESTWIDE_START; + return bit; + } + + bit += KVMPPC_GSE_GUESTWIDE_COUNT; + + if (class == KVMPPC_GS_CLASS_META) { + bit += iden - KVMPPC_GSE_META_START; + return bit; + } + + bit += KVMPPC_GSE_META_COUNT; + + if (class == KVMPPC_GS_CLASS_DWORD_REG) { + bit += iden - KVMPPC_GSE_DW_REGS_START; + return bit; + } + + bit += KVMPPC_GSE_DW_REGS_COUNT; + + if (class == KVMPPC_GS_CLASS_WORD_REG) { + bit += iden - KVMPPC_GSE_W_REGS_START; + return bit; + } + + bit += KVMPPC_GSE_W_REGS_COUNT; + + if (class == KVMPPC_GS_CLASS_VECTOR) { + bit += iden - KVMPPC_GSE_VSRS_START; + return bit; + } + + bit += KVMPPC_GSE_VSRS_COUNT; + + if (class == KVMPPC_GS_CLASS_INTR) { + bit += iden - KVMPPC_GSE_INTR_REGS_START; + return bit; + } + + return 0; +} + +static inline u16 kvmppc_gse_unflatten_iden(int bit) +{ + u16 iden; + + if (bit < KVMPPC_GSE_GUESTWIDE_COUNT) { + iden = KVMPPC_GSE_GUESTWIDE_START + bit; + return iden; + } + bit -= KVMPPC_GSE_GUESTWIDE_COUNT; + + if (bit < KVMPPC_GSE_META_COUNT) { + iden = KVMPPC_GSE_META_START + bit; + return iden; + } + bit -= KVMPPC_GSE_META_COUNT; + + if (bit < KVMPPC_GSE_DW_REGS_COUNT) { + iden = KVMPPC_GSE_DW_REGS_START + bit; + return iden; + } + bit -= KVMPPC_GSE_DW_REGS_COUNT; + + if (bit < KVMPPC_GSE_W_REGS_COUNT) { + iden = KVMPPC_GSE_W_REGS_START + bit; + return iden; + } + bit -= KVMPPC_GSE_W_REGS_COUNT; + + if (bit < KVMPPC_GSE_VSRS_COUNT) { + iden = KVMPPC_GSE_VSRS_START + bit; + return iden; + } + bit -= KVMPPC_GSE_VSRS_COUNT; + + if (bit < KVMPPC_GSE_IDEN_COUNT) { + iden = KVMPPC_GSE_INTR_REGS_START + bit; + return iden; + } + + return 0; +} + +/** + * kvmppc_gsp_insert() - add a mapping from an guest state ID to an element + * @gsp: guest state parser + * @iden: guest state id (key) + * @gse: guest state element (value) + */ +void kvmppc_gsp_insert(struct kvmppc_gs_parser *gsp, u16 iden, + struct kvmppc_gs_elem *gse) +{ + int i; + + i = kvmppc_gse_flatten_iden(iden); + kvmppc_gsbm_set(&gsp->iterator, iden); + gsp->gses[i] = gse; +} +EXPORT_SYMBOL_GPL(kvmppc_gsp_insert); + +/** + * kvmppc_gsp_lookup() - lookup an element from a guest state ID + * @gsp: guest state parser + * @iden: guest state ID (key) + * + * Returns the guest state element if present. + */ +struct kvmppc_gs_elem *kvmppc_gsp_lookup(struct kvmppc_gs_parser *gsp, u16 iden) +{ + int i; + + i = kvmppc_gse_flatten_iden(iden); + return gsp->gses[i]; +} +EXPORT_SYMBOL_GPL(kvmppc_gsp_lookup); + +/** + * kvmppc_gsbm_set() - set the guest state ID + * @gsbm: guest state bitmap + * @iden: guest state ID + */ +void kvmppc_gsbm_set(struct kvmppc_gs_bitmap *gsbm, u16 iden) +{ + set_bit(kvmppc_gse_flatten_iden(iden), gsbm->bitmap); +} +EXPORT_SYMBOL_GPL(kvmppc_gsbm_set); + +/** + * kvmppc_gsbm_clear() - clear the guest state ID + * @gsbm: guest state bitmap + * @iden: guest state ID + */ +void kvmppc_gsbm_clear(struct kvmppc_gs_bitmap *gsbm, u16 iden) +{ + clear_bit(kvmppc_gse_flatten_iden(iden), gsbm->bitmap); +} +EXPORT_SYMBOL_GPL(kvmppc_gsbm_clear); + +/** + * kvmppc_gsbm_test() - test the guest state ID + * @gsbm: guest state bitmap + * @iden: guest state ID + */ +bool kvmppc_gsbm_test(struct kvmppc_gs_bitmap *gsbm, u16 iden) +{ + return test_bit(kvmppc_gse_flatten_iden(iden), gsbm->bitmap); +} +EXPORT_SYMBOL_GPL(kvmppc_gsbm_test); + +/** + * kvmppc_gsbm_next() - return the next set guest state ID + * @gsbm: guest state bitmap + * @prev: last guest state ID + */ +u16 kvmppc_gsbm_next(struct kvmppc_gs_bitmap *gsbm, u16 prev) +{ + int bit, pbit; + + pbit = prev ? kvmppc_gse_flatten_iden(prev) + 1 : 0; + bit = find_next_bit(gsbm->bitmap, KVMPPC_GSE_IDEN_COUNT, pbit); + + if (bit < KVMPPC_GSE_IDEN_COUNT) + return kvmppc_gse_unflatten_iden(bit); + return 0; +} +EXPORT_SYMBOL_GPL(kvmppc_gsbm_next); + +/** + * kvmppc_gsm_init() - initialize a guest state message + * @gsm: guest state message + * @ops: callbacks + * @data: private data + * @flags: guest wide or thread wide + */ +int kvmppc_gsm_init(struct kvmppc_gs_msg *gsm, struct kvmppc_gs_msg_ops *ops, + void *data, unsigned long flags) +{ + memset(gsm, 0, sizeof(*gsm)); + gsm->ops = ops; + gsm->data = data; + gsm->flags = flags; + + return 0; +} +EXPORT_SYMBOL_GPL(kvmppc_gsm_init); + +/** + * kvmppc_gsm_new() - creates a new guest state message + * @ops: callbacks + * @data: private data + * @flags: guest wide or thread wide + * @gfp_flags: GFP allocation flags + * + * Returns an initialized guest state message. + */ +struct kvmppc_gs_msg *kvmppc_gsm_new(struct kvmppc_gs_msg_ops *ops, void *data, + unsigned long flags, gfp_t gfp_flags) +{ + struct kvmppc_gs_msg *gsm; + + gsm = kzalloc(sizeof(*gsm), gfp_flags); + if (!gsm) + return NULL; + + kvmppc_gsm_init(gsm, ops, data, flags); + + return gsm; +} +EXPORT_SYMBOL_GPL(kvmppc_gsm_new); + +/** + * kvmppc_gsm_size() - creates a new guest state message + * @gsm: self + * + * Returns the size required for the message. + */ +size_t kvmppc_gsm_size(struct kvmppc_gs_msg *gsm) +{ + if (gsm->ops->get_size) + return gsm->ops->get_size(gsm); + return 0; +} +EXPORT_SYMBOL_GPL(kvmppc_gsm_size); + +/** + * kvmppc_gsm_free() - free guest state message + * @gsm: guest state message + * + * Returns the size required for the message. + */ +void kvmppc_gsm_free(struct kvmppc_gs_msg *gsm) +{ + kfree(gsm); +} +EXPORT_SYMBOL_GPL(kvmppc_gsm_free); + +/** + * kvmppc_gsm_fill_info() - serialises message to guest state buffer format + * @gsm: self + * @gsb: buffer to serialise into + */ +int kvmppc_gsm_fill_info(struct kvmppc_gs_msg *gsm, struct kvmppc_gs_buff *gsb) +{ + if (!gsm->ops->fill_info) + return -EINVAL; + + return gsm->ops->fill_info(gsb, gsm); +} +EXPORT_SYMBOL_GPL(kvmppc_gsm_fill_info); + +/** + * kvmppc_gsm_refresh_info() - deserialises from guest state buffer + * @gsm: self + * @gsb: buffer to serialise from + */ +int kvmppc_gsm_refresh_info(struct kvmppc_gs_msg *gsm, + struct kvmppc_gs_buff *gsb) +{ + if (!gsm->ops->fill_info) + return -EINVAL; + + return gsm->ops->refresh_info(gsm, gsb); +} +EXPORT_SYMBOL_GPL(kvmppc_gsm_refresh_info); + +/** + * kvmppc_gsb_send - send all elements in the buffer to the hypervisor. + * @gsb: guest state buffer + * @flags: guest wide or thread wide + * + * Performs the H_GUEST_SET_STATE hcall for the guest state buffer. + */ +int kvmppc_gsb_send(struct kvmppc_gs_buff *gsb, unsigned long flags) +{ + unsigned long hflags = 0; + unsigned long i; + int rc; + + if (kvmppc_gsb_nelems(gsb) == 0) + return 0; + + if (flags & KVMPPC_GS_FLAGS_WIDE) + hflags |= H_GUEST_FLAGS_WIDE; + + rc = plpar_guest_set_state(hflags, gsb->guest_id, gsb->vcpu_id, + __pa(gsb->hdr), gsb->capacity, &i); + return rc; +} +EXPORT_SYMBOL_GPL(kvmppc_gsb_send); + +/** + * kvmppc_gsb_recv - request all elements in the buffer have their value + * updated. + * @gsb: guest state buffer + * @flags: guest wide or thread wide + * + * Performs the H_GUEST_GET_STATE hcall for the guest state buffer. + * After returning from the hcall the guest state elements that were + * present in the buffer will have updated values from the hypervisor. + */ +int kvmppc_gsb_recv(struct kvmppc_gs_buff *gsb, unsigned long flags) +{ + unsigned long hflags = 0; + unsigned long i; + int rc; + + if (flags & KVMPPC_GS_FLAGS_WIDE) + hflags |= H_GUEST_FLAGS_WIDE; + + rc = plpar_guest_get_state(hflags, gsb->guest_id, gsb->vcpu_id, + __pa(gsb->hdr), gsb->capacity, &i); + return rc; +} +EXPORT_SYMBOL_GPL(kvmppc_gsb_recv); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7197c8256668..f6af752698d0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -934,11 +934,11 @@ static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu, return; if (index >= 32) { - val.vval = VCPU_VSX_VR(vcpu, index - 32); + kvmppc_get_vsx_vr(vcpu, index - 32, &val.vval); val.vsxval[offset] = gpr; - VCPU_VSX_VR(vcpu, index - 32) = val.vval; + kvmppc_set_vsx_vr(vcpu, index - 32, &val.vval); } else { - VCPU_VSX_FPR(vcpu, index, offset) = gpr; + kvmppc_set_vsx_fpr(vcpu, index, offset, gpr); } } @@ -949,13 +949,13 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu, int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; if (index >= 32) { - val.vval = VCPU_VSX_VR(vcpu, index - 32); + kvmppc_get_vsx_vr(vcpu, index - 32, &val.vval); val.vsxval[0] = gpr; val.vsxval[1] = gpr; - VCPU_VSX_VR(vcpu, index - 32) = val.vval; + kvmppc_set_vsx_vr(vcpu, index - 32, &val.vval); } else { - VCPU_VSX_FPR(vcpu, index, 0) = gpr; - VCPU_VSX_FPR(vcpu, index, 1) = gpr; + kvmppc_set_vsx_fpr(vcpu, index, 0, gpr); + kvmppc_set_vsx_fpr(vcpu, index, 1, gpr); } } @@ -970,12 +970,12 @@ static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu, val.vsx32val[1] = gpr; val.vsx32val[2] = gpr; val.vsx32val[3] = gpr; - VCPU_VSX_VR(vcpu, index - 32) = val.vval; + kvmppc_set_vsx_vr(vcpu, index - 32, &val.vval); } else { val.vsx32val[0] = gpr; val.vsx32val[1] = gpr; - VCPU_VSX_FPR(vcpu, index, 0) = val.vsxval[0]; - VCPU_VSX_FPR(vcpu, index, 1) = val.vsxval[0]; + kvmppc_set_vsx_fpr(vcpu, index, 0, val.vsxval[0]); + kvmppc_set_vsx_fpr(vcpu, index, 1, val.vsxval[0]); } } @@ -991,15 +991,15 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu, return; if (index >= 32) { - val.vval = VCPU_VSX_VR(vcpu, index - 32); + kvmppc_get_vsx_vr(vcpu, index - 32, &val.vval); val.vsx32val[offset] = gpr32; - VCPU_VSX_VR(vcpu, index - 32) = val.vval; + kvmppc_set_vsx_vr(vcpu, index - 32, &val.vval); } else { dword_offset = offset / 2; word_offset = offset % 2; - val.vsxval[0] = VCPU_VSX_FPR(vcpu, index, dword_offset); + val.vsxval[0] = kvmppc_get_vsx_fpr(vcpu, index, dword_offset); val.vsx32val[word_offset] = gpr32; - VCPU_VSX_FPR(vcpu, index, dword_offset) = val.vsxval[0]; + kvmppc_set_vsx_fpr(vcpu, index, dword_offset, val.vsxval[0]); } } #endif /* CONFIG_VSX */ @@ -1058,9 +1058,9 @@ static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu, if (offset == -1) return; - val.vval = VCPU_VSX_VR(vcpu, index); + kvmppc_get_vsx_vr(vcpu, index, &val.vval); val.vsxval[offset] = gpr; - VCPU_VSX_VR(vcpu, index) = val.vval; + kvmppc_set_vsx_vr(vcpu, index, &val.vval); } static inline void kvmppc_set_vmx_word(struct kvm_vcpu *vcpu, @@ -1074,9 +1074,9 @@ static inline void kvmppc_set_vmx_word(struct kvm_vcpu *vcpu, if (offset == -1) return; - val.vval = VCPU_VSX_VR(vcpu, index); + kvmppc_get_vsx_vr(vcpu, index, &val.vval); val.vsx32val[offset] = gpr32; - VCPU_VSX_VR(vcpu, index) = val.vval; + kvmppc_set_vsx_vr(vcpu, index, &val.vval); } static inline void kvmppc_set_vmx_hword(struct kvm_vcpu *vcpu, @@ -1090,9 +1090,9 @@ static inline void kvmppc_set_vmx_hword(struct kvm_vcpu *vcpu, if (offset == -1) return; - val.vval = VCPU_VSX_VR(vcpu, index); + kvmppc_get_vsx_vr(vcpu, index, &val.vval); val.vsx16val[offset] = gpr16; - VCPU_VSX_VR(vcpu, index) = val.vval; + kvmppc_set_vsx_vr(vcpu, index, &val.vval); } static inline void kvmppc_set_vmx_byte(struct kvm_vcpu *vcpu, @@ -1106,9 +1106,9 @@ static inline void kvmppc_set_vmx_byte(struct kvm_vcpu *vcpu, if (offset == -1) return; - val.vval = VCPU_VSX_VR(vcpu, index); + kvmppc_get_vsx_vr(vcpu, index, &val.vval); val.vsx8val[offset] = gpr8; - VCPU_VSX_VR(vcpu, index) = val.vval; + kvmppc_set_vsx_vr(vcpu, index, &val.vval); } #endif /* CONFIG_ALTIVEC */ @@ -1194,14 +1194,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.kvm_ops->giveup_ext) vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_FP); - VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr; + kvmppc_set_fpr(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK, gpr); break; #ifdef CONFIG_PPC_BOOK3S case KVM_MMIO_REG_QPR: vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; break; case KVM_MMIO_REG_FQPR: - VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr; + kvmppc_set_fpr(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK, gpr); vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; break; #endif @@ -1419,9 +1419,9 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) } if (rs < 32) { - *val = VCPU_VSX_FPR(vcpu, rs, vsx_offset); + *val = kvmppc_get_vsx_fpr(vcpu, rs, vsx_offset); } else { - reg.vval = VCPU_VSX_VR(vcpu, rs - 32); + kvmppc_get_vsx_vr(vcpu, rs - 32, ®.vval); *val = reg.vsxval[vsx_offset]; } break; @@ -1438,10 +1438,10 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) if (rs < 32) { dword_offset = vsx_offset / 2; word_offset = vsx_offset % 2; - reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset); + reg.vsxval[0] = kvmppc_get_vsx_fpr(vcpu, rs, dword_offset); *val = reg.vsx32val[word_offset]; } else { - reg.vval = VCPU_VSX_VR(vcpu, rs - 32); + kvmppc_get_vsx_vr(vcpu, rs - 32, ®.vval); *val = reg.vsx32val[vsx_offset]; } break; @@ -1556,7 +1556,7 @@ static int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) if (vmx_offset == -1) return -1; - reg.vval = VCPU_VSX_VR(vcpu, index); + kvmppc_get_vsx_vr(vcpu, index, ®.vval); *val = reg.vsxval[vmx_offset]; return result; @@ -1574,7 +1574,7 @@ static int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) if (vmx_offset == -1) return -1; - reg.vval = VCPU_VSX_VR(vcpu, index); + kvmppc_get_vsx_vr(vcpu, index, ®.vval); *val = reg.vsx32val[vmx_offset]; return result; @@ -1592,7 +1592,7 @@ static int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) if (vmx_offset == -1) return -1; - reg.vval = VCPU_VSX_VR(vcpu, index); + kvmppc_get_vsx_vr(vcpu, index, ®.vval); *val = reg.vsx16val[vmx_offset]; return result; @@ -1610,7 +1610,7 @@ static int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) if (vmx_offset == -1) return -1; - reg.vval = VCPU_VSX_VR(vcpu, index); + kvmppc_get_vsx_vr(vcpu, index, ®.vval); *val = reg.vsx8val[vmx_offset]; return result; @@ -1719,17 +1719,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; + kvmppc_get_vsx_vr(vcpu, reg->id - KVM_REG_PPC_VR0, &val.vval); break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + val = get_reg_val(reg->id, kvmppc_get_vscr(vcpu)); break; case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); + val = get_reg_val(reg->id, kvmppc_get_vrsave(vcpu)); break; #endif /* CONFIG_ALTIVEC */ default: @@ -1770,21 +1770,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + kvmppc_set_vsx_vr(vcpu, reg->id - KVM_REG_PPC_VR0, &val.vval); break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); + kvmppc_set_vscr(vcpu, set_reg_val(reg->id, val)); break; case KVM_REG_PPC_VRSAVE: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - vcpu->arch.vrsave = set_reg_val(reg->id, val); + kvmppc_set_vrsave(vcpu, set_reg_val(reg->id, val)); break; #endif /* CONFIG_ALTIVEC */ default: diff --git a/arch/powerpc/kvm/test-guest-state-buffer.c b/arch/powerpc/kvm/test-guest-state-buffer.c new file mode 100644 index 000000000000..4720b8dc8837 --- /dev/null +++ b/arch/powerpc/kvm/test-guest-state-buffer.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/init.h> +#include <linux/log2.h> +#include <kunit/test.h> + +#include <asm/guest-state-buffer.h> + +static void test_creating_buffer(struct kunit *test) +{ + struct kvmppc_gs_buff *gsb; + size_t size = 0x100; + + gsb = kvmppc_gsb_new(size, 0, 0, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb->hdr); + + KUNIT_EXPECT_EQ(test, gsb->capacity, roundup_pow_of_two(size)); + KUNIT_EXPECT_EQ(test, gsb->len, sizeof(__be32)); + + kvmppc_gsb_free(gsb); +} + +static void test_adding_element(struct kunit *test) +{ + const struct kvmppc_gs_elem *head, *curr; + union { + __vector128 v; + u64 dw[2]; + } u; + int rem; + struct kvmppc_gs_buff *gsb; + size_t size = 0x1000; + int i, rc; + u64 data; + + gsb = kvmppc_gsb_new(size, 0, 0, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb); + + /* Single elements, direct use of __kvmppc_gse_put() */ + data = 0xdeadbeef; + rc = __kvmppc_gse_put(gsb, KVMPPC_GSID_GPR(0), 8, &data); + KUNIT_EXPECT_GE(test, rc, 0); + + head = kvmppc_gsb_data(gsb); + KUNIT_EXPECT_EQ(test, kvmppc_gse_iden(head), KVMPPC_GSID_GPR(0)); + KUNIT_EXPECT_EQ(test, kvmppc_gse_len(head), 8); + data = 0; + memcpy(&data, kvmppc_gse_data(head), 8); + KUNIT_EXPECT_EQ(test, data, 0xdeadbeef); + + /* Multiple elements, simple wrapper */ + rc = kvmppc_gse_put_u64(gsb, KVMPPC_GSID_GPR(1), 0xcafef00d); + KUNIT_EXPECT_GE(test, rc, 0); + + u.dw[0] = 0x1; + u.dw[1] = 0x2; + rc = kvmppc_gse_put_vector128(gsb, KVMPPC_GSID_VSRS(0), &u.v); + KUNIT_EXPECT_GE(test, rc, 0); + u.dw[0] = 0x0; + u.dw[1] = 0x0; + + kvmppc_gsb_for_each_elem(i, curr, gsb, rem) { + switch (i) { + case 0: + KUNIT_EXPECT_EQ(test, kvmppc_gse_iden(curr), + KVMPPC_GSID_GPR(0)); + KUNIT_EXPECT_EQ(test, kvmppc_gse_len(curr), 8); + KUNIT_EXPECT_EQ(test, kvmppc_gse_get_be64(curr), + 0xdeadbeef); + break; + case 1: + KUNIT_EXPECT_EQ(test, kvmppc_gse_iden(curr), + KVMPPC_GSID_GPR(1)); + KUNIT_EXPECT_EQ(test, kvmppc_gse_len(curr), 8); + KUNIT_EXPECT_EQ(test, kvmppc_gse_get_u64(curr), + 0xcafef00d); + break; + case 2: + KUNIT_EXPECT_EQ(test, kvmppc_gse_iden(curr), + KVMPPC_GSID_VSRS(0)); + KUNIT_EXPECT_EQ(test, kvmppc_gse_len(curr), 16); + kvmppc_gse_get_vector128(curr, &u.v); + KUNIT_EXPECT_EQ(test, u.dw[0], 0x1); + KUNIT_EXPECT_EQ(test, u.dw[1], 0x2); + break; + } + } + KUNIT_EXPECT_EQ(test, i, 3); + + kvmppc_gsb_reset(gsb); + KUNIT_EXPECT_EQ(test, kvmppc_gsb_nelems(gsb), 0); + KUNIT_EXPECT_EQ(test, kvmppc_gsb_len(gsb), + sizeof(struct kvmppc_gs_header)); + + kvmppc_gsb_free(gsb); +} + +static void test_gs_parsing(struct kunit *test) +{ + struct kvmppc_gs_elem *gse; + struct kvmppc_gs_parser gsp = { 0 }; + struct kvmppc_gs_buff *gsb; + size_t size = 0x1000; + u64 tmp1, tmp2; + + gsb = kvmppc_gsb_new(size, 0, 0, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb); + + tmp1 = 0xdeadbeefull; + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_GPR(0), tmp1); + + KUNIT_EXPECT_GE(test, kvmppc_gse_parse(&gsp, gsb), 0); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_GPR(0)); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gse); + + tmp2 = kvmppc_gse_get_u64(gse); + KUNIT_EXPECT_EQ(test, tmp2, 0xdeadbeefull); + + kvmppc_gsb_free(gsb); +} + +static void test_gs_bitmap(struct kunit *test) +{ + struct kvmppc_gs_bitmap gsbm = { 0 }; + struct kvmppc_gs_bitmap gsbm1 = { 0 }; + struct kvmppc_gs_bitmap gsbm2 = { 0 }; + u16 iden; + int i, j; + + i = 0; + for (u16 iden = KVMPPC_GSID_HOST_STATE_SIZE; + iden <= KVMPPC_GSID_PROCESS_TABLE; iden++) { + kvmppc_gsbm_set(&gsbm, iden); + kvmppc_gsbm_set(&gsbm1, iden); + KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden)); + kvmppc_gsbm_clear(&gsbm, iden); + KUNIT_EXPECT_FALSE(test, kvmppc_gsbm_test(&gsbm, iden)); + i++; + } + + for (u16 iden = KVMPPC_GSID_RUN_INPUT; iden <= KVMPPC_GSID_VPA; + iden++) { + kvmppc_gsbm_set(&gsbm, iden); + kvmppc_gsbm_set(&gsbm1, iden); + KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden)); + kvmppc_gsbm_clear(&gsbm, iden); + KUNIT_EXPECT_FALSE(test, kvmppc_gsbm_test(&gsbm, iden)); + i++; + } + + for (u16 iden = KVMPPC_GSID_GPR(0); iden <= KVMPPC_GSID_CTRL; iden++) { + kvmppc_gsbm_set(&gsbm, iden); + kvmppc_gsbm_set(&gsbm1, iden); + KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden)); + kvmppc_gsbm_clear(&gsbm, iden); + KUNIT_EXPECT_FALSE(test, kvmppc_gsbm_test(&gsbm, iden)); + i++; + } + + for (u16 iden = KVMPPC_GSID_CR; iden <= KVMPPC_GSID_PSPB; iden++) { + kvmppc_gsbm_set(&gsbm, iden); + kvmppc_gsbm_set(&gsbm1, iden); + KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden)); + kvmppc_gsbm_clear(&gsbm, iden); + KUNIT_EXPECT_FALSE(test, kvmppc_gsbm_test(&gsbm, iden)); + i++; + } + + for (u16 iden = KVMPPC_GSID_VSRS(0); iden <= KVMPPC_GSID_VSRS(63); + iden++) { + kvmppc_gsbm_set(&gsbm, iden); + kvmppc_gsbm_set(&gsbm1, iden); + KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden)); + kvmppc_gsbm_clear(&gsbm, iden); + KUNIT_EXPECT_FALSE(test, kvmppc_gsbm_test(&gsbm, iden)); + i++; + } + + for (u16 iden = KVMPPC_GSID_HDAR; iden <= KVMPPC_GSID_ASDR; iden++) { + kvmppc_gsbm_set(&gsbm, iden); + kvmppc_gsbm_set(&gsbm1, iden); + KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden)); + kvmppc_gsbm_clear(&gsbm, iden); + KUNIT_EXPECT_FALSE(test, kvmppc_gsbm_test(&gsbm, iden)); + i++; + } + + j = 0; + kvmppc_gsbm_for_each(&gsbm1, iden) + { + kvmppc_gsbm_set(&gsbm2, iden); + j++; + } + KUNIT_EXPECT_EQ(test, i, j); + KUNIT_EXPECT_MEMEQ(test, &gsbm1, &gsbm2, sizeof(gsbm1)); +} + +struct kvmppc_gs_msg_test1_data { + u64 a; + u32 b; + struct kvmppc_gs_part_table c; + struct kvmppc_gs_proc_table d; + struct kvmppc_gs_buff_info e; +}; + +static size_t test1_get_size(struct kvmppc_gs_msg *gsm) +{ + size_t size = 0; + u16 ids[] = { + KVMPPC_GSID_PARTITION_TABLE, + KVMPPC_GSID_PROCESS_TABLE, + KVMPPC_GSID_RUN_INPUT, + KVMPPC_GSID_GPR(0), + KVMPPC_GSID_CR, + }; + + for (int i = 0; i < ARRAY_SIZE(ids); i++) + size += kvmppc_gse_total_size(kvmppc_gsid_size(ids[i])); + return size; +} + +static int test1_fill_info(struct kvmppc_gs_buff *gsb, + struct kvmppc_gs_msg *gsm) +{ + struct kvmppc_gs_msg_test1_data *data = gsm->data; + + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_GPR(0))) + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_GPR(0), data->a); + + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_CR)) + kvmppc_gse_put_u32(gsb, KVMPPC_GSID_CR, data->b); + + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_PARTITION_TABLE)) + kvmppc_gse_put_part_table(gsb, KVMPPC_GSID_PARTITION_TABLE, + data->c); + + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_PROCESS_TABLE)) + kvmppc_gse_put_proc_table(gsb, KVMPPC_GSID_PARTITION_TABLE, + data->d); + + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_RUN_INPUT)) + kvmppc_gse_put_buff_info(gsb, KVMPPC_GSID_RUN_INPUT, data->e); + + return 0; +} + +static int test1_refresh_info(struct kvmppc_gs_msg *gsm, + struct kvmppc_gs_buff *gsb) +{ + struct kvmppc_gs_parser gsp = { 0 }; + struct kvmppc_gs_msg_test1_data *data = gsm->data; + struct kvmppc_gs_elem *gse; + int rc; + + rc = kvmppc_gse_parse(&gsp, gsb); + if (rc < 0) + return rc; + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_GPR(0)); + if (gse) + data->a = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_CR); + if (gse) + data->b = kvmppc_gse_get_u32(gse); + + return 0; +} + +static struct kvmppc_gs_msg_ops gs_msg_test1_ops = { + .get_size = test1_get_size, + .fill_info = test1_fill_info, + .refresh_info = test1_refresh_info, +}; + +static void test_gs_msg(struct kunit *test) +{ + struct kvmppc_gs_msg_test1_data test1_data = { + .a = 0xdeadbeef, + .b = 0x1, + }; + struct kvmppc_gs_msg *gsm; + struct kvmppc_gs_buff *gsb; + + gsm = kvmppc_gsm_new(&gs_msg_test1_ops, &test1_data, GSM_SEND, + GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm); + + gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_PARTITION_TABLE); + kvmppc_gsm_include(gsm, KVMPPC_GSID_PROCESS_TABLE); + kvmppc_gsm_include(gsm, KVMPPC_GSID_RUN_INPUT); + kvmppc_gsm_include(gsm, KVMPPC_GSID_GPR(0)); + kvmppc_gsm_include(gsm, KVMPPC_GSID_CR); + + kvmppc_gsm_fill_info(gsm, gsb); + + memset(&test1_data, 0, sizeof(test1_data)); + + kvmppc_gsm_refresh_info(gsm, gsb); + KUNIT_EXPECT_EQ(test, test1_data.a, 0xdeadbeef); + KUNIT_EXPECT_EQ(test, test1_data.b, 0x1); + + kvmppc_gsm_free(gsm); +} + +static struct kunit_case guest_state_buffer_testcases[] = { + KUNIT_CASE(test_creating_buffer), + KUNIT_CASE(test_adding_element), + KUNIT_CASE(test_gs_bitmap), + KUNIT_CASE(test_gs_parsing), + KUNIT_CASE(test_gs_msg), + {} +}; + +static struct kunit_suite guest_state_buffer_test_suite = { + .name = "guest_state_buffer_test", + .test_cases = guest_state_buffer_testcases, +}; + +kunit_test_suites(&guest_state_buffer_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index b00112d7ad46..c6ab46156cda 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -38,6 +38,7 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr return 0; failed: + mb(); /* sync */ return -EPERM; } @@ -204,9 +205,6 @@ void __init poking_init(void) { int ret; - if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) - return; - if (mm_patch_enabled()) ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/text_poke_mm:online", @@ -309,10 +307,6 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) err = __patch_instruction(addr, instr, patch_addr); - /* hwsync performed by __patch_instruction (sync) if successful */ - if (err) - mb(); /* sync */ - /* context synchronisation performed by __patch_instruction (isync or exception) */ stop_using_temp_mm(patching_mm, orig_mm); @@ -378,6 +372,144 @@ int patch_instruction(u32 *addr, ppc_inst_t instr) } NOKPROBE_SYMBOL(patch_instruction); +static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr) +{ + unsigned long start = (unsigned long)patch_addr; + + /* Repeat instruction */ + if (repeat_instr) { + ppc_inst_t instr = ppc_inst_read(code); + + if (ppc_inst_prefixed(instr)) { + u64 val = ppc_inst_as_ulong(instr); + + memset64((u64 *)patch_addr, val, len / 8); + } else { + u32 val = ppc_inst_val(instr); + + memset32(patch_addr, val, len / 4); + } + } else { + memcpy(patch_addr, code, len); + } + + smp_wmb(); /* smp write barrier */ + flush_icache_range(start, start + len); + return 0; +} + +/* + * A page is mapped and instructions that fit the page are patched. + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. + */ +static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + struct mm_struct *patching_mm, *orig_mm; + unsigned long pfn = get_patch_pfn(addr); + unsigned long text_poke_addr; + spinlock_t *ptl; + u32 *patch_addr; + pte_t *pte; + int err; + + patching_mm = __this_cpu_read(cpu_patching_context.mm); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = get_locked_pte(patching_mm, text_poke_addr, &ptl); + if (!pte) + return -ENOMEM; + + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + + /* order PTE update before use, also serves as the hwsync */ + asm volatile("ptesync" ::: "memory"); + + /* order context switch after arbitrary prior code */ + isync(); + + orig_mm = start_using_temp_mm(patching_mm); + + err = __patch_instructions(patch_addr, code, len, repeat_instr); + + /* context synchronisation performed by __patch_instructions */ + stop_using_temp_mm(patching_mm, orig_mm); + + pte_clear(patching_mm, text_poke_addr, pte); + /* + * ptesync to order PTE update before TLB invalidation done + * by radix__local_flush_tlb_page_psize (in _tlbiel_va) + */ + local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + + pte_unmap_unlock(pte, ptl); + + return err; +} + +/* + * A page is mapped and instructions that fit the page are patched. + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below. + */ +static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + unsigned long pfn = get_patch_pfn(addr); + unsigned long text_poke_addr; + u32 *patch_addr; + pte_t *pte; + int err; + + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = __this_cpu_read(cpu_patching_context.pte); + __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync" ::: "memory"); + + err = __patch_instructions(patch_addr, code, len, repeat_instr); + + pte_clear(&init_mm, text_poke_addr, pte); + flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); + + return err; +} + +/* + * Patch 'addr' with 'len' bytes of instructions from 'code'. + * + * If repeat_instr is true, the same instruction is filled for + * 'len' bytes. + */ +int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr) +{ + while (len > 0) { + unsigned long flags; + size_t plen; + int err; + + plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len); + + local_irq_save(flags); + if (mm_patch_enabled()) + err = __do_patch_instructions_mm(addr, code, plen, repeat_instr); + else + err = __do_patch_instructions(addr, code, plen, repeat_instr); + local_irq_restore(flags); + if (err) + return err; + + len -= plen; + addr = (u32 *)((unsigned long)addr + plen); + if (!repeat_instr) + code = (u32 *)((unsigned long)code + plen); + } + + return 0; +} +NOKPROBE_SYMBOL(patch_instructions); + int patch_branch(u32 *addr, unsigned long target, int flags) { ppc_inst_t instr; diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 6dd2f46bd3ef..5de4dd549f6e 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -16,7 +16,8 @@ struct qnode { struct qnode *next; struct qspinlock *lock; int cpu; - int yield_cpu; + u8 sleepy; /* 1 if the previous vCPU was preempted or + * if the previous node was sleepy */ u8 locked; /* 1 if lock acquired */ }; @@ -43,7 +44,7 @@ static bool pv_sleepy_lock_sticky __read_mostly = false; static u64 pv_sleepy_lock_interval_ns __read_mostly = 0; static int pv_sleepy_lock_factor __read_mostly = 256; static bool pv_yield_prev __read_mostly = true; -static bool pv_yield_propagate_owner __read_mostly = true; +static bool pv_yield_sleepy_owner __read_mostly = true; static bool pv_prod_head __read_mostly = false; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -247,22 +248,18 @@ static __always_inline void seen_sleepy_lock(void) this_cpu_write(sleepy_lock_seen_clock, sched_clock()); } -static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val) +static __always_inline void seen_sleepy_node(void) { if (pv_sleepy_lock) { if (pv_sleepy_lock_interval_ns) this_cpu_write(sleepy_lock_seen_clock, sched_clock()); - if (val & _Q_LOCKED_VAL) { - if (!(val & _Q_SLEEPY_VAL)) - try_set_sleepy(lock, val); - } + /* Don't set sleepy because we likely have a stale val */ } } -static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) +static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu) { - int cpu = decode_tail_cpu(val); - struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu); + struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu); int idx; /* @@ -353,77 +350,66 @@ static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u return __yield_to_locked_owner(lock, val, paravirt, mustq); } -static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt) +static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt) { struct qnode *next; int owner; if (!paravirt) return; - if (!pv_yield_propagate_owner) - return; - - owner = get_owner_cpu(val); - if (*set_yield_cpu == owner) + if (!pv_yield_sleepy_owner) return; next = READ_ONCE(node->next); if (!next) return; - if (vcpu_is_preempted(owner)) { - next->yield_cpu = owner; - *set_yield_cpu = owner; - } else if (*set_yield_cpu != -1) { - next->yield_cpu = owner; - *set_yield_cpu = owner; - } + if (next->sleepy) + return; + + owner = get_owner_cpu(val); + if (vcpu_is_preempted(owner)) + next->sleepy = 1; } /* Called inside spin_begin() */ -static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) +static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt) { - int prev_cpu = decode_tail_cpu(val); u32 yield_count; - int yield_cpu; bool preempted = false; if (!paravirt) goto relax; - if (!pv_yield_propagate_owner) - goto yield_prev; - - yield_cpu = READ_ONCE(node->yield_cpu); - if (yield_cpu == -1) { - /* Propagate back the -1 CPU */ - if (node->next && node->next->yield_cpu != -1) - node->next->yield_cpu = yield_cpu; + if (!pv_yield_sleepy_owner) goto yield_prev; - } - - yield_count = yield_count_of(yield_cpu); - if ((yield_count & 1) == 0) - goto yield_prev; /* owner vcpu is running */ - - if (get_owner_cpu(READ_ONCE(lock->val)) != yield_cpu) - goto yield_prev; /* re-sample lock owner */ - spin_end(); - - preempted = true; - seen_sleepy_node(lock, val); + /* + * If the previous waiter was preempted it might not be able to + * propagate sleepy to us, so check the lock in that case too. + */ + if (node->sleepy || vcpu_is_preempted(prev_cpu)) { + u32 val = READ_ONCE(lock->val); - smp_rmb(); + if (val & _Q_LOCKED_VAL) { + if (node->next && !node->next->sleepy) { + /* + * Propagate sleepy to next waiter. Only if + * owner is preempted, which allows the queue + * to become "non-sleepy" if vCPU preemption + * ceases to occur, even if the lock remains + * highly contended. + */ + if (vcpu_is_preempted(get_owner_cpu(val))) + node->next->sleepy = 1; + } - if (yield_cpu == node->yield_cpu) { - if (node->next && node->next->yield_cpu != yield_cpu) - node->next->yield_cpu = yield_cpu; - yield_to_preempted(yield_cpu, yield_count); - spin_begin(); - return preempted; + preempted = yield_to_locked_owner(lock, val, paravirt); + if (preempted) + return preempted; + } + node->sleepy = false; } - spin_begin(); yield_prev: if (!pv_yield_prev) @@ -436,7 +422,7 @@ yield_prev: spin_end(); preempted = true; - seen_sleepy_node(lock, val); + seen_sleepy_node(); smp_rmb(); /* See __yield_to_locked_owner comment */ @@ -546,7 +532,6 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b bool sleepy = false; bool mustq = false; int idx; - int set_yield_cpu = -1; int iters = 0; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); @@ -570,7 +555,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b node->next = NULL; node->lock = lock; node->cpu = smp_processor_id(); - node->yield_cpu = -1; + node->sleepy = 0; node->locked = 0; tail = encode_tail_cpu(node->cpu); @@ -587,7 +572,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b * head of the waitqueue. */ if (old & _Q_TAIL_CPU_MASK) { - struct qnode *prev = get_tail_qnode(lock, old); + int prev_cpu = decode_tail_cpu(old); + struct qnode *prev = get_tail_qnode(lock, prev_cpu); /* Link @node into the waitqueue. */ WRITE_ONCE(prev->next, node); @@ -597,16 +583,12 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b while (!READ_ONCE(node->locked)) { spec_barrier(); - if (yield_to_prev(lock, node, old, paravirt)) + if (yield_to_prev(lock, node, prev_cpu, paravirt)) seen_preempted = true; } spec_barrier(); spin_end(); - /* Clear out stale propagated yield_cpu */ - if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1) - node->yield_cpu = -1; - smp_rmb(); /* acquire barrier for the mcs lock */ /* @@ -648,7 +630,7 @@ again: } } - propagate_yield_cpu(node, val, &set_yield_cpu, paravirt); + propagate_sleepy(node, val, paravirt); preempted = yield_head_to_locked_owner(lock, val, paravirt); if (!maybe_stealers) continue; @@ -952,21 +934,21 @@ static int pv_yield_prev_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); -static int pv_yield_propagate_owner_set(void *data, u64 val) +static int pv_yield_sleepy_owner_set(void *data, u64 val) { - pv_yield_propagate_owner = !!val; + pv_yield_sleepy_owner = !!val; return 0; } -static int pv_yield_propagate_owner_get(void *data, u64 *val) +static int pv_yield_sleepy_owner_get(void *data, u64 *val) { - *val = pv_yield_propagate_owner; + *val = pv_yield_sleepy_owner; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n"); static int pv_prod_head_set(void *data, u64 val) { @@ -998,7 +980,7 @@ static __init int spinlock_debugfs_init(void) debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns); debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor); debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); - debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); + debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner); debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); } diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 8b804e1a9fa4..4ed0efd03db5 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -36,8 +36,9 @@ /* * Load a PTE into the hash table, if possible. - * The address is in r4, and r3 contains an access flag: - * _PAGE_RW (0x400) if a write. + * The address is in r4, and r3 contains required access flags: + * - For ISI: _PAGE_PRESENT | _PAGE_EXEC + * - For DSI: _PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE if a write. * r9 contains the SRR1 value, from which we use the MSR_PR bit. * SPRG_THREAD contains the physical address of the current task's thread. * @@ -67,12 +68,16 @@ _GLOBAL(hash_page) lis r0, TASK_SIZE@h /* check if kernel address */ cmplw 0,r4,r0 mfspr r8,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ - ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ lwz r5,PGDIR(r8) /* virt page-table root */ blt+ 112f /* assume user more likely */ lis r5,swapper_pg_dir@ha /* if kernel address, use */ + andi. r0,r9,MSR_PR /* Check usermode */ addi r5,r5,swapper_pg_dir@l /* kernel page table */ - rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ +#ifdef CONFIG_SMP + bne- .Lhash_page_out /* return if usermode */ +#else + bnelr- +#endif 112: tophys(r5, r5) #ifndef CONFIG_PTE_64BIT rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ @@ -113,15 +118,15 @@ _GLOBAL(hash_page) lwarx r6,0,r8 /* get linux-style pte, flag word */ #ifdef CONFIG_PPC_KUAP mfsrin r5,r4 - rlwinm r0,r9,28,_PAGE_RW /* MSR[PR] => _PAGE_RW */ - rlwinm r5,r5,12,_PAGE_RW /* Ks => _PAGE_RW */ + rlwinm r0,r9,28,_PAGE_WRITE /* MSR[PR] => _PAGE_WRITE */ + rlwinm r5,r5,12,_PAGE_WRITE /* Ks => _PAGE_WRITE */ andc r5,r5,r0 /* Ks & ~MSR[PR] */ - andc r5,r6,r5 /* Clear _PAGE_RW when Ks = 1 && MSR[PR] = 0 */ + andc r5,r6,r5 /* Clear _PAGE_WRITE when Ks = 1 && MSR[PR] = 0 */ andc. r5,r3,r5 /* check access & ~permission */ #else andc. r5,r3,r6 /* check access & ~permission */ #endif - rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + rlwinm r0,r3,32-3,24,24 /* _PAGE_WRITE access -> _PAGE_DIRTY */ ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE #ifdef CONFIG_SMP bne- .Lhash_page_out /* return if access not permitted */ @@ -307,12 +312,15 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) __REF _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ - rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */ + lis r0, TASK_SIZE@h + rlwinm r5,r5,0,~3 /* Clear PP bits */ + cmplw r4,r0 + rlwinm r8,r5,32-9,30,30 /* _PAGE_WRITE -> PP msb */ rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */ and r8,r8,r0 /* writable if _RW & _DIRTY */ - rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ - ori r8,r8,0xe04 /* clear out reserved bits */ + bge- 1f /* Kernelspace ? Skip */ + ori r5,r5,3 /* Userspace ? PP = 3 */ +1: ori r8,r8,0xe04 /* clear out reserved bits */ andc r8,r5,r8 /* PP = user? (rw&dirty? 1: 3): 0 */ BEGIN_FTR_SECTION rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 850783cfa9c7..5445587bfe84 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -127,7 +127,7 @@ static void setibat(int index, unsigned long virt, phys_addr_t phys, wimgxpp = (flags & _PAGE_COHERENT) | (_PAGE_EXEC ? BPP_RX : BPP_XX); bat[0].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ bat[0].batl = BAT_PHYS_ADDR(phys) | wimgxpp; - if (flags & _PAGE_USER) + if (!is_kernel_addr(virt)) bat[0].batu |= 1; /* Vp = 1 */ } @@ -277,10 +277,10 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, /* Do DBAT first */ wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT | _PAGE_GUARDED); - wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; + wimgxpp |= (flags & _PAGE_WRITE) ? BPP_RW : BPP_RX; bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp; - if (flags & _PAGE_USER) + if (!is_kernel_addr(virt)) bat[1].batu |= 1; /* Vp = 1 */ if (flags & _PAGE_GUARDED) { /* G bit must be zero in IBATs */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 8f8a62d3ff4d..be229290a6a7 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -635,12 +635,10 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags) unsigned long prot; /* Radix supports execute-only, but protection_map maps X -> RX */ - if (radix_enabled() && ((vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)) { - prot = pgprot_val(PAGE_EXECONLY); - } else { - prot = pgprot_val(protection_map[vm_flags & - (VM_ACCESS_FLAGS | VM_SHARED)]); - } + if (!radix_enabled() && ((vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)) + vm_flags |= VM_READ; + + prot = pgprot_val(protection_map[vm_flags & (VM_ACCESS_FLAGS | VM_SHARED)]); if (vm_flags & VM_SAO) prot |= _PAGE_SAO; diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 2369d1bf2411..fde7790277f7 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -67,7 +67,7 @@ static int drmem_update_dt_v1(struct device_node *memory, struct property *new_prop; struct of_drconf_cell_v1 *dr_cell; struct drmem_lmb *lmb; - u32 *p; + __be32 *p; new_prop = clone_property(prop, prop->length); if (!new_prop) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b1723094d464..9e49ede2bc1c 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -266,14 +266,15 @@ static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma } /* - * VM_READ, VM_WRITE and VM_EXEC all imply read permissions, as - * defined in protection_map[]. Read faults can only be caused by - * a PROT_NONE mapping, or with a PROT_EXEC-only mapping on Radix. + * VM_READ, VM_WRITE and VM_EXEC may imply read permissions, as + * defined in protection_map[]. In that case Read faults can only be + * caused by a PROT_NONE mapping. However a non exec access on a + * VM_EXEC only mapping is invalid anyway, so report it as such. */ if (unlikely(!vma_is_accessible(vma))) return true; - if (unlikely(radix_enabled() && ((vma->vm_flags & VM_ACCESS_FLAGS) == VM_EXEC))) + if ((vma->vm_flags & VM_ACCESS_FLAGS) == VM_EXEC) return true; /* diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index d8adc452f431..4e71dfe7d026 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -39,6 +39,7 @@ #include <asm/hugetlb.h> #include <asm/kup.h> #include <asm/kasan.h> +#include <asm/fixmap.h> #include <mm/mmu_decl.h> diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index 705e8e8ffde4..7b0afcabd89f 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -50,10 +50,6 @@ void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long flags) if (pte_write(pte)) pte = pte_mkdirty(pte); - /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ - pte = pte_exprotect(pte); - pte = pte_mkprivileged(pte); - if (iowa_is_active()) return iowa_ioremap(addr, size, pte_pgprot(pte), caller); return __ioremap_caller(addr, size, pte_pgprot(pte), caller); @@ -66,7 +62,7 @@ int early_ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long i; for (i = 0; i < size; i += PAGE_SIZE) { - int err = map_kernel_page(ea + i, pa + i, prot); + int err = map_kernel_page(ea + i, pa + i, pgprot_nx(prot)); if (WARN_ON_ONCE(err)) /* Should clean up */ return err; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 07e8f4f1e07f..1717554b04b1 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -26,6 +26,7 @@ #include <asm/ftrace.h> #include <asm/code-patching.h> #include <asm/setup.h> +#include <asm/fixmap.h> #include <mm/mmu_decl.h> diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c index 3684d6e570fb..e835e80c09db 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -48,20 +48,25 @@ */ void __init MMU_init_hw(void) { + int i; + unsigned long zpr; + /* * The Zone Protection Register (ZPR) defines how protection will - * be applied to every page which is a member of a given zone. At - * present, we utilize only two of the 4xx's zones. + * be applied to every page which is a member of a given zone. * The zone index bits (of ZSEL) in the PTE are used for software - * indicators, except the LSB. For user access, zone 1 is used, - * for kernel access, zone 0 is used. We set all but zone 1 - * to zero, allowing only kernel access as indicated in the PTE. - * For zone 1, we set a 01 binary (a value of 10 will not work) + * indicators. We use the 4 upper bits of virtual address to select + * the zone. We set all zones above TASK_SIZE to zero, allowing + * only kernel access as indicated in the PTE. For zones below + * TASK_SIZE, we set a 01 binary (a value of 10 will not work) * to allow user access as indicated in the PTE. This also allows * kernel access as indicated in the PTE. */ - mtspr(SPRN_ZPR, 0x10000000); + for (i = 0, zpr = 0; i < TASK_SIZE >> 28; i++) + zpr |= 1 << (30 - i * 2); + + mtspr(SPRN_ZPR, zpr); flush_instruction_cache(); diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index a642a7929892..6be6421086ed 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -10,6 +10,8 @@ #include <linux/memblock.h> #include <linux/hugetlb.h> +#include <asm/fixmap.h> + #include <mm/mmu_decl.h> #define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index b80fc4a91a53..1c5e4ecbebeb 100644 --- a/arch/powerpc/mm/nohash/book3e_pgtable.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -71,7 +71,7 @@ static void __init *early_alloc_pgtable(unsigned long size) * map_kernel_page adds an entry to the ioremap page table * and adds an entry to the HPT, possibly bolting it */ -int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) +int __ref map_kernel_page(unsigned long ea, phys_addr_t pa, pgprot_t prot) { pgd_t *pgdp; p4d_t *p4dp; diff --git a/arch/powerpc/mm/nohash/e500.c b/arch/powerpc/mm/nohash/e500.c index 40a4e69ae1a9..921c3521ec11 100644 --- a/arch/powerpc/mm/nohash/e500.c +++ b/arch/powerpc/mm/nohash/e500.c @@ -117,15 +117,15 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SR; - TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_SW : 0; + TLBCAM[index].MAS3 |= (flags & _PAGE_WRITE) ? MAS3_SW : 0; if (mmu_has_feature(MMU_FTR_BIG_PHYS)) TLBCAM[index].MAS7 = (u64)phys >> 32; /* Below is unlikely -- only for large user pages or similar */ - if (pte_user(__pte(flags))) { + if (!is_kernel_addr(virt)) { TLBCAM[index].MAS3 |= MAS3_UR; TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_UX : 0; - TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_UW : 0; + TLBCAM[index].MAS3 |= (flags & _PAGE_WRITE) ? MAS3_UW : 0; } else { TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_SX : 0; } diff --git a/arch/powerpc/mm/nohash/e500_hugetlbpage.c b/arch/powerpc/mm/nohash/e500_hugetlbpage.c index 6b30e40d4590..a134d28a0e4d 100644 --- a/arch/powerpc/mm/nohash/e500_hugetlbpage.c +++ b/arch/powerpc/mm/nohash/e500_hugetlbpage.c @@ -178,8 +178,7 @@ book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte) * * This must always be called with the pte lock held. */ -void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, unsigned int nr) +void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { if (is_vm_hugetlb_page(vma)) book3e_hugetlb_preload(vma, address, *ptep); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 4d69bfb9bc11..a04ae4449a02 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -46,13 +46,13 @@ static inline int is_exec_fault(void) * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that * on userspace PTEs */ -static inline int pte_looks_normal(pte_t pte) +static inline int pte_looks_normal(pte_t pte, unsigned long addr) { if (pte_present(pte) && !pte_special(pte)) { if (pte_ci(pte)) return 0; - if (pte_user(pte)) + if (!is_kernel_addr(addr)) return 1; } return 0; @@ -79,11 +79,11 @@ static struct folio *maybe_pte_to_folio(pte_t pte) * support falls into the same category. */ -static pte_t set_pte_filter_hash(pte_t pte) +static pte_t set_pte_filter_hash(pte_t pte, unsigned long addr) { pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); - if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || - cpu_has_feature(CPU_FTR_NOEXECUTE))) { + if (pte_looks_normal(pte, addr) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || + cpu_has_feature(CPU_FTR_NOEXECUTE))) { struct folio *folio = maybe_pte_to_folio(pte); if (!folio) return pte; @@ -97,7 +97,7 @@ static pte_t set_pte_filter_hash(pte_t pte) #else /* CONFIG_PPC_BOOK3S */ -static pte_t set_pte_filter_hash(pte_t pte) { return pte; } +static pte_t set_pte_filter_hash(pte_t pte, unsigned long addr) { return pte; } #endif /* CONFIG_PPC_BOOK3S */ @@ -107,7 +107,7 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; } * * This is also called once for the folio. So only work with folio->flags here. */ -static inline pte_t set_pte_filter(pte_t pte) +static inline pte_t set_pte_filter(pte_t pte, unsigned long addr) { struct folio *folio; @@ -115,10 +115,10 @@ static inline pte_t set_pte_filter(pte_t pte) return pte; if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) - return set_pte_filter_hash(pte); + return set_pte_filter_hash(pte, addr); /* No exec permission in the first place, move on */ - if (!pte_exec(pte) || !pte_looks_normal(pte)) + if (!pte_exec(pte) || !pte_looks_normal(pte, addr)) return pte; /* If you set _PAGE_EXEC on weird pages you're on your own */ @@ -198,7 +198,7 @@ void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, * is called. Filter the pte value and use the filtered value * to setup all the ptes in the range. */ - pte = set_pte_filter(pte); + pte = set_pte_filter(pte, addr); /* * We don't need to call arch_enter/leave_lazy_mmu_mode() @@ -314,7 +314,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, */ VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); - pte = set_pte_filter(pte); + pte = set_pte_filter(pte, addr); val = pte_val(pte); @@ -505,7 +505,7 @@ const pgprot_t protection_map[16] = { [VM_READ] = PAGE_READONLY, [VM_WRITE] = PAGE_COPY, [VM_WRITE | VM_READ] = PAGE_COPY, - [VM_EXEC] = PAGE_READONLY_X, + [VM_EXEC] = PAGE_EXECONLY_X, [VM_EXEC | VM_READ] = PAGE_READONLY_X, [VM_EXEC | VM_WRITE] = PAGE_COPY_X, [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_X, @@ -513,7 +513,7 @@ const pgprot_t protection_map[16] = { [VM_SHARED | VM_READ] = PAGE_READONLY, [VM_SHARED | VM_WRITE] = PAGE_SHARED, [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED, - [VM_SHARED | VM_EXEC] = PAGE_READONLY_X, + [VM_SHARED | VM_EXEC] = PAGE_EXECONLY_X, [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READONLY_X, [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_X, [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_X diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c index fac932eb8f9a..b5c79b11ea3c 100644 --- a/arch/powerpc/mm/ptdump/8xx.c +++ b/arch/powerpc/mm/ptdump/8xx.c @@ -21,11 +21,6 @@ static const struct flag_info flag_array[] = { .set = "huge", .clear = " ", }, { - .mask = _PAGE_SH, - .val = 0, - .set = "user", - .clear = " ", - }, { .mask = _PAGE_RO | _PAGE_NA, .val = 0, .set = "rw", diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index f884760ca5cf..39c30c62b7ea 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -11,15 +11,15 @@ static const struct flag_info flag_array[] = { { - .mask = _PAGE_USER, - .val = _PAGE_USER, - .set = "user", - .clear = " ", + .mask = _PAGE_READ, + .val = 0, + .set = " ", + .clear = "r", }, { - .mask = _PAGE_RW, + .mask = _PAGE_WRITE, .val = 0, - .set = "r ", - .clear = "rw", + .set = " ", + .clear = "w", }, { .mask = _PAGE_EXEC, .val = _PAGE_EXEC, diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 72b7bb34fade..cdea5dccaefe 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -36,9 +36,6 @@ EMIT(PPC_RAW_BRANCH(offset)); \ } while (0) -/* bl (unconditional 'branch' with link) */ -#define PPC_BL(dest) EMIT(PPC_RAW_BL((dest) - (unsigned long)(image + ctx->idx))) - /* "cond" here covers BO:BI fields. */ #define PPC_BCC_SHORT(cond, dest) \ do { \ @@ -147,12 +144,6 @@ struct codegen_context { #define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */ #endif -static inline void bpf_flush_icache(void *start, void *end) -{ - smp_wmb(); /* smp write barrier */ - flush_icache_range((unsigned long)start, (unsigned long)end); -} - static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) { return ctx->seen & (1 << (31 - i)); @@ -169,16 +160,17 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) } void bpf_jit_init_reg_mapping(struct codegen_context *ctx); -int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, +int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func); +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx, u32 *addrs, int pass, bool extra_pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr); -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, - int insn_idx, int jmp_off, int dst_reg); +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass, + struct codegen_context *ctx, int insn_idx, + int jmp_off, int dst_reg); #endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 37043dfc1add..0f9a21783329 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -13,9 +13,13 @@ #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/if_vlan.h> -#include <asm/kprobes.h> +#include <linux/kernel.h> +#include <linux/memory.h> #include <linux/bpf.h> +#include <asm/kprobes.h> +#include <asm/code-patching.h> + #include "bpf_jit.h" static void bpf_jit_fill_ill_insns(void *area, unsigned int size) @@ -39,10 +43,13 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, return 0; } -struct powerpc64_jit_data { - struct bpf_binary_header *header; +struct powerpc_jit_data { + /* address of rw header */ + struct bpf_binary_header *hdr; + /* address of ro final header */ + struct bpf_binary_header *fhdr; u32 *addrs; - u8 *image; + u8 *fimage; u32 proglen; struct codegen_context ctx; }; @@ -59,15 +66,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) u8 *image = NULL; u32 *code_base; u32 *addrs; - struct powerpc64_jit_data *jit_data; + struct powerpc_jit_data *jit_data; struct codegen_context cgctx; int pass; int flen; - struct bpf_binary_header *bpf_hdr; + struct bpf_binary_header *fhdr = NULL; + struct bpf_binary_header *hdr = NULL; struct bpf_prog *org_fp = fp; struct bpf_prog *tmp_fp; bool bpf_blinded = false; bool extra_pass = false; + u8 *fimage = NULL; + u32 *fcode_base; u32 extable_len; u32 fixup_len; @@ -97,9 +107,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) addrs = jit_data->addrs; if (addrs) { cgctx = jit_data->ctx; - image = jit_data->image; - bpf_hdr = jit_data->header; + /* + * JIT compiled to a writable location (image/code_base) first. + * It is then moved to the readonly final location (fimage/fcode_base) + * using instruction patching. + */ + fimage = jit_data->fimage; + fhdr = jit_data->fhdr; proglen = jit_data->proglen; + hdr = jit_data->hdr; + image = (void *)hdr + ((void *)fimage - (void *)fhdr); extra_pass = true; /* During extra pass, ensure index is reset before repopulating extable entries */ cgctx.exentry_idx = 0; @@ -119,7 +136,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) cgctx.stack_size = round_up(fp->aux->stack_depth, 16); /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) { + if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) { /* We hit something illegal or unsupported. */ fp = org_fp; goto out_addrs; @@ -134,7 +151,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) */ if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) { cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) { + if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) { fp = org_fp; goto out_addrs; } @@ -146,9 +163,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * update ctgtx.idx as it pretends to output instructions, then we can * calculate total size from idx. */ - bpf_jit_build_prologue(0, &cgctx); + bpf_jit_build_prologue(NULL, &cgctx); addrs[fp->len] = cgctx.idx * 4; - bpf_jit_build_epilogue(0, &cgctx); + bpf_jit_build_epilogue(NULL, &cgctx); fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4; extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry); @@ -156,17 +173,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) proglen = cgctx.idx * 4; alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len; - bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); - if (!bpf_hdr) { + fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image, + bpf_jit_fill_ill_insns); + if (!fhdr) { fp = org_fp; goto out_addrs; } if (extable_len) - fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len; + fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len; skip_init_ctx: code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE); /* Code generation passes 1-2 */ for (pass = 1; pass < 3; pass++) { @@ -174,8 +193,10 @@ skip_init_ctx: cgctx.idx = 0; cgctx.alt_exit_addr = 0; bpf_jit_build_prologue(code_base, &cgctx); - if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) { - bpf_jit_binary_free(bpf_hdr); + if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass, + extra_pass)) { + bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size)); + bpf_jit_binary_pack_free(fhdr, hdr); fp = org_fp; goto out_addrs; } @@ -195,17 +216,19 @@ skip_init_ctx: #ifdef CONFIG_PPC64_ELF_ABI_V1 /* Function descriptor nastiness: Address + TOC */ - ((u64 *)image)[0] = (u64)code_base; + ((u64 *)image)[0] = (u64)fcode_base; ((u64 *)image)[1] = local_paca->kernel_toc; #endif - fp->bpf_func = (void *)image; + fp->bpf_func = (void *)fimage; fp->jited = 1; fp->jited_len = proglen + FUNCTION_DESCR_SIZE; - bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size); if (!fp->is_func || extra_pass) { - bpf_jit_binary_lock_ro(bpf_hdr); + if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) { + fp = org_fp; + goto out_addrs; + } bpf_prog_fill_jited_linfo(fp, addrs); out_addrs: kfree(addrs); @@ -215,8 +238,9 @@ out_addrs: jit_data->addrs = addrs; jit_data->ctx = cgctx; jit_data->proglen = proglen; - jit_data->image = image; - jit_data->header = bpf_hdr; + jit_data->fimage = fimage; + jit_data->fhdr = fhdr; + jit_data->hdr = hdr; } out: @@ -230,12 +254,13 @@ out: * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling * this function, as this only applies to BPF_PROBE_MEM, for now. */ -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, - int insn_idx, int jmp_off, int dst_reg) +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass, + struct codegen_context *ctx, int insn_idx, int jmp_off, + int dst_reg) { off_t offset; unsigned long pc; - struct exception_table_entry *ex; + struct exception_table_entry *ex, *ex_entry; u32 *fixup; /* Populate extable entries only in the last pass */ @@ -246,9 +271,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries)) return -EINVAL; + /* + * Program is first written to image before copying to the + * final location (fimage). Accordingly, update in the image first. + * As all offsets used are relative, copying as is to the + * final location should be alright. + */ pc = (unsigned long)&image[insn_idx]; + ex = (void *)fp->aux->extable - (void *)fimage + (void *)image; - fixup = (void *)fp->aux->extable - + fixup = (void *)ex - (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) + (ctx->exentry_idx * BPF_FIXUP_LEN * 4); @@ -259,18 +291,71 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code fixup[BPF_FIXUP_LEN - 1] = PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]); - ex = &fp->aux->extable[ctx->exentry_idx]; + ex_entry = &ex[ctx->exentry_idx]; - offset = pc - (long)&ex->insn; + offset = pc - (long)&ex_entry->insn; if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) return -ERANGE; - ex->insn = offset; + ex_entry->insn = offset; - offset = (long)fixup - (long)&ex->fixup; + offset = (long)fixup - (long)&ex_entry->fixup; if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) return -ERANGE; - ex->fixup = offset; + ex_entry->fixup = offset; ctx->exentry_idx++; return 0; } + +void *bpf_arch_text_copy(void *dst, void *src, size_t len) +{ + int err; + + if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst))) + return ERR_PTR(-EINVAL); + + mutex_lock(&text_mutex); + err = patch_instructions(dst, src, len, false); + mutex_unlock(&text_mutex); + + return err ? ERR_PTR(err) : dst; +} + +int bpf_arch_text_invalidate(void *dst, size_t len) +{ + u32 insn = BREAKPOINT_INSTRUCTION; + int ret; + + if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst))) + return -EINVAL; + + mutex_lock(&text_mutex); + ret = patch_instructions(dst, &insn, len, true); + mutex_unlock(&text_mutex); + + return ret; +} + +void bpf_jit_free(struct bpf_prog *fp) +{ + if (fp->jited) { + struct powerpc_jit_data *jit_data = fp->aux->jit_data; + struct bpf_binary_header *hdr; + + /* + * If we fail the final pass of JIT (from jit_subprogs), + * the program may not be finalized yet. Call finalize here + * before freeing it. + */ + if (jit_data) { + bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr); + kvfree(jit_data->addrs); + kfree(jit_data); + } + hdr = bpf_jit_binary_pack_hdr(fp); + bpf_jit_binary_pack_free(hdr, NULL); + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); + } + + bpf_prog_unlock_free(fp); +} diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 7f91ea064c08..2f39c50ca729 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -200,12 +200,13 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +/* Relative offset needs to be calculated based on final image location */ +int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) { - s32 rel = (s32)func - (s32)(image + ctx->idx); + s32 rel = (s32)func - (s32)(fimage + ctx->idx); if (image && rel < 0x2000000 && rel >= -0x2000000) { - PPC_BL(func); + EMIT(PPC_RAW_BL(rel)); } else { /* Load function address into r0 */ EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); @@ -278,7 +279,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o } /* Assemble the body code between the prologue & epilogue */ -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx, u32 *addrs, int pass, bool extra_pass) { const struct bpf_insn *insn = fp->insnsi; @@ -940,7 +941,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * !fp->aux->verifier_zext. Emit NOP otherwise. * * Note that "li reg_h,0" is emitted for BPF_B/H/W case, - * if necessary. So, jump there insted of emitting an + * if necessary. So, jump there instead of emitting an * additional "li reg_h,0" instruction. */ if (size == BPF_DW && !fp->aux->verifier_zext) @@ -997,7 +998,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * jmp_off += 4; } - ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx, + ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, insn_idx, jmp_off, dst_reg); if (ret) return ret; @@ -1053,7 +1054,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5), _R1, 12)); } - ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr); if (ret) return ret; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 0f8048f6dad6..79f23974a320 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -240,7 +240,7 @@ static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u return 0; } -int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) { unsigned int i, ctx_idx = ctx->idx; @@ -361,7 +361,7 @@ asm ( ); /* Assemble the body code between the prologue & epilogue */ -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx, u32 *addrs, int pass, bool extra_pass) { enum stf_barrier_type stf_barrier = stf_barrier_type_get(); @@ -940,8 +940,8 @@ emit_clear: addrs[++i] = ctx->idx * 4; if (BPF_MODE(code) == BPF_PROBE_MEM) { - ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1, - 4, dst_reg); + ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, + ctx->idx - 1, 4, dst_reg); if (ret) return ret; } @@ -995,7 +995,7 @@ emit_clear: if (func_addr_fixed) ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr); else - ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr); if (ret) return ret; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 8c1f7def596e..10b946e9c6e7 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1371,8 +1371,7 @@ static void power_pmu_disable(struct pmu *pmu) /* * Disable instruction sampling if it was enabled */ - if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) - val &= ~MMCRA_SAMPLE_ENABLE; + val &= ~MMCRA_SAMPLE_ENABLE; /* Disable BHRB via mmcra (BHRBRD) for p10 */ if (ppmu->flags & PPMU_ARCH_31) @@ -1383,7 +1382,7 @@ static void power_pmu_disable(struct pmu *pmu) * instruction sampling or BHRB. */ if (val != mmcra) { - mtspr(SPRN_MMCRA, mmcra); + mtspr(SPRN_MMCRA, val); mb(); isync(); } diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 3449be7c0d51..057ec2e3451d 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1338,7 +1338,7 @@ static int get_count_from_result(struct perf_event *event, for (i = count = 0, element_data = res->elements + data_offset; i < num_elements; i++, element_data += data_size + data_offset) - count += be64_to_cpu(*((u64 *) element_data)); + count += be64_to_cpu(*((__be64 *)element_data)); *countp = count; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9d229ef7f86e..5d12ca386c1f 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -51,7 +51,7 @@ static int trace_imc_mem_size; * core and trace-imc */ static struct imc_pmu_ref imc_global_refc = { - .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock), + .lock = __SPIN_LOCK_UNLOCKED(imc_global_refc.lock), .id = 0, .refc = 0, }; @@ -544,7 +544,7 @@ static int nest_imc_event_init(struct perf_event *event) break; } pcni++; - } while (pcni->vbase != 0); + } while (pcni->vbase); if (!flag) return -ENODEV; @@ -1025,16 +1025,16 @@ static bool is_thread_imc_pmu(struct perf_event *event) return false; } -static u64 * get_event_base_addr(struct perf_event *event) +static __be64 *get_event_base_addr(struct perf_event *event) { u64 addr; if (is_thread_imc_pmu(event)) { addr = (u64)per_cpu(thread_imc_mem, smp_processor_id()); - return (u64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK)); + return (__be64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK)); } - return (u64 *)event->hw.event_base; + return (__be64 *)event->hw.event_base; } static void thread_imc_pmu_start_txn(struct pmu *pmu, @@ -1058,7 +1058,8 @@ static int thread_imc_pmu_commit_txn(struct pmu *pmu) static u64 imc_read_counter(struct perf_event *event) { - u64 *addr, data; + __be64 *addr; + u64 data; /* * In-Memory Collection (IMC) counters are free flowing counters. diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 5729b6e059de..9f720b522e17 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -335,26 +335,38 @@ static const unsigned int event_alternatives[][MAX_ALT] = { { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */ }; -/* - * This could be made more efficient with a binary search on - * a presorted list, if necessary - */ static int find_alternatives_list(u64 event) { - int i, j; - unsigned int alt; - - for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { - if (event < event_alternatives[i][0]) - return -1; - for (j = 0; j < MAX_ALT; ++j) { - alt = event_alternatives[i][j]; - if (!alt || event < alt) - break; - if (event == alt) - return i; - } + const unsigned int presorted_event_table[] = { + 0x0130e8, 0x080080, 0x080088, 0x10000a, 0x10000b, 0x10000d, 0x10000e, + 0x100010, 0x10001a, 0x100026, 0x100054, 0x100056, 0x1000f0, 0x1000f8, + 0x1000fc, 0x200008, 0x20000e, 0x200010, 0x200012, 0x200054, 0x2000f0, + 0x2000f2, 0x2000f4, 0x2000f5, 0x2000f6, 0x2000f8, 0x2000fc, 0x2000fe, + 0x2d0030, 0x30000a, 0x30000c, 0x300010, 0x300012, 0x30001a, 0x300056, + 0x3000f0, 0x3000f2, 0x3000f6, 0x3000f8, 0x3000fc, 0x3000fe, 0x400006, + 0x400007, 0x40000a, 0x40000e, 0x400010, 0x400018, 0x400056, 0x4000f0, + 0x4000f8, 0x600005 + }; + const unsigned int event_index_table[] = { + 0, 1, 2, 3, 4, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 12, 14, + 7, 15, 2, 9, 16, 3, 4, 0, 17, 10, 18, 19, 20, 1, 17, 15, 19, + 18, 2, 16, 21, 8, 0, 22, 13, 14, 11, 21, 5, 20, 22, 1, 6, 3 + }; + int hi = ARRAY_SIZE(presorted_event_table) - 1; + int lo = 0; + + while (lo <= hi) { + int mid = lo + (hi - lo) / 2; + unsigned int alt = presorted_event_table[mid]; + + if (alt < event) + lo = mid + 1; + else if (alt > event) + hi = mid - 1; + else + return event_index_table[mid]; } + return -1; } diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c index b2d940437a66..5412e6b21e10 100644 --- a/arch/powerpc/platforms/4xx/soc.c +++ b/arch/powerpc/platforms/4xx/soc.c @@ -112,7 +112,7 @@ static int __init ppc4xx_l2c_probe(void) } /* Install error handler */ - if (request_irq(irq, l2c_error_handler, 0, "L2C", 0) < 0) { + if (request_irq(irq, l2c_error_handler, 0, "L2C", NULL) < 0) { printk(KERN_ERR "Cannot install L2C error handler" ", cache is not enabled\n"); of_node_put(np); diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index 2fb2a85d131f..1135c1ab923c 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -14,6 +14,8 @@ #include <asm/io.h> #include <asm/hw_irq.h> #include <asm/ipic.h> +#include <asm/fixmap.h> + #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index ebb5f6a27dbf..b24d4102fbf6 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -40,6 +40,7 @@ #include <asm/io.h> #include <asm/rheap.h> #include <asm/cpm.h> +#include <asm/fixmap.h> #include <sysdev/fsl_soc.h> diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 77ea9335fd04..f381b177ea06 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -4,6 +4,8 @@ * Copyright (C) 2019 Haren Myneni, IBM Corp */ +#define pr_fmt(fmt) "vas-api: " fmt + #include <linux/kernel.h> #include <linux/device.h> #include <linux/cdev.h> @@ -78,7 +80,7 @@ int get_vas_user_win_ref(struct vas_user_win_ref *task_ref) task_ref->mm = get_task_mm(current); if (!task_ref->mm) { put_pid(task_ref->pid); - pr_err("VAS: pid(%d): mm_struct is not found\n", + pr_err("pid(%d): mm_struct is not found\n", current->pid); return -EPERM; } @@ -235,8 +237,7 @@ void vas_update_csb(struct coprocessor_request_block *crb, rc = kill_pid_info(SIGSEGV, &info, pid); rcu_read_unlock(); - pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, - pid_vnr(pid), rc); + pr_devel("pid %d kill_proc_info() rc %d\n", pid_vnr(pid), rc); } void vas_dump_crb(struct coprocessor_request_block *crb) @@ -294,7 +295,7 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) rc = copy_from_user(&uattr, uptr, sizeof(uattr)); if (rc) { - pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); + pr_err("copy_from_user() returns %d\n", rc); return -EFAULT; } @@ -311,7 +312,7 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags, cp_inst->coproc->cop_type); if (IS_ERR(txwin)) { - pr_err("%s() VAS window open failed, %ld\n", __func__, + pr_err_ratelimited("VAS window open failed rc=%ld\n", PTR_ERR(txwin)); return PTR_ERR(txwin); } @@ -405,8 +406,7 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) * window is not opened. Shouldn't expect this error. */ if (!cp_inst || !cp_inst->txwin) { - pr_err("%s(): Unexpected fault on paste address with TX window closed\n", - __func__); + pr_err("Unexpected fault on paste address with TX window closed\n"); return VM_FAULT_SIGBUS; } @@ -421,8 +421,7 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) * issue NX request. */ if (txwin->task_ref.vma != vmf->vma) { - pr_err("%s(): No previous mapping with paste address\n", - __func__); + pr_err("No previous mapping with paste address\n"); return VM_FAULT_SIGBUS; } @@ -481,19 +480,19 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) txwin = cp_inst->txwin; if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { - pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, + pr_debug("size 0x%zx, PAGE_SIZE 0x%zx\n", (vma->vm_end - vma->vm_start), PAGE_SIZE); return -EINVAL; } /* Ensure instance has an open send window */ if (!txwin) { - pr_err("%s(): No send window open?\n", __func__); + pr_err("No send window open?\n"); return -EINVAL; } if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) { - pr_err("%s(): VAS API is not registered\n", __func__); + pr_err("VAS API is not registered\n"); return -EACCES; } @@ -510,14 +509,14 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) */ mutex_lock(&txwin->task_ref.mmap_mutex); if (txwin->status != VAS_WIN_ACTIVE) { - pr_err("%s(): Window is not active\n", __func__); + pr_err("Window is not active\n"); rc = -EACCES; goto out; } paste_addr = cp_inst->coproc->vops->paste_addr(txwin); if (!paste_addr) { - pr_err("%s(): Window paste address failed\n", __func__); + pr_err("Window paste address failed\n"); rc = -EINVAL; goto out; } @@ -533,8 +532,8 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, vma->vm_end - vma->vm_start, prot); - pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, - paste_addr, vma->vm_start, rc); + pr_devel("paste addr %llx at %lx, rc %d\n", paste_addr, + vma->vm_start, rc); txwin->task_ref.vma = vma; vma->vm_ops = &vas_vm_ops; @@ -609,8 +608,7 @@ int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, goto err; } - pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), - MINOR(devno)); + pr_devel("Added dev [%d,%d]\n", MAJOR(devno), MINOR(devno)); return 0; diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig index 130707ec9f99..8bdae0caf21e 100644 --- a/arch/powerpc/platforms/powermac/Kconfig +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -2,6 +2,7 @@ config PPC_PMAC bool "Apple PowerMac based machines" depends on PPC_BOOK3S && CPU_BIG_ENDIAN + select ADB_CUDA if POWER_RESET && PPC32 select MPIC select FORCE_PCI select PPC_INDIRECT_PCI if PPC32 diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index ae62d432db8b..81c9fbae88b1 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -2614,7 +2614,8 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ struct device_node* node; int i; volatile u32 __iomem *base; - const u32 *addrp, *revp; + const __be32 *addrp; + const u32 *revp; phys_addr_t addr; u64 size; diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 40f3aa432fba..c097d591670e 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -925,8 +925,10 @@ static void __init smu_i2c_probe(void) sz = sizeof(struct pmac_i2c_bus) + sizeof(struct smu_i2c_cmd); bus = kzalloc(sz, GFP_KERNEL); - if (bus == NULL) + if (bus == NULL) { + of_node_put(busnode); return; + } bus->controller = controller; bus->busnode = of_node_get(busnode); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 8be71920e63c..c83d1e14077e 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -598,8 +598,10 @@ static void __init smp_core99_setup_i2c_hwsync(int ncpus) name = "Pulsar"; break; } - if (pmac_tb_freeze != NULL) + if (pmac_tb_freeze != NULL) { + of_node_put(cc); break; + } } if (pmac_tb_freeze != NULL) { /* Open i2c bus for synchronous access */ diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h index 3f715efb0aa6..5eeb794b5eb1 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.h +++ b/arch/powerpc/platforms/powernv/opal-fadump.h @@ -135,7 +135,7 @@ static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt, for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) { reg_entry = (struct hdat_fadump_reg_entry *)bufp; val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) : - (u64)(reg_entry->reg_val)); + (u64 __force)(reg_entry->reg_val)); opal_fadump_set_regval_regnum(regs, be32_to_cpu(reg_entry->reg_type), be32_to_cpu(reg_entry->reg_num), diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index aa4042dcd6d4..a43bfb01720a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -55,7 +55,8 @@ static bool find_aa_index(struct device_node *dr_node, struct property *ala_prop, const u32 *lmb_assoc, u32 *aa_index) { - u32 *assoc_arrays, new_prop_size; + __be32 *assoc_arrays; + u32 new_prop_size; struct property *new_prop; int aa_arrays, aa_array_entries, aa_array_sz; int i, index; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 16d93b580f61..496e16c588aa 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -914,7 +914,8 @@ static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_ return 0; } -static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift) +static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift, + bool *direct_mapping) { struct dma_win *window; const struct dynamic_dma_window_prop *dma64; @@ -927,6 +928,7 @@ static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *windo dma64 = window->prop; *dma_addr = be64_to_cpu(dma64->dma_base); *window_shift = be32_to_cpu(dma64->window_shift); + *direct_mapping = window->direct; found = true; break; } @@ -1270,10 +1272,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) mutex_lock(&dma_win_init_mutex); - if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) { - direct_mapping = (len >= max_ram_len); + if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len, &direct_mapping)) goto out_unlock; - } /* * If we already went through this for a previous function of diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f2cb62148f36..4561667832ed 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -192,9 +192,9 @@ static void free_dtl_buffers(unsigned long *time_limit) continue; kmem_cache_free(dtl_cache, pp->dispatch_log); pp->dtl_ridx = 0; - pp->dispatch_log = 0; - pp->dispatch_log_end = 0; - pp->dtl_curr = 0; + pp->dispatch_log = NULL; + pp->dispatch_log_end = NULL; + pp->dtl_curr = NULL; if (time_limit && time_after(jiffies, *time_limit)) { cond_resched(); @@ -223,7 +223,7 @@ static void destroy_cpu_associativity(void) { kfree(vcpu_associativity); kfree(pcpu_associativity); - vcpu_associativity = pcpu_associativity = 0; + vcpu_associativity = pcpu_associativity = NULL; } static __be32 *__get_cpu_associativity(int cpu, __be32 *cpu_assoc, int flag) @@ -526,8 +526,10 @@ static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p, if (cmd) { rc = init_cpu_associativity(); - if (rc) + if (rc) { + destroy_cpu_associativity(); goto out; + } for_each_possible_cpu(cpu) { disp = per_cpu_ptr(&vcpu_disp_data, cpu); diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 2d40304eb6c1..febe18f251d0 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -150,7 +150,7 @@ static int plpks_gen_password(void) ospasswordlength = maxpwsize; ospassword = kzalloc(maxpwsize, GFP_KERNEL); if (!ospassword) { - kfree(password); + kfree_sensitive(password); return -ENOMEM; } memcpy(ospassword, password, ospasswordlength); @@ -163,7 +163,7 @@ static int plpks_gen_password(void) } } out: - kfree(password); + kfree_sensitive(password); return pseries_status_to_err(rc); } diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index e25ac52acf50..b1f25bac280b 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -341,7 +341,7 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > atomic_read(&cop_feat_caps->nr_total_credits)) { - pr_err("Credits are not available to allocate window\n"); + pr_err_ratelimited("Credits are not available to allocate window\n"); rc = -EINVAL; goto out; } @@ -424,7 +424,7 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, put_vas_user_win_ref(&txwin->vas_win.task_ref); rc = -EBUSY; - pr_err("No credit is available to allocate window\n"); + pr_err_ratelimited("No credit is available to allocate window\n"); out_free: /* diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 57978a44d55b..558ec68d768e 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -11,9 +11,11 @@ #include <linux/msi.h> #include <linux/pci.h> #include <linux/slab.h> +#include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> -#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/property.h> #include <linux/interrupt.h> #include <linux/irqdomain.h> #include <linux/seq_file.h> @@ -392,7 +394,6 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, static const struct of_device_id fsl_of_msi_ids[]; static int fsl_of_msi_probe(struct platform_device *dev) { - const struct of_device_id *match; struct fsl_msi *msi; struct resource res, msiir; int err, i, j, irq_index, count; @@ -402,10 +403,7 @@ static int fsl_of_msi_probe(struct platform_device *dev) u32 offset; struct pci_controller *phb; - match = of_match_device(fsl_of_msi_ids, &dev->dev); - if (!match) - return -EINVAL; - features = match->data; + features = device_get_match_data(&dev->dev); printk(KERN_DEBUG "Setting up Freescale MSI support\n"); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index ba287abcb008..dabbdd356664 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -355,7 +355,7 @@ static void __init mpic_test_broken_ipi(struct mpic *mpic) mpic_write(mpic->gregs, MPIC_INFO(GREG_IPI_VECTOR_PRI_0), MPIC_VECPRI_MASK); r = mpic_read(mpic->gregs, MPIC_INFO(GREG_IPI_VECTOR_PRI_0)); - if (r == le32_to_cpu(MPIC_VECPRI_MASK)) { + if (r == swab32(MPIC_VECPRI_MASK)) { printk(KERN_INFO "mpic: Detected reversed IPI registers\n"); mpic->flags |= MPIC_BROKEN_IPI; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 9f0af4d795d8..f1c0fa6ece21 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -802,7 +802,7 @@ int xive_native_get_queue_info(u32 vp_id, u32 prio, if (out_qpage) *out_qpage = be64_to_cpu(qpage); if (out_qsize) - *out_qsize = be32_to_cpu(qsize); + *out_qsize = be64_to_cpu(qsize); if (out_qeoi_page) *out_qeoi_page = be64_to_cpu(qeoi_page); if (out_escalate_irq) diff --git a/arch/powerpc/tools/gcc-check-mprofile-kernel.sh b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh index a31a56016c09..73e331e7660e 100755 --- a/arch/powerpc/tools/gcc-check-mprofile-kernel.sh +++ b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh @@ -7,21 +7,20 @@ set -o pipefail # To debug, uncomment the following line # set -x -# -mprofile-kernel is only supported on 64-bit, so this should not be invoked -# for 32-bit. We pass in -m64 explicitly, and -mbig-endian and -mlittle-endian -# are passed in from Kconfig, which takes care of toolchains defaulting to -# other targets. +# -mprofile-kernel is only supported on 64-bit with ELFv2, so this should not +# be invoked for other targets. Therefore we can pass in -m64 and -mabi +# explicitly, to take care of toolchains defaulting to other targets. # Test whether the compile option -mprofile-kernel exists and generates # profiling code (ie. a call to _mcount()). echo "int func() { return 0; }" | \ - $* -m64 -S -x c -O2 -p -mprofile-kernel - -o - \ + $* -m64 -mabi=elfv2 -S -x c -O2 -p -mprofile-kernel - -o - \ 2> /dev/null | grep -q "_mcount" # Test whether the notrace attribute correctly suppresses calls to _mcount(). echo -e "#include <linux/compiler.h>\nnotrace int func() { return 0; }" | \ - $* -m64 -S -x c -O2 -p -mprofile-kernel - -o - \ + $* -m64 -mabi=elfv2 -S -x c -O2 -p -mprofile-kernel - -o - \ 2> /dev/null | grep -q "_mcount" && \ exit 1 diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index eaa15a20e6ae..95a2a06acc6a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -39,6 +39,7 @@ config RISCV select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_VDSO_DATA + select ARCH_KEEP_MEMBLOCK if ACPI select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK @@ -48,6 +49,7 @@ config RISCV select ARCH_SUPPORTS_HUGETLBFS if MMU select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_SUPPORTS_PER_VMA_LOCK if MMU + select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USES_CFI_TRAPS if CFI_CLANG @@ -174,6 +176,11 @@ config GCC_SUPPORTS_DYNAMIC_FTRACE def_bool CC_IS_GCC depends on $(cc-option,-fpatchable-function-entry=8) +config HAVE_SHADOW_CALL_STACK + def_bool $(cc-option,-fsanitize=shadow-call-stack) + # https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769 + depends on $(ld-option,--no-relax-gp) + config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT default 8 @@ -635,6 +642,15 @@ config THREAD_SIZE_ORDER Specify the Pages of thread stack size (from 4KB to 64KB), which also affects irq stack size, which is equal to thread stack size. +config RISCV_MISALIGNED + bool "Support misaligned load/store traps for kernel and userspace" + select SYSCTL_ARCH_UNALIGN_ALLOW + default y + help + Say Y here if you want the kernel to embed support for misaligned + load/store for both kernel and userspace. When disable, misaligned + accesses will generate SIGBUS in userspace and panic in kernel. + endmenu # "Platform type" menu "Kernel features" @@ -902,6 +918,9 @@ config PORTABLE select MMU select OF +config ARCH_PROC_KCORE_TEXT + def_bool y + menu "Power management options" source "kernel/power/Kconfig" diff --git a/arch/riscv/Kconfig.debug b/arch/riscv/Kconfig.debug index e69de29bb2d1..eafe17ebf710 100644 --- a/arch/riscv/Kconfig.debug +++ b/arch/riscv/Kconfig.debug @@ -0,0 +1 @@ +source "arch/riscv/kernel/tests/Kconfig.debug" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index b43a6bb7e4dc..a74be78678eb 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -54,6 +54,10 @@ endif endif endif +ifeq ($(CONFIG_SHADOW_CALL_STACK),y) + KBUILD_LDFLAGS += --no-relax-gp +endif + # ISA string setting riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima @@ -130,12 +134,6 @@ endif libs-y += arch/riscv/lib/ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a -PHONY += vdso_install -vdso_install: - $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ - $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ - $(build)=arch/riscv/kernel/compat_vdso compat_$@) - ifeq ($(KBUILD_EXTMOD),) ifeq ($(CONFIG_MMU),y) prepare: vdso_prepare @@ -147,6 +145,9 @@ vdso_prepare: prepare0 endif endif +vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg +vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg:../compat_vdso/compat_vdso.so + ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy) KBUILD_IMAGE := $(boot)/loader.bin diff --git a/arch/riscv/Makefile.postlink b/arch/riscv/Makefile.postlink index a46fc578b30b..829b9abc91f6 100644 --- a/arch/riscv/Makefile.postlink +++ b/arch/riscv/Makefile.postlink @@ -36,9 +36,6 @@ ifdef CONFIG_RELOCATABLE $(call if_changed,relocs_strip) endif -%.ko: FORCE - @true - clean: @true diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 22b13947bd13..8e7fc0edf21d 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -17,6 +17,7 @@ KCOV_INSTRUMENT := n OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S +OBJCOPYFLAGS_loader.bin :=-O binary OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S targets := Image Image.* loader loader.o loader.lds loader.bin diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 1edf3cd886c5..905881282a7c 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -37,6 +37,13 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y CONFIG_PM=y CONFIG_CPU_IDLE=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=m +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m +CONFIG_CPUFREQ_DT=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_ACPI=y @@ -95,6 +102,7 @@ CONFIG_NETLINK_DIAG=y CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y +CONFIG_CAN=m CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -102,6 +110,11 @@ CONFIG_PCIE_XILINX=y CONFIG_PCIE_FU740=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_MTD=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_ADV_OPTIONS=y +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_NVME=m @@ -124,8 +137,11 @@ CONFIG_VIRTIO_NET=y CONFIG_MACB=y CONFIG_E1000E=y CONFIG_R8169=y +CONFIG_RAVB=y CONFIG_STMMAC_ETH=m +CONFIG_MICREL_PHY=y CONFIG_MICROSEMI_PHY=y +CONFIG_CAN_RCAR_CANFD=m CONFIG_INPUT_MOUSEDEV=y CONFIG_KEYBOARD_SUN4I_LRADC=m CONFIG_SERIAL_8250=y @@ -136,16 +152,24 @@ CONFIG_SERIAL_SH_SCI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_I2C_CHARDEV=m CONFIG_I2C_MV64XXX=m +CONFIG_I2C_RIIC=y CONFIG_SPI=y +CONFIG_SPI_RSPI=m CONFIG_SPI_SIFIVE=y CONFIG_SPI_SUN6I=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_GPIO_SIFIVE=y +CONFIG_CPU_THERMAL=y +CONFIG_DEVFREQ_THERMAL=y +CONFIG_RZG2L_THERMAL=y CONFIG_WATCHDOG=y CONFIG_SUNXI_WATCHDOG=y +CONFIG_RENESAS_RZG2LWDT=y CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_GPIO=y CONFIG_DRM=m CONFIG_DRM_RADEON=m CONFIG_DRM_NOUVEAU=m @@ -153,39 +177,69 @@ CONFIG_DRM_SUN4I=m CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SOC=y +CONFIG_SND_SOC_RZ=m +CONFIG_SND_SOC_WM8978=m +CONFIG_SND_SIMPLE_CARD=m CONFIG_USB=y +CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_XHCI_PLATFORM=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_RENESAS_USBHS=m CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y CONFIG_USB_MUSB_HDRC=m CONFIG_USB_MUSB_SUNXI=m CONFIG_NOP_USB_XCEIV=m +CONFIG_USB_GADGET=y +CONFIG_USB_RENESAS_USBHS_UDC=m +CONFIG_USB_CONFIGFS=m +CONFIG_USB_CONFIGFS_SERIAL=y +CONFIG_USB_CONFIGFS_ACM=y +CONFIG_USB_CONFIGFS_OBEX=y +CONFIG_USB_CONFIGFS_NCM=y +CONFIG_USB_CONFIGFS_ECM=y +CONFIG_USB_CONFIGFS_ECM_SUBSET=y +CONFIG_USB_CONFIGFS_RNDIS=y +CONFIG_USB_CONFIGFS_EEM=y +CONFIG_USB_CONFIGFS_MASS_STORAGE=y +CONFIG_USB_CONFIGFS_F_FS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SPI=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_STARFIVE=y +CONFIG_MMC_SDHI=y CONFIG_MMC_SUNXI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SUN6I=y CONFIG_DMADEVICES=y CONFIG_DMA_SUN6I=m +CONFIG_RZ_DMAC=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y +CONFIG_RENESAS_OSTM=y CONFIG_SUN8I_DE2_CCU=m CONFIG_SUN50I_IOMMU=y CONFIG_RPMSG_CHAR=y CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y CONFIG_ARCH_R9A07G043=y +CONFIG_IIO=y +CONFIG_RZG2L_ADC=m +CONFIG_RESET_RZG2L_USBPHY_CTRL=y CONFIG_PHY_SUN4I_USB=m +CONFIG_PHY_RCAR_GEN3_USB2=y CONFIG_LIBNVDIMM=y CONFIG_NVMEM_SUNXI_SID=y CONFIG_EXT4_FS=y diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index d5604d2073bc..7dad0cf9d701 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -66,6 +66,8 @@ int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const char **isa); static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } +void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size, + u32 *cboz_size, u32 *cbop_size); #else static inline void acpi_init_rintc_map(void) { } static inline struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) @@ -79,6 +81,10 @@ static inline int acpi_get_riscv_isa(struct acpi_table_header *table, return -EINVAL; } +static inline void acpi_get_cbo_block_size(struct acpi_table_header *table, + u32 *cbom_size, u32 *cboz_size, + u32 *cbop_size) { } + #endif /* CONFIG_ACPI */ #endif /*_ASM_ACPI_H*/ diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index 61ba8ed43d8f..36b955c762ba 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -25,7 +25,6 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s); DECLARE_DO_ERROR_INFO(do_trap_ecall_m); DECLARE_DO_ERROR_INFO(do_trap_break); -asmlinkage unsigned long get_overflow_stack(void); asmlinkage void handle_bad_stack(struct pt_regs *regs); asmlinkage void do_page_fault(struct pt_regs *regs); asmlinkage void do_irq(struct pt_regs *regs); diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 114bbadaef41..b0487b39e674 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -82,6 +82,47 @@ .endr .endm +#ifdef CONFIG_SMP +#ifdef CONFIG_32BIT +#define PER_CPU_OFFSET_SHIFT 2 +#else +#define PER_CPU_OFFSET_SHIFT 3 +#endif + +.macro asm_per_cpu dst sym tmp + REG_L \tmp, TASK_TI_CPU_NUM(tp) + slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT + la \dst, __per_cpu_offset + add \dst, \dst, \tmp + REG_L \tmp, 0(\dst) + la \dst, \sym + add \dst, \dst, \tmp +.endm +#else /* CONFIG_SMP */ +.macro asm_per_cpu dst sym tmp + la \dst, \sym +.endm +#endif /* CONFIG_SMP */ + +.macro load_per_cpu dst ptr tmp + asm_per_cpu \dst \ptr \tmp + REG_L \dst, 0(\dst) +.endm + +#ifdef CONFIG_SHADOW_CALL_STACK +/* gp is used as the shadow call stack pointer instead */ +.macro load_global_pointer +.endm +#else +/* load __global_pointer to gp */ +.macro load_global_pointer +.option push +.option norelax + la gp, __global_pointer$ +.option pop +.endm +#endif /* CONFIG_SHADOW_CALL_STACK */ + /* save all GPs except x1 ~ x5 */ .macro save_from_x6_to_x31 REG_S x6, PT_T1(sp) diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 65f6eee4ab8d..224b4dc02b50 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -15,13 +15,261 @@ #include <asm/barrier.h> #include <asm/bitsperlong.h> +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) #include <asm-generic/bitops/__ffs.h> -#include <asm-generic/bitops/ffz.h> -#include <asm-generic/bitops/fls.h> #include <asm-generic/bitops/__fls.h> +#include <asm-generic/bitops/ffs.h> +#include <asm-generic/bitops/fls.h> + +#else +#include <asm/alternative-macros.h> +#include <asm/hwcap.h> + +#if (BITS_PER_LONG == 64) +#define CTZW "ctzw " +#define CLZW "clzw " +#elif (BITS_PER_LONG == 32) +#define CTZW "ctz " +#define CLZW "clz " +#else +#error "Unexpected BITS_PER_LONG" +#endif + +static __always_inline unsigned long variable__ffs(unsigned long word) +{ + int num; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + "ctz %0, %1\n" + ".option pop\n" + : "=r" (word) : "r" (word) :); + + return word; + +legacy: + num = 0; +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +/** + * __ffs - find first set bit in a long word + * @word: The word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +#define __ffs(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(word) : \ + variable__ffs(word)) + +static __always_inline unsigned long variable__fls(unsigned long word) +{ + int num; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + "clz %0, %1\n" + ".option pop\n" + : "=r" (word) : "r" (word) :); + + return BITS_PER_LONG - 1 - word; + +legacy: + num = BITS_PER_LONG - 1; +#if BITS_PER_LONG == 64 + if (!(word & (~0ul << 32))) { + num -= 32; + word <<= 32; + } +#endif + if (!(word & (~0ul << (BITS_PER_LONG - 16)))) { + num -= 16; + word <<= 16; + } + if (!(word & (~0ul << (BITS_PER_LONG - 8)))) { + num -= 8; + word <<= 8; + } + if (!(word & (~0ul << (BITS_PER_LONG - 4)))) { + num -= 4; + word <<= 4; + } + if (!(word & (~0ul << (BITS_PER_LONG - 2)))) { + num -= 2; + word <<= 2; + } + if (!(word & (~0ul << (BITS_PER_LONG - 1)))) + num -= 1; + return num; +} + +/** + * __fls - find last set bit in a long word + * @word: the word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +#define __fls(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)(BITS_PER_LONG - 1 - __builtin_clzl(word)) : \ + variable__fls(word)) + +static __always_inline int variable_ffs(int x) +{ + int r; + + if (!x) + return 0; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + CTZW "%0, %1\n" + ".option pop\n" + : "=r" (r) : "r" (x) :); + + return r + 1; + +legacy: + r = 1; + if (!(x & 0xffff)) { + x >>= 16; + r += 16; + } + if (!(x & 0xff)) { + x >>= 8; + r += 8; + } + if (!(x & 0xf)) { + x >>= 4; + r += 4; + } + if (!(x & 3)) { + x >>= 2; + r += 2; + } + if (!(x & 1)) { + x >>= 1; + r += 1; + } + return r; +} + +/** + * ffs - find first set bit in a word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs routines. + * + * ffs(value) returns 0 if value is 0 or the position of the first set bit if + * value is nonzero. The first (least significant) bit is at position 1. + */ +#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x)) + +static __always_inline int variable_fls(unsigned int x) +{ + int r; + + if (!x) + return 0; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + CLZW "%0, %1\n" + ".option pop\n" + : "=r" (r) : "r" (x) :); + + return 32 - r; + +legacy: + r = 32; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +/** + * fls - find last set bit in a word + * @x: the word to search + * + * This is defined in a similar way as ffs, but returns the position of the most + * significant set bit. + * + * fls(value) returns 0 if value is 0 or the position of the last set bit if + * value is nonzero. The last (most significant) bit is at position 32. + */ +#define fls(x) \ +({ \ + typeof(x) x_ = (x); \ + __builtin_constant_p(x_) ? \ + (int)((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \ + : \ + variable_fls(x_); \ +}) + +#endif /* !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) */ + +#include <asm-generic/bitops/ffz.h> #include <asm-generic/bitops/fls64.h> #include <asm-generic/bitops/sched.h> -#include <asm-generic/bitops/ffs.h> #include <asm-generic/bitops/hweight.h> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index d0345bd659c9..a418c3112cd6 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -7,7 +7,10 @@ #define _ASM_CPUFEATURE_H #include <linux/bitmap.h> +#include <linux/jump_label.h> #include <asm/hwcap.h> +#include <asm/alternative-macros.h> +#include <asm/errno.h> /* * These are probed via a device_initcall(), via either the SBI or directly @@ -30,6 +33,104 @@ DECLARE_PER_CPU(long, misaligned_access_speed); /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; -void check_unaligned_access(int cpu); +void riscv_user_isa_enable(void); + +#ifdef CONFIG_RISCV_MISALIGNED +bool unaligned_ctl_available(void); +bool check_unaligned_access_emulated(int cpu); +void unaligned_emulation_finish(void); +#else +static inline bool unaligned_ctl_available(void) +{ + return false; +} + +static inline bool check_unaligned_access_emulated(int cpu) +{ + return false; +} + +static inline void unaligned_emulation_finish(void) {} +#endif + +unsigned long riscv_get_elf_hwcap(void); + +struct riscv_isa_ext_data { + const unsigned int id; + const char *name; + const char *property; +}; + +extern const struct riscv_isa_ext_data riscv_isa_ext[]; +extern const size_t riscv_isa_ext_count; +extern bool riscv_isa_fallback; + +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); + +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); +#define riscv_isa_extension_available(isa_bitmap, ext) \ + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) + +static __always_inline bool +riscv_has_extension_likely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, + "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_no); + } else { + if (!__riscv_isa_extension_available(NULL, ext)) + goto l_no; + } + + return true; +l_no: + return false; +} + +static __always_inline bool +riscv_has_extension_unlikely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, + "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_yes); + } else { + if (__riscv_isa_extension_available(NULL, ext)) + goto l_yes; + } + + return false; +l_yes: + return true; +} + +static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) +{ + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) + return true; + + return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); +} + +static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) +{ + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) + return true; + + return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); +} #endif diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index b3b2dfbdf945..06c236bfab53 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -14,7 +14,7 @@ #include <asm/auxvec.h> #include <asm/byteorder.h> #include <asm/cacheinfo.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> /* * These are used to set parameters in the core dumps. diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h index 6e4dee49d84b..7ab5e34318c8 100644 --- a/arch/riscv/include/asm/entry-common.h +++ b/arch/riscv/include/asm/entry-common.h @@ -8,4 +8,18 @@ void handle_page_fault(struct pt_regs *regs); void handle_break(struct pt_regs *regs); +#ifdef CONFIG_RISCV_MISALIGNED +int handle_misaligned_load(struct pt_regs *regs); +int handle_misaligned_store(struct pt_regs *regs); +#else +static inline int handle_misaligned_load(struct pt_regs *regs) +{ + return -1; +} +static inline int handle_misaligned_store(struct pt_regs *regs) +{ + return -1; +} +#endif + #endif /* _ASM_RISCV_ENTRY_COMMON_H */ diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index b55b434f0059..83ed25e43553 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -95,31 +95,31 @@ asm volatile(ALTERNATIVE( \ #endif /* - * dcache.ipa rs1 (invalidate, physical address) + * th.dcache.ipa rs1 (invalidate, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01010 rs1 000 00000 0001011 - * dache.iva rs1 (invalida, virtual address) + * th.dache.iva rs1 (invalida, virtual address) * 0000001 00110 rs1 000 00000 0001011 * - * dcache.cpa rs1 (clean, physical address) + * th.dcache.cpa rs1 (clean, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01001 rs1 000 00000 0001011 - * dcache.cva rs1 (clean, virtual address) + * th.dcache.cva rs1 (clean, virtual address) * 0000001 00101 rs1 000 00000 0001011 * - * dcache.cipa rs1 (clean then invalidate, physical address) + * th.dcache.cipa rs1 (clean then invalidate, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01011 rs1 000 00000 0001011 - * dcache.civa rs1 (... virtual address) + * th.dcache.civa rs1 (... virtual address) * 0000001 00111 rs1 000 00000 0001011 * - * sync.s (make sure all cache operations finished) + * th.sync.s (make sure all cache operations finished) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000000 11001 00000 000 00000 0001011 */ -#define THEAD_inval_A0 ".long 0x0265000b" -#define THEAD_clean_A0 ".long 0x0255000b" -#define THEAD_flush_A0 ".long 0x0275000b" +#define THEAD_INVAL_A0 ".long 0x0265000b" +#define THEAD_CLEAN_A0 ".long 0x0255000b" +#define THEAD_FLUSH_A0 ".long 0x0275000b" #define THEAD_SYNC_S ".long 0x0190000b" #define ALT_CMO_OP(_op, _start, _size, _cachesize) \ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 6fc51c1b34cf..06d30526ef3b 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -8,9 +8,6 @@ #ifndef _ASM_RISCV_HWCAP_H #define _ASM_RISCV_HWCAP_H -#include <asm/alternative-macros.h> -#include <asm/errno.h> -#include <linux/bits.h> #include <uapi/asm/hwcap.h> #define RISCV_ISA_EXT_a ('a' - 'a') @@ -69,76 +66,4 @@ #define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SSAIA #endif -#ifndef __ASSEMBLY__ - -#include <linux/jump_label.h> - -unsigned long riscv_get_elf_hwcap(void); - -struct riscv_isa_ext_data { - const unsigned int id; - const char *name; - const char *property; -}; - -extern const struct riscv_isa_ext_data riscv_isa_ext[]; -extern const size_t riscv_isa_ext_count; -extern bool riscv_isa_fallback; - -unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); - -#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) - -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); -#define riscv_isa_extension_available(isa_bitmap, ext) \ - __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) - -static __always_inline bool -riscv_has_extension_likely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( - ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_no); - } else { - if (!__riscv_isa_extension_available(NULL, ext)) - goto l_no; - } - - return true; -l_no: - return false; -} - -static __always_inline bool -riscv_has_extension_unlikely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( - ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_yes); - } else { - if (__riscv_isa_extension_available(NULL, ext)) - goto l_yes; - } - - return false; -l_yes: - return true; -} - -#endif - #endif /* _ASM_RISCV_HWCAP_H */ diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 78936f4ff513..5c48f48e79a6 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,6 +8,11 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 5 +#define RISCV_HWPROBE_MAX_KEY 6 + +static inline bool riscv_hwprobe_key_is_valid(__s64 key) +{ + return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY; +} #endif diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 6960beb75f32..e27179b26086 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -180,19 +180,19 @@ INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(51), \ __RD(0), RS1(gaddr), RS2(vmid)) -#define CBO_inval(base) \ +#define CBO_INVAL(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(0)) -#define CBO_clean(base) \ +#define CBO_CLEAN(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(1)) -#define CBO_flush(base) \ +#define CBO_FLUSH(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(2)) -#define CBO_zero(base) \ +#define CBO_ZERO(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(4)) diff --git a/arch/riscv/include/asm/irq_stack.h b/arch/riscv/include/asm/irq_stack.h index e4042d297580..6441ded3b0cf 100644 --- a/arch/riscv/include/asm/irq_stack.h +++ b/arch/riscv/include/asm/irq_stack.h @@ -12,6 +12,9 @@ DECLARE_PER_CPU(ulong *, irq_stack_ptr); +asmlinkage void call_on_irq_stack(struct pt_regs *regs, + void (*func)(struct pt_regs *)); + #ifdef CONFIG_VMAP_STACK /* * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 5488ecc337b6..57e887bfa34c 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -33,8 +33,8 @@ #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) #endif /* - * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so - * define the PAGE_OFFSET value for SV39. + * By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so + * define the PAGE_OFFSET value for SV48 and SV39. */ #define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) #define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h index 59ba1fbaf784..00f3369570a8 100644 --- a/arch/riscv/include/asm/pgtable-32.h +++ b/arch/riscv/include/asm/pgtable-32.h @@ -33,4 +33,7 @@ _PAGE_WRITE | _PAGE_EXEC | \ _PAGE_USER | _PAGE_GLOBAL)) +static const __maybe_unused int pgtable_l4_enabled; +static const __maybe_unused int pgtable_l5_enabled; + #endif /* _ASM_RISCV_PGTABLE_32_H */ diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 7a5097202e15..9a2c780a11e9 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -126,14 +126,18 @@ enum napot_cont_order { /* * [63:59] T-Head Memory Type definitions: - * - * 00000 - NC Weakly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + * bit[63] SO - Strong Order + * bit[62] C - Cacheable + * bit[61] B - Bufferable + * bit[60] SH - Shareable + * bit[59] Sec - Trustable + * 00110 - NC Weakly-ordered, Non-cacheable, Bufferable, Shareable, Non-trustable * 01110 - PMA Weakly-ordered, Cacheable, Bufferable, Shareable, Non-trustable - * 10000 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + * 10010 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable */ #define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60)) -#define _PAGE_NOCACHE_THEAD 0UL -#define _PAGE_IO_THEAD (1UL << 63) +#define _PAGE_NOCACHE_THEAD ((1UL < 61) | (1UL << 60)) +#define _PAGE_IO_THEAD ((1UL << 63) | (1UL << 60)) #define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59)) static inline u64 riscv_page_mtmask(void) diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index f896708e8331..179bd4afece4 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -16,9 +16,9 @@ #define _PAGE_GLOBAL (1 << 5) /* Global */ #define _PAGE_ACCESSED (1 << 6) /* Set by hardware on any access */ #define _PAGE_DIRTY (1 << 7) /* Set by hardware on any write */ -#define _PAGE_SOFT (1 << 8) /* Reserved for software */ +#define _PAGE_SOFT (3 << 8) /* Reserved for software */ -#define _PAGE_SPECIAL _PAGE_SOFT +#define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ #define _PAGE_TABLE _PAGE_PRESENT /* diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index b2ba3f79cfe9..294044429e8e 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -291,6 +291,7 @@ static inline pte_t pud_pte(pud_t pud) } #ifdef CONFIG_RISCV_ISA_SVNAPOT +#include <asm/cpufeature.h> static __always_inline bool has_svnapot(void) { @@ -811,7 +812,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, * bit 5: _PAGE_PROT_NONE (zero) * bit 6: exclusive marker * bits 7 to 11: swap type - * bits 11 to XLEN-1: swap offset + * bits 12 to XLEN-1: swap offset */ #define __SWP_TYPE_SHIFT 7 #define __SWP_TYPE_BITS 5 @@ -914,7 +915,6 @@ extern uintptr_t _dtb_early_pa; #define dtb_early_pa _dtb_early_pa #endif /* CONFIG_XIP_KERNEL */ extern u64 satp_mode; -extern bool pgtable_l4_enabled; void paging_init(void); void misc_mem_init(void); diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 441da1839c94..f19f861cda54 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -8,6 +8,7 @@ #include <linux/const.h> #include <linux/cache.h> +#include <linux/prctl.h> #include <vdso/processor.h> @@ -82,6 +83,7 @@ struct thread_struct { unsigned long bad_cause; unsigned long vstate_ctrl; struct __riscv_v_ext_state vstate; + unsigned long align_ctl; }; /* Whitelist the fstate from the task_struct for hardened usercopy */ @@ -94,6 +96,7 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, #define INIT_THREAD { \ .sp = sizeof(init_stack) + (long)&init_stack, \ + .align_ctl = PR_UNALIGN_NOPRINT, \ } #define task_pt_regs(tsk) \ @@ -136,6 +139,12 @@ extern long riscv_v_vstate_ctrl_set_current(unsigned long arg); extern long riscv_v_vstate_ctrl_get_current(void); #endif /* CONFIG_RISCV_ISA_V */ +extern int get_unalign_ctl(struct task_struct *tsk, unsigned long addr); +extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); + +#define GET_UNALIGN_CTL(tsk, addr) get_unalign_ctl((tsk), (addr)) +#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val)) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_PROCESSOR_H */ diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 12dfda6bb924..0892f4421bc4 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -280,9 +280,6 @@ void sbi_set_timer(uint64_t stime_value); void sbi_shutdown(void); void sbi_send_ipi(unsigned int cpu); int sbi_remote_fence_i(const struct cpumask *cpu_mask); -int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, - unsigned long start, - unsigned long size); int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long start, diff --git a/arch/riscv/include/asm/scs.h b/arch/riscv/include/asm/scs.h new file mode 100644 index 000000000000..0e45db78b24b --- /dev/null +++ b/arch/riscv/include/asm/scs.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SCS_H +#define _ASM_SCS_H + +#ifdef __ASSEMBLY__ +#include <asm/asm-offsets.h> + +#ifdef CONFIG_SHADOW_CALL_STACK + +/* Load init_shadow_call_stack to gp. */ +.macro scs_load_init_stack + la gp, init_shadow_call_stack + XIP_FIXUP_OFFSET gp +.endm + +/* Load the per-CPU IRQ shadow call stack to gp. */ +.macro scs_load_irq_stack tmp + load_per_cpu gp, irq_shadow_call_stack_ptr, \tmp +.endm + +/* Load task_scs_sp(current) to gp. */ +.macro scs_load_current + REG_L gp, TASK_TI_SCS_SP(tp) +.endm + +/* Load task_scs_sp(current) to gp, but only if tp has changed. */ +.macro scs_load_current_if_task_changed prev + beq \prev, tp, _skip_scs + scs_load_current +_skip_scs: +.endm + +/* Save gp to task_scs_sp(current). */ +.macro scs_save_current + REG_S gp, TASK_TI_SCS_SP(tp) +.endm + +#else /* CONFIG_SHADOW_CALL_STACK */ + +.macro scs_load_init_stack +.endm +.macro scs_load_irq_stack tmp +.endm +.macro scs_load_current +.endm +.macro scs_load_current_if_task_changed prev +.endm +.macro scs_save_current +.endm + +#endif /* CONFIG_SHADOW_CALL_STACK */ +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_SCS_H */ diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index a727be723c56..f90d8e42f3c7 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -9,7 +9,7 @@ #include <linux/jump_label.h> #include <linux/sched/task_stack.h> #include <asm/vector.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/csr.h> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 1833beb00489..574779900bfb 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -34,9 +34,6 @@ #ifndef __ASSEMBLY__ -extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; -extern unsigned long spin_shadow_stack; - #include <asm/processor.h> #include <asm/csr.h> @@ -60,8 +57,20 @@ struct thread_info { long user_sp; /* User stack pointer */ int cpu; unsigned long syscall_work; /* SYSCALL_WORK_ flags */ +#ifdef CONFIG_SHADOW_CALL_STACK + void *scs_base; + void *scs_sp; +#endif }; +#ifdef CONFIG_SHADOW_CALL_STACK +#define INIT_SCS \ + .scs_base = init_shadow_call_stack, \ + .scs_sp = init_shadow_call_stack, +#else +#define INIT_SCS +#endif + /* * macros/functions for gaining access to the thread information structure * @@ -71,6 +80,7 @@ struct thread_info { { \ .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ + INIT_SCS \ } void arch_release_task_struct(struct task_struct *tsk); diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 120bcf2ed8a8..1eb5682b2af6 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -15,7 +15,13 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { - flush_tlb_mm(tlb->mm); +#ifdef CONFIG_MMU + if (tlb->fullmm || tlb->need_flush_all) + flush_tlb_mm(tlb->mm); + else + flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, + tlb_get_unmap_size(tlb)); +#endif } #endif /* _ASM_RISCV_TLB_H */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index a09196f8de68..8f3418c5f172 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -11,6 +11,9 @@ #include <asm/smp.h> #include <asm/errata_list.h> +#define FLUSH_TLB_MAX_SIZE ((unsigned long)-1) +#define FLUSH_TLB_NO_ASID ((unsigned long)-1) + #ifdef CONFIG_MMU extern unsigned long asid_mask; @@ -32,9 +35,12 @@ static inline void local_flush_tlb_page(unsigned long addr) #if defined(CONFIG_SMP) && defined(CONFIG_MMU) void flush_tlb_all(void); void flush_tlb_mm(struct mm_struct *mm); +void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned int page_size); void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +void flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, @@ -51,14 +57,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, local_flush_tlb_all(); } -#define flush_tlb_mm(mm) flush_tlb_all() -#endif /* !CONFIG_SMP || !CONFIG_MMU */ - /* Flush a range of kernel pages */ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - flush_tlb_all(); + local_flush_tlb_all(); } +#define flush_tlb_mm(mm) flush_tlb_all() +#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() +#endif /* !CONFIG_SMP || !CONFIG_MMU */ + #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h index 14f5d27783b8..96b65a5396df 100644 --- a/arch/riscv/include/asm/vdso/processor.h +++ b/arch/riscv/include/asm/vdso/processor.h @@ -14,7 +14,7 @@ static inline void cpu_relax(void) __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); #endif -#ifdef __riscv_zihintpause +#ifdef CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE /* * Reduce instruction retirement. * This assumes the PC changes. diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index c5ee07b3df07..87aaef656257 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -15,7 +15,7 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <asm/ptrace.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/csr.h> #include <asm/asm.h> diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h index d696d6610231..11a71b8533d5 100644 --- a/arch/riscv/include/uapi/asm/elf.h +++ b/arch/riscv/include/uapi/asm/elf.h @@ -49,6 +49,7 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_TLS_DTPREL64 9 #define R_RISCV_TLS_TPREL32 10 #define R_RISCV_TLS_TPREL64 11 +#define R_RISCV_IRELATIVE 58 /* Relocation types not used by the dynamic linker */ #define R_RISCV_BRANCH 16 @@ -81,7 +82,6 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_ALIGN 43 #define R_RISCV_RVC_BRANCH 44 #define R_RISCV_RVC_JUMP 45 -#define R_RISCV_LUI 46 #define R_RISCV_GPREL_I 47 #define R_RISCV_GPREL_S 48 #define R_RISCV_TPREL_I 49 @@ -93,6 +93,9 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_SET16 55 #define R_RISCV_SET32 56 #define R_RISCV_32_PCREL 57 +#define R_RISCV_PLT32 59 +#define R_RISCV_SET_ULEB128 60 +#define R_RISCV_SUB_ULEB128 61 #endif /* _UAPI_ASM_RISCV_ELF_H */ diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index d43e306ce2f9..b659ffcfcdb4 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -29,6 +29,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZBA (1 << 3) #define RISCV_HWPROBE_EXT_ZBB (1 << 4) #define RISCV_HWPROBE_EXT_ZBS (1 << 5) +#define RISCV_HWPROBE_EXT_ZICBOZ (1 << 6) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) @@ -36,6 +37,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0) #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0) #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0) +#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ #endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 95cf25d48405..fee22a3d1b53 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -57,9 +57,10 @@ obj-y += stacktrace.o obj-y += cacheinfo.o obj-y += patch.o obj-y += probes/ +obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ -obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o +obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o obj-$(CONFIG_SMP) += smpboot.o diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index 56cb2c986c48..e619edc8b0cc 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -14,9 +14,10 @@ */ #include <linux/acpi.h> +#include <linux/efi.h> #include <linux/io.h> +#include <linux/memblock.h> #include <linux/pci.h> -#include <linux/efi.h> int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; @@ -217,7 +218,89 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { - return (void __iomem *)memremap(phys, size, MEMREMAP_WB); + efi_memory_desc_t *md, *region = NULL; + pgprot_t prot; + + if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP))) + return NULL; + + for_each_efi_memory_desc(md) { + u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + + if (phys < md->phys_addr || phys >= end) + continue; + + if (phys + size > end) { + pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n"); + return NULL; + } + region = md; + break; + } + + /* + * It is fine for AML to remap regions that are not represented in the + * EFI memory map at all, as it only describes normal memory, and MMIO + * regions that require a virtual mapping to make them accessible to + * the EFI runtime services. + */ + prot = PAGE_KERNEL_IO; + if (region) { + switch (region->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + case EFI_PERSISTENT_MEMORY: + if (memblock_is_map_memory(phys) || + !memblock_is_region_memory(phys, size)) { + pr_warn(FW_BUG "requested region covers kernel memory\n"); + return NULL; + } + + /* + * Mapping kernel memory is permitted if the region in + * question is covered by a single memblock with the + * NOMAP attribute set: this enables the use of ACPI + * table overrides passed via initramfs. + * This particular use case only requires read access. + */ + fallthrough; + + case EFI_RUNTIME_SERVICES_CODE: + /* + * This would be unusual, but not problematic per se, + * as long as we take care not to create a writable + * mapping for executable code. + */ + prot = PAGE_KERNEL_RO; + break; + + case EFI_ACPI_RECLAIM_MEMORY: + /* + * ACPI reclaim memory is used to pass firmware tables + * and other data that is intended for consumption by + * the OS only, which may decide it wants to reclaim + * that memory and use it for something else. We never + * do that, but we usually add it to the linear map + * anyway, in which case we should use the existing + * mapping. + */ + if (memblock_is_map_memory(phys)) + return (void __iomem *)__va(phys); + fallthrough; + + default: + if (region->attribute & EFI_MEMORY_WB) + prot = PAGE_KERNEL; + else if ((region->attribute & EFI_MEMORY_WC) || + (region->attribute & EFI_MEMORY_WT)) + prot = pgprot_writecombine(PAGE_KERNEL); + } + } + + return ioremap_prot(phys, size, pgprot_val(prot)); } #ifdef CONFIG_PCI diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index d6a75aac1d27..a03129f40c46 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -14,6 +14,7 @@ #include <asm/thread_info.h> #include <asm/ptrace.h> #include <asm/cpu_ops_sbi.h> +#include <asm/stacktrace.h> #include <asm/suspend.h> void asm_offsets(void); @@ -38,7 +39,11 @@ void asm_offsets(void) OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); +#ifdef CONFIG_SHADOW_CALL_STACK + OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp); +#endif + OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu); OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]); OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]); OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]); @@ -479,4 +484,8 @@ void asm_offsets(void) OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr); OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr); OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr); + + DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN)); + OFFSET(STACKFRAME_FP, stackframe, fp); + OFFSET(STACKFRAME_RA, stackframe, ra); } diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index b86e5e2c3aea..62fa393b2eb2 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -76,13 +76,3 @@ quiet_cmd_compat_vdsold = VDSOLD $@ # actual build commands quiet_cmd_compat_vdsoas = VDSOAS $@ cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $< - -# install commands for the unstripped file -quiet_cmd_compat_vdso_install = INSTALL $@ - cmd_compat_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/compat_vdso/$@ - -compat_vdso.so: $(obj)/compat_vdso.so.dbg - @mkdir -p $(MODLIB)/compat_vdso - $(call cmd,compat_vdso_install) - -compat_vdso_install: compat_vdso.so diff --git a/arch/riscv/kernel/copy-unaligned.S b/arch/riscv/kernel/copy-unaligned.S index cfdecfbaad62..2b3d9398c113 100644 --- a/arch/riscv/kernel/copy-unaligned.S +++ b/arch/riscv/kernel/copy-unaligned.S @@ -9,7 +9,7 @@ /* void __riscv_copy_words_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using word loads and stores. */ /* Note: The size is truncated to a multiple of 8 * SZREG */ -ENTRY(__riscv_copy_words_unaligned) +SYM_FUNC_START(__riscv_copy_words_unaligned) andi a4, a2, ~((8*SZREG)-1) beqz a4, 2f add a3, a1, a4 @@ -36,12 +36,12 @@ ENTRY(__riscv_copy_words_unaligned) 2: ret -END(__riscv_copy_words_unaligned) +SYM_FUNC_END(__riscv_copy_words_unaligned) /* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using only byte accesses. */ /* Note: The size is truncated to a multiple of 8 */ -ENTRY(__riscv_copy_bytes_unaligned) +SYM_FUNC_START(__riscv_copy_bytes_unaligned) andi a4, a2, ~(8-1) beqz a4, 2f add a3, a1, a4 @@ -68,4 +68,4 @@ ENTRY(__riscv_copy_bytes_unaligned) 2: ret -END(__riscv_copy_bytes_unaligned) +SYM_FUNC_END(__riscv_copy_bytes_unaligned) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index c17dacb1141c..d11d6320fb0d 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -125,13 +125,14 @@ old_interface: */ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) { - int rc; - for (; node; node = node->parent) { if (of_device_is_compatible(node, "riscv")) { - rc = riscv_of_processor_hartid(node, hartid); - if (!rc) - return 0; + *hartid = (unsigned long)of_get_cpu_hwid(node, 0); + if (*hartid == ~0UL) { + pr_warn("Found CPU without hart ID\n"); + return -ENODEV; + } + return 0; } } @@ -202,9 +203,8 @@ arch_initcall(riscv_cpuinfo_init); #ifdef CONFIG_PROC_FS -static void print_isa(struct seq_file *f) +static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) { - seq_puts(f, "isa\t\t: "); if (IS_ENABLED(CONFIG_32BIT)) seq_write(f, "rv32", 4); @@ -212,7 +212,7 @@ static void print_isa(struct seq_file *f) seq_write(f, "rv64", 4); for (int i = 0; i < riscv_isa_ext_count; i++) { - if (!__riscv_isa_extension_available(NULL, riscv_isa_ext[i].id)) + if (!__riscv_isa_extension_available(isa_bitmap, riscv_isa_ext[i].id)) continue; /* Only multi-letter extensions are split by underscores */ @@ -276,7 +276,15 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "processor\t: %lu\n", cpu_id); seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id)); - print_isa(m); + + /* + * For historical raisins, the isa: line is limited to the lowest common + * denominator of extensions supported across all harts. A true list of + * extensions supported on this hart is printed later in the hart isa: + * line. + */ + seq_puts(m, "isa\t\t: "); + print_isa(m, NULL); print_mmu(m); if (acpi_disabled) { @@ -292,6 +300,13 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid); seq_printf(m, "marchid\t\t: 0x%lx\n", ci->marchid); seq_printf(m, "mimpid\t\t: 0x%lx\n", ci->mimpid); + + /* + * Print the ISA extensions specific to this hart, which may show + * additional extensions not present across all harts. + */ + seq_puts(m, "hart isa\t: "); + print_isa(m, hart_isa[cpu_id].isa); seq_puts(m, "\n"); return 0; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index e3803822ab5a..b3785ffc1570 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -8,6 +8,7 @@ #include <linux/acpi.h> #include <linux/bitmap.h> +#include <linux/cpuhotplug.h> #include <linux/ctype.h> #include <linux/log2.h> #include <linux/memory.h> @@ -29,6 +30,7 @@ #define MISALIGNED_ACCESS_JIFFIES_LG2 1 #define MISALIGNED_BUFFER_SIZE 0x4000 +#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) unsigned long elf_hwcap __read_mostly; @@ -93,10 +95,10 @@ static bool riscv_isa_extension_check(int id) return true; case RISCV_ISA_EXT_ZICBOZ: if (!riscv_cboz_block_size) { - pr_err("Zicboz detected in ISA string, but no cboz-block-size found\n"); + pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n"); return false; } else if (!is_power_of_2(riscv_cboz_block_size)) { - pr_err("cboz-block-size present, but is not a power-of-2\n"); + pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n"); return false; } return true; @@ -206,10 +208,11 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc switch (*ext) { case 's': /* - * Workaround for invalid single-letter 's' & 'u'(QEMU). + * Workaround for invalid single-letter 's' & 'u' (QEMU). * No need to set the bit in riscv_isa as 's' & 'u' are - * not valid ISA extensions. It works until multi-letter - * extension starting with "Su" appears. + * not valid ISA extensions. It works unless the first + * multi-letter extension in the ISA string begins with + * "Su" and is not prefixed with an underscore. */ if (ext[-1] != '_' && ext[1] == 'u') { ++isa; @@ -558,23 +561,21 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -void check_unaligned_access(int cpu) +static int check_unaligned_access(void *param) { + int cpu = smp_processor_id(); u64 start_cycles, end_cycles; u64 word_cycles; u64 byte_cycles; int ratio; unsigned long start_jiffies, now; - struct page *page; + struct page *page = param; void *dst; void *src; long speed = RISCV_HWPROBE_MISALIGNED_SLOW; - page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); - if (!page) { - pr_warn("Can't alloc pages to measure memcpy performance"); - return; - } + if (check_unaligned_access_emulated(cpu)) + return 0; /* Make an unaligned destination buffer. */ dst = (void *)((unsigned long)page_address(page) | 0x1); @@ -628,7 +629,7 @@ void check_unaligned_access(int cpu) pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", cpu); - goto out; + return 0; } if (word_cycles < byte_cycles) @@ -642,18 +643,90 @@ void check_unaligned_access(int cpu) (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); per_cpu(misaligned_access_speed, cpu) = speed; + return 0; +} -out: - __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); +static void check_unaligned_access_nonboot_cpu(void *param) +{ + unsigned int cpu = smp_processor_id(); + struct page **pages = param; + + if (smp_processor_id() != 0) + check_unaligned_access(pages[cpu]); } -static int check_unaligned_access_boot_cpu(void) +static int riscv_online_cpu(unsigned int cpu) { - check_unaligned_access(0); + static struct page *buf; + + /* We are already set since the last check */ + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + return 0; + + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); return 0; } -arch_initcall(check_unaligned_access_boot_cpu); +/* Measure unaligned access on all CPUs present at boot in parallel. */ +static int check_unaligned_access_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), + GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return 0; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. + */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + + /* Setup hotplug callback for any new CPUs that come online. */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, NULL); + +out: + unaligned_emulation_finish(); + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); + return 0; +} + +arch_initcall(check_unaligned_access_all_cpus); + +void riscv_user_isa_enable(void) +{ + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) + csr_set(CSR_SENVCFG, ENVCFG_CBZE); +} #ifdef CONFIG_RISCV_ALTERNATIVE /* diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 143a2bb3e697..54ca4564a926 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -9,10 +9,15 @@ #include <asm/asm.h> #include <asm/csr.h> +#include <asm/scs.h> #include <asm/unistd.h> +#include <asm/page.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/errata_list.h> +#include <linux/sizes.h> + + .section .irqentry.text, "ax" SYM_CODE_START(handle_exception) /* @@ -21,9 +26,9 @@ SYM_CODE_START(handle_exception) * register will contain 0, and we should continue on the current TP. */ csrrw tp, CSR_SCRATCH, tp - bnez tp, _save_context + bnez tp, .Lsave_context -_restore_kernel_tpsp: +.Lrestore_kernel_tpsp: csrr tp, CSR_SCRATCH REG_S sp, TASK_TI_KERNEL_SP(tp) @@ -35,7 +40,7 @@ _restore_kernel_tpsp: REG_L sp, TASK_TI_KERNEL_SP(tp) #endif -_save_context: +.Lsave_context: REG_S sp, TASK_TI_USER_SP(tp) REG_L sp, TASK_TI_KERNEL_SP(tp) addi sp, sp, -(PT_SIZE_ON_STACK) @@ -73,10 +78,11 @@ _save_context: csrw CSR_SCRATCH, x0 /* Load the global pointer */ -.option push -.option norelax - la gp, __global_pointer$ -.option pop + load_global_pointer + + /* Load the kernel shadow call stack pointer if coming from userspace */ + scs_load_current_if_task_changed s5 + move a0, sp /* pt_regs */ la ra, ret_from_exception @@ -123,6 +129,9 @@ SYM_CODE_START_NOALIGN(ret_from_exception) addi s0, sp, PT_SIZE_ON_STACK REG_S s0, TASK_TI_KERNEL_SP(tp) + /* Save the kernel shadow call stack pointer */ + scs_save_current + /* * Save TP into the scratch register , so we can find the kernel data * structures again. @@ -170,67 +179,15 @@ SYM_CODE_END(ret_from_exception) #ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) - /* - * Takes the psuedo-spinlock for the shadow stack, in case multiple - * harts are concurrently overflowing their kernel stacks. We could - * store any value here, but since we're overflowing the kernel stack - * already we only have SP to use as a scratch register. So we just - * swap in the address of the spinlock, as that's definately non-zero. - * - * Pairs with a store_release in handle_bad_stack(). - */ -1: la sp, spin_shadow_stack - REG_AMOSWAP_AQ sp, sp, (sp) - bnez sp, 1b - - la sp, shadow_stack - addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE + /* we reach here from kernel context, sscratch must be 0 */ + csrrw x31, CSR_SCRATCH, x31 + asm_per_cpu sp, overflow_stack, x31 + li x31, OVERFLOW_STACK_SIZE + add sp, sp, x31 + /* zero out x31 again and restore x31 */ + xor x31, x31, x31 + csrrw x31, CSR_SCRATCH, x31 - //save caller register to shadow stack - addi sp, sp, -(PT_SIZE_ON_STACK) - REG_S x1, PT_RA(sp) - REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) - - la ra, restore_caller_reg - tail get_overflow_stack - -restore_caller_reg: - //save per-cpu overflow stack - REG_S a0, -8(sp) - //restore caller register from shadow_stack - REG_L x1, PT_RA(sp) - REG_L x5, PT_T0(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) - - //load per-cpu overflow stack - REG_L sp, -8(sp) addi sp, sp, -(PT_SIZE_ON_STACK) //save context to overflow stack @@ -268,6 +225,43 @@ SYM_CODE_START(ret_from_fork) tail syscall_exit_to_user_mode SYM_CODE_END(ret_from_fork) +#ifdef CONFIG_IRQ_STACKS +/* + * void call_on_irq_stack(struct pt_regs *regs, + * void (*func)(struct pt_regs *)); + * + * Calls func(regs) using the per-CPU IRQ stack. + */ +SYM_FUNC_START(call_on_irq_stack) + /* Create a frame record to save ra and s0 (fp) */ + addi sp, sp, -STACKFRAME_SIZE_ON_STACK + REG_S ra, STACKFRAME_RA(sp) + REG_S s0, STACKFRAME_FP(sp) + addi s0, sp, STACKFRAME_SIZE_ON_STACK + + /* Switch to the per-CPU shadow call stack */ + scs_save_current + scs_load_irq_stack t0 + + /* Switch to the per-CPU IRQ stack and call the handler */ + load_per_cpu t0, irq_stack_ptr, t1 + li t1, IRQ_STACK_SIZE + add sp, t0, t1 + jalr a1 + + /* Switch back to the thread shadow call stack */ + scs_load_current + + /* Switch back to the thread stack and restore ra and s0 */ + addi sp, s0, -STACKFRAME_SIZE_ON_STACK + REG_L ra, STACKFRAME_RA(sp) + REG_L s0, STACKFRAME_FP(sp) + addi sp, sp, STACKFRAME_SIZE_ON_STACK + + ret +SYM_FUNC_END(call_on_irq_stack) +#endif /* CONFIG_IRQ_STACKS */ + /* * Integer register context switch * The callee-saved registers must be saved and restored. @@ -297,6 +291,8 @@ SYM_FUNC_START(__switch_to) REG_S s9, TASK_THREAD_S9_RA(a3) REG_S s10, TASK_THREAD_S10_RA(a3) REG_S s11, TASK_THREAD_S11_RA(a3) + /* Save the kernel shadow call stack pointer */ + scs_save_current /* Restore context from next->thread */ REG_L ra, TASK_THREAD_RA_RA(a4) REG_L sp, TASK_THREAD_SP_RA(a4) @@ -314,6 +310,8 @@ SYM_FUNC_START(__switch_to) REG_L s11, TASK_THREAD_S11_RA(a4) /* The offset of thread_info in task_struct is zero. */ move tp, a1 + /* Switch to the next shadow call stack */ + scs_load_current ret SYM_FUNC_END(__switch_to) @@ -324,7 +322,7 @@ SYM_FUNC_END(__switch_to) .section ".rodata" .align LGREG /* Exception vector table */ -SYM_CODE_START(excp_vect_table) +SYM_DATA_START_LOCAL(excp_vect_table) RISCV_PTR do_trap_insn_misaligned ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault) RISCV_PTR do_trap_insn_illegal @@ -342,12 +340,11 @@ SYM_CODE_START(excp_vect_table) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown RISCV_PTR do_page_fault /* store page fault */ -excp_vect_table_end: -SYM_CODE_END(excp_vect_table) +SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end) #ifndef CONFIG_MMU -SYM_CODE_START(__user_rt_sigreturn) +SYM_DATA_START(__user_rt_sigreturn) li a7, __NR_rt_sigreturn ecall -SYM_CODE_END(__user_rt_sigreturn) +SYM_DATA_END(__user_rt_sigreturn) #endif diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index dd2205473de7..2c543f130f93 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -19,7 +19,7 @@ #include <asm/csr.h> #include <asm/asm-offsets.h> -ENTRY(__fstate_save) +SYM_FUNC_START(__fstate_save) li a2, TASK_THREAD_F0 add a0, a0, a2 li t1, SR_FS @@ -60,9 +60,9 @@ ENTRY(__fstate_save) sw t0, TASK_THREAD_FCSR_F0(a0) csrc CSR_STATUS, t1 ret -ENDPROC(__fstate_save) +SYM_FUNC_END(__fstate_save) -ENTRY(__fstate_restore) +SYM_FUNC_START(__fstate_restore) li a2, TASK_THREAD_F0 add a0, a0, a2 li t1, SR_FS @@ -103,4 +103,125 @@ ENTRY(__fstate_restore) fscsr t0 csrc CSR_STATUS, t1 ret -ENDPROC(__fstate_restore) +SYM_FUNC_END(__fstate_restore) + +#define get_f32(which) fmv.x.s a0, which; j 2f +#define put_f32(which) fmv.s.x which, a1; j 2f +#if __riscv_xlen == 64 +# define get_f64(which) fmv.x.d a0, which; j 2f +# define put_f64(which) fmv.d.x which, a1; j 2f +#else +# define get_f64(which) fsd which, 0(a1); j 2f +# define put_f64(which) fld which, 0(a1); j 2f +#endif + +.macro fp_access_prologue + /* + * Compute jump offset to store the correct FP register since we don't + * have indirect FP register access + */ + sll t0, a0, 3 + la t2, 1f + add t0, t0, t2 + li t1, SR_FS + csrs CSR_STATUS, t1 + jr t0 +1: +.endm + +.macro fp_access_epilogue +2: + csrc CSR_STATUS, t1 + ret +.endm + +#define fp_access_body(__access_func) \ + __access_func(f0); \ + __access_func(f1); \ + __access_func(f2); \ + __access_func(f3); \ + __access_func(f4); \ + __access_func(f5); \ + __access_func(f6); \ + __access_func(f7); \ + __access_func(f8); \ + __access_func(f9); \ + __access_func(f10); \ + __access_func(f11); \ + __access_func(f12); \ + __access_func(f13); \ + __access_func(f14); \ + __access_func(f15); \ + __access_func(f16); \ + __access_func(f17); \ + __access_func(f18); \ + __access_func(f19); \ + __access_func(f20); \ + __access_func(f21); \ + __access_func(f22); \ + __access_func(f23); \ + __access_func(f24); \ + __access_func(f25); \ + __access_func(f26); \ + __access_func(f27); \ + __access_func(f28); \ + __access_func(f29); \ + __access_func(f30); \ + __access_func(f31) + + +#ifdef CONFIG_RISCV_MISALIGNED + +/* + * Disable compressed instructions set to keep a constant offset between FP + * load/store/move instructions + */ +.option norvc +/* + * put_f32_reg - Set a FP register from a register containing the value + * a0 = FP register index to be set + * a1 = value to be loaded in the FP register + */ +SYM_FUNC_START(put_f32_reg) + fp_access_prologue + fp_access_body(put_f32) + fp_access_epilogue +SYM_FUNC_END(put_f32_reg) + +/* + * get_f32_reg - Get a FP register value and return it + * a0 = FP register index to be retrieved + */ +SYM_FUNC_START(get_f32_reg) + fp_access_prologue + fp_access_body(get_f32) + fp_access_epilogue +SYM_FUNC_END(get_f32_reg) + +/* + * put_f64_reg - Set a 64 bits FP register from a value or a pointer. + * a0 = FP register index to be set + * a1 = value/pointer to be loaded in the FP register (when xlen == 32 bits, we + * load the value to a pointer). + */ +SYM_FUNC_START(put_f64_reg) + fp_access_prologue + fp_access_body(put_f64) + fp_access_epilogue +SYM_FUNC_END(put_f64_reg) + +/* + * put_f64_reg - Get a 64 bits FP register value and returned it or store it to + * a pointer. + * a0 = FP register index to be retrieved + * a1 = If xlen == 32, pointer which should be loaded with the FP register value + * or unused if xlen == 64. In which case the FP register value is returned + * through a0 + */ +SYM_FUNC_START(get_f64_reg) + fp_access_prologue + fp_access_body(get_f64) + fp_access_epilogue +SYM_FUNC_END(get_f64_reg) + +#endif /* CONFIG_RISCV_MISALIGNED */ diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 3710ea5d160f..b77397432403 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -14,11 +14,12 @@ #include <asm/cpu_ops_sbi.h> #include <asm/hwcap.h> #include <asm/image.h> +#include <asm/scs.h> #include <asm/xip_fixup.h> #include "efi-header.S" __HEAD -ENTRY(_start) +SYM_CODE_START(_start) /* * Image header expected by Linux boot-loaders. The image header data * structure is described in asm/image.h. @@ -110,10 +111,7 @@ relocate_enable_mmu: csrw CSR_TVEC, a0 /* Reload the global pointer */ -.option push -.option norelax - la gp, __global_pointer$ -.option pop + load_global_pointer /* * Switch to kernel page tables. A full fence is necessary in order to @@ -134,10 +132,7 @@ secondary_start_sbi: csrw CSR_IP, zero /* Load the global pointer */ - .option push - .option norelax - la gp, __global_pointer$ - .option pop + load_global_pointer /* * Disable FPU & VECTOR to detect illegal usage of @@ -159,6 +154,7 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a3 add a3, a3, a1 REG_L sp, (a3) + scs_load_current .Lsecondary_start_common: @@ -168,12 +164,12 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a0 call relocate_enable_mmu #endif - call setup_trap_vector + call .Lsetup_trap_vector tail smp_callin #endif /* CONFIG_SMP */ .align 2 -setup_trap_vector: +.Lsetup_trap_vector: /* Set trap vector to exception handler */ la a0, handle_exception csrw CSR_TVEC, a0 @@ -191,9 +187,9 @@ setup_trap_vector: wfi j .Lsecondary_park -END(_start) +SYM_CODE_END(_start) -ENTRY(_start_kernel) +SYM_CODE_START(_start_kernel) /* Mask all interrupts */ csrw CSR_IE, zero csrw CSR_IP, zero @@ -210,7 +206,7 @@ ENTRY(_start_kernel) * not implement PMPs, so we set up a quick trap handler to just skip * touching the PMPs on any trap. */ - la a0, pmp_done + la a0, .Lpmp_done csrw CSR_TVEC, a0 li a0, -1 @@ -218,7 +214,7 @@ ENTRY(_start_kernel) li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X) csrw CSR_PMPCFG0, a0 .align 2 -pmp_done: +.Lpmp_done: /* * The hartid in a0 is expected later on, and we have no firmware @@ -228,10 +224,7 @@ pmp_done: #endif /* CONFIG_RISCV_M_MODE */ /* Load the global pointer */ -.option push -.option norelax - la gp, __global_pointer$ -.option pop + load_global_pointer /* * Disable FPU & VECTOR to detect illegal usage of @@ -282,12 +275,12 @@ pmp_done: /* Clear BSS for flat non-ELF images */ la a3, __bss_start la a4, __bss_stop - ble a4, a3, clear_bss_done -clear_bss: + ble a4, a3, .Lclear_bss_done +.Lclear_bss: REG_S zero, (a3) add a3, a3, RISCV_SZPTR - blt a3, a4, clear_bss -clear_bss_done: + blt a3, a4, .Lclear_bss +.Lclear_bss_done: #endif la a2, boot_cpu_hartid XIP_FIXUP_OFFSET a2 @@ -298,6 +291,7 @@ clear_bss_done: la sp, init_thread_union + THREAD_SIZE XIP_FIXUP_OFFSET sp addi sp, sp, -PT_SIZE_ON_STACK + scs_load_init_stack #ifdef CONFIG_BUILTIN_DTB la a0, __dtb_start XIP_FIXUP_OFFSET a0 @@ -311,11 +305,12 @@ clear_bss_done: call relocate_enable_mmu #endif /* CONFIG_MMU */ - call setup_trap_vector + call .Lsetup_trap_vector /* Restore C environment */ la tp, init_task la sp, init_thread_union + THREAD_SIZE addi sp, sp, -PT_SIZE_ON_STACK + scs_load_current #ifdef CONFIG_KASAN call kasan_early_init @@ -353,10 +348,10 @@ clear_bss_done: tail .Lsecondary_start_common #endif /* CONFIG_RISCV_BOOT_SPINWAIT */ -END(_start_kernel) +SYM_CODE_END(_start_kernel) #ifdef CONFIG_RISCV_M_MODE -ENTRY(reset_regs) +SYM_CODE_START_LOCAL(reset_regs) li sp, 0 li gp, 0 li tp, 0 @@ -454,5 +449,5 @@ ENTRY(reset_regs) .Lreset_regs_done_vector: #endif /* CONFIG_RISCV_ISA_V */ ret -END(reset_regs) +SYM_CODE_END(reset_regs) #endif /* CONFIG_RISCV_M_MODE */ diff --git a/arch/riscv/kernel/hibernate-asm.S b/arch/riscv/kernel/hibernate-asm.S index d698dd7df637..d040dcf4add4 100644 --- a/arch/riscv/kernel/hibernate-asm.S +++ b/arch/riscv/kernel/hibernate-asm.S @@ -21,7 +21,7 @@ * * Always returns 0 */ -ENTRY(__hibernate_cpu_resume) +SYM_FUNC_START(__hibernate_cpu_resume) /* switch to hibernated image's page table. */ csrw CSR_SATP, s0 sfence.vma @@ -34,7 +34,7 @@ ENTRY(__hibernate_cpu_resume) mv a0, zero ret -END(__hibernate_cpu_resume) +SYM_FUNC_END(__hibernate_cpu_resume) /* * Prepare to restore the image. @@ -42,7 +42,7 @@ END(__hibernate_cpu_resume) * a1: satp of temporary page tables. * a2: cpu_resume. */ -ENTRY(hibernate_restore_image) +SYM_FUNC_START(hibernate_restore_image) mv s0, a0 mv s1, a1 mv s2, a2 @@ -50,7 +50,7 @@ ENTRY(hibernate_restore_image) REG_L a1, relocated_restore_code jr a1 -END(hibernate_restore_image) +SYM_FUNC_END(hibernate_restore_image) /* * The below code will be executed from a 'safe' page. @@ -58,7 +58,7 @@ END(hibernate_restore_image) * back to the original memory location. Finally, it jumps to __hibernate_cpu_resume() * to restore the CPU context. */ -ENTRY(hibernate_core_restore_code) +SYM_FUNC_START(hibernate_core_restore_code) /* switch to temp page table. */ csrw satp, s1 sfence.vma @@ -73,4 +73,4 @@ ENTRY(hibernate_core_restore_code) bnez s4, .Lcopy jr s2 -END(hibernate_core_restore_code) +SYM_FUNC_END(hibernate_core_restore_code) diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h index ea1a10355ce9..3df30dd1c458 100644 --- a/arch/riscv/kernel/image-vars.h +++ b/arch/riscv/kernel/image-vars.h @@ -28,7 +28,9 @@ __efistub__start_kernel = _start_kernel; __efistub__end = _end; __efistub__edata = _edata; __efistub___init_text_end = __init_text_end; +#if defined(CONFIG_EFI_EARLYCON) || defined(CONFIG_SYSFB) __efistub_screen_info = screen_info; +#endif #endif diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index 9cc0a7669271..9ceda02507ca 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -9,6 +9,7 @@ #include <linux/irqchip.h> #include <linux/irqdomain.h> #include <linux/module.h> +#include <linux/scs.h> #include <linux/seq_file.h> #include <asm/sbi.h> #include <asm/smp.h> @@ -34,6 +35,24 @@ EXPORT_SYMBOL_GPL(riscv_get_intc_hwnode); #ifdef CONFIG_IRQ_STACKS #include <asm/irq_stack.h> +DECLARE_PER_CPU(ulong *, irq_shadow_call_stack_ptr); + +#ifdef CONFIG_SHADOW_CALL_STACK +DEFINE_PER_CPU(ulong *, irq_shadow_call_stack_ptr); +#endif + +static void init_irq_scs(void) +{ + int cpu; + + if (!scs_is_enabled()) + return; + + for_each_possible_cpu(cpu) + per_cpu(irq_shadow_call_stack_ptr, cpu) = + scs_alloc(cpu_to_node(cpu)); +} + DEFINE_PER_CPU(ulong *, irq_stack_ptr); #ifdef CONFIG_VMAP_STACK @@ -61,40 +80,22 @@ static void init_irq_stacks(void) #endif /* CONFIG_VMAP_STACK */ #ifdef CONFIG_SOFTIRQ_ON_OWN_STACK +static void ___do_softirq(struct pt_regs *regs) +{ + __do_softirq(); +} + void do_softirq_own_stack(void) { -#ifdef CONFIG_IRQ_STACKS - if (on_thread_stack()) { - ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id()) - + IRQ_STACK_SIZE/sizeof(ulong); - __asm__ __volatile( - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" ra, (sp) \n" - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" s0, (sp) \n" - "addi s0, sp, 2*"RISCV_SZPTR "\n" - "move sp, %[sp] \n" - "call __do_softirq \n" - "addi sp, s0, -2*"RISCV_SZPTR"\n" - REG_L" s0, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - REG_L" ra, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - : - : [sp] "r" (sp) - : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "t0", "t1", "t2", "t3", "t4", "t5", "t6", -#ifndef CONFIG_FRAME_POINTER - "s0", -#endif - "memory"); - } else -#endif + if (on_thread_stack()) + call_on_irq_stack(NULL, ___do_softirq); + else __do_softirq(); } #endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */ #else +static void init_irq_scs(void) {} static void init_irq_stacks(void) {} #endif /* CONFIG_IRQ_STACKS */ @@ -106,6 +107,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) void __init init_IRQ(void) { + init_irq_scs(); init_irq_stacks(); irqchip_init(); if (!handle_arch_irq) diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S index 059c5e216ae7..de0a4b35d01e 100644 --- a/arch/riscv/kernel/kexec_relocate.S +++ b/arch/riscv/kernel/kexec_relocate.S @@ -17,27 +17,17 @@ SYM_CODE_START(riscv_kexec_relocate) * s1: (const) Phys address to jump to after relocation * s2: (const) Phys address of the FDT image * s3: (const) The hartid of the current hart - * s4: Pointer to the destination address for the relocation - * s5: (const) Number of words per page - * s6: (const) 1, used for subtraction - * s7: (const) kernel_map.va_pa_offset, used when switching MMU off - * s8: (const) Physical address of the main loop - * s9: (debug) indirection page counter - * s10: (debug) entry counter - * s11: (debug) copied words counter + * s4: (const) kernel_map.va_pa_offset, used when switching MMU off + * s5: Pointer to the destination address for the relocation + * s6: (const) Physical address of the main loop */ mv s0, a0 mv s1, a1 mv s2, a2 mv s3, a3 - mv s4, zero - li s5, (PAGE_SIZE / RISCV_SZPTR) - li s6, 1 - mv s7, a4 - mv s8, zero - mv s9, zero - mv s10, zero - mv s11, zero + mv s4, a4 + mv s5, zero + mv s6, zero /* Disable / cleanup interrupts */ csrw CSR_SIE, zero @@ -52,21 +42,27 @@ SYM_CODE_START(riscv_kexec_relocate) * the start of the loop below so that we jump there in * any case. */ - la s8, 1f - sub s8, s8, s7 - csrw CSR_STVEC, s8 + la s6, 1f + sub s6, s6, s4 + csrw CSR_STVEC, s6 + + /* + * With C-extension, here we get 42 Bytes and the next + * .align directive would pad zeros here up to 44 Bytes. + * So manually put a nop here to avoid zeros padding. + */ + nop /* Process entries in a loop */ .align 2 1: - addi s10, s10, 1 REG_L t0, 0(s0) /* t0 = *image->entry */ addi s0, s0, RISCV_SZPTR /* image->entry++ */ /* IND_DESTINATION entry ? -> save destination address */ andi t1, t0, 0x1 beqz t1, 2f - andi s4, t0, ~0x1 + andi s5, t0, ~0x1 j 1b 2: @@ -74,9 +70,8 @@ SYM_CODE_START(riscv_kexec_relocate) andi t1, t0, 0x2 beqz t1, 2f andi s0, t0, ~0x2 - addi s9, s9, 1 csrw CSR_SATP, zero - jalr zero, s8, 0 + jr s6 2: /* IND_DONE entry ? -> jump to done label */ @@ -92,14 +87,13 @@ SYM_CODE_START(riscv_kexec_relocate) andi t1, t0, 0x8 beqz t1, 1b /* Unknown entry type, ignore it */ andi t0, t0, ~0x8 - mv t3, s5 /* i = num words per page */ + li t3, (PAGE_SIZE / RISCV_SZPTR) /* i = num words per page */ 3: /* copy loop */ REG_L t1, (t0) /* t1 = *src_ptr */ - REG_S t1, (s4) /* *dst_ptr = *src_ptr */ + REG_S t1, (s5) /* *dst_ptr = *src_ptr */ addi t0, t0, RISCV_SZPTR /* stc_ptr++ */ - addi s4, s4, RISCV_SZPTR /* dst_ptr++ */ - sub t3, t3, s6 /* i-- */ - addi s11, s11, 1 /* c++ */ + addi s5, s5, RISCV_SZPTR /* dst_ptr++ */ + addi t3, t3, -0x1 /* i-- */ beqz t3, 1b /* copy done ? */ j 3b @@ -146,7 +140,7 @@ SYM_CODE_START(riscv_kexec_relocate) */ fence.i - jalr zero, a2, 0 + jr a2 SYM_CODE_END(riscv_kexec_relocate) riscv_kexec_relocate_end: diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 669b8697aa38..58dd96a2a153 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -82,7 +82,7 @@ .endm #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ -ENTRY(ftrace_caller) +SYM_FUNC_START(ftrace_caller) SAVE_ABI addi a0, t0, -FENTRY_RA_OFFSET @@ -91,8 +91,7 @@ ENTRY(ftrace_caller) mv a1, ra mv a3, sp -ftrace_call: - .global ftrace_call +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -102,16 +101,15 @@ ftrace_call: #ifdef HAVE_FUNCTION_GRAPH_FP_TEST mv a2, s0 #endif -ftrace_graph_call: - .global ftrace_graph_call +SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ABI jr t0 -ENDPROC(ftrace_caller) +SYM_FUNC_END(ftrace_caller) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -ENTRY(ftrace_regs_caller) +SYM_FUNC_START(ftrace_regs_caller) SAVE_ALL addi a0, t0, -FENTRY_RA_OFFSET @@ -120,8 +118,7 @@ ENTRY(ftrace_regs_caller) mv a1, ra mv a3, sp -ftrace_regs_call: - .global ftrace_regs_call +SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) call ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -131,12 +128,11 @@ ftrace_regs_call: #ifdef HAVE_FUNCTION_GRAPH_FP_TEST mv a2, s0 #endif -ftrace_graph_regs_call: - .global ftrace_graph_regs_call +SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ALL jr t0 -ENDPROC(ftrace_regs_caller) +SYM_FUNC_END(ftrace_regs_caller) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 8818a8fa9ff3..b4dd9ed6849e 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -61,7 +61,7 @@ SYM_TYPED_FUNC_START(ftrace_stub_graph) ret SYM_FUNC_END(ftrace_stub_graph) -ENTRY(return_to_handler) +SYM_FUNC_START(return_to_handler) /* * On implementing the frame point test, the ideal way is to compare the * s0 (frame pointer, if enabled) on entry and the sp (stack pointer) on return. @@ -76,25 +76,25 @@ ENTRY(return_to_handler) mv a2, a0 RESTORE_RET_ABI_STATE jalr a2 -ENDPROC(return_to_handler) +SYM_FUNC_END(return_to_handler) #endif #ifndef CONFIG_DYNAMIC_FTRACE -ENTRY(MCOUNT_NAME) +SYM_FUNC_START(MCOUNT_NAME) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return REG_L t1, 0(t0) - bne t1, t4, do_ftrace_graph_caller + bne t1, t4, .Ldo_ftrace_graph_caller la t3, ftrace_graph_entry REG_L t2, 0(t3) la t6, ftrace_graph_entry_stub - bne t2, t6, do_ftrace_graph_caller + bne t2, t6, .Ldo_ftrace_graph_caller #endif la t3, ftrace_trace_function REG_L t5, 0(t3) - bne t5, t4, do_trace + bne t5, t4, .Ldo_trace ret #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -102,7 +102,7 @@ ENTRY(MCOUNT_NAME) * A pseudo representation for the function graph tracer: * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller) */ -do_ftrace_graph_caller: +.Ldo_ftrace_graph_caller: addi a0, s0, -SZREG mv a1, ra #ifdef HAVE_FUNCTION_GRAPH_FP_TEST @@ -118,7 +118,7 @@ do_ftrace_graph_caller: * A pseudo representation for the function tracer: * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) */ -do_trace: +.Ldo_trace: REG_L a1, -SZREG(s0) mv a0, ra @@ -126,6 +126,6 @@ do_trace: jalr t5 RESTORE_ABI_STATE ret -ENDPROC(MCOUNT_NAME) +SYM_FUNC_END(MCOUNT_NAME) #endif EXPORT_SYMBOL(MCOUNT_NAME) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 7c651d55fcbd..56a8c78e9e21 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -7,6 +7,9 @@ #include <linux/elf.h> #include <linux/err.h> #include <linux/errno.h> +#include <linux/hashtable.h> +#include <linux/kernel.h> +#include <linux/log2.h> #include <linux/moduleloader.h> #include <linux/vmalloc.h> #include <linux/sizes.h> @@ -14,6 +17,38 @@ #include <asm/alternative.h> #include <asm/sections.h> +struct used_bucket { + struct list_head head; + struct hlist_head *bucket; +}; + +struct relocation_head { + struct hlist_node node; + struct list_head *rel_entry; + void *location; +}; + +struct relocation_entry { + struct list_head head; + Elf_Addr value; + unsigned int type; +}; + +struct relocation_handlers { + int (*reloc_handler)(struct module *me, void *location, Elf_Addr v); + int (*accumulate_handler)(struct module *me, void *location, + long buffer); +}; + +unsigned int initialize_relocation_hashtable(unsigned int num_relocations); +void process_accumulated_relocations(struct module *me); +int add_relocation_to_accumulate(struct module *me, int type, void *location, + unsigned int hashtable_bits, Elf_Addr v); + +struct hlist_head *relocation_hashtable; + +struct list_head used_buckets_list; + /* * The auipc+jalr instruction pair can reach any PC-relative offset * in the range [-2^31 - 2^11, 2^31 - 2^11) @@ -27,68 +62,90 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) #endif } -static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) +static int riscv_insn_rmw(void *location, u32 keep, u32 set) +{ + u16 *parcel = location; + u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; + + insn &= keep; + insn |= set; + + parcel[0] = cpu_to_le16(insn); + parcel[1] = cpu_to_le16(insn >> 16); + return 0; +} + +static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set) +{ + u16 *parcel = location; + u16 insn = le16_to_cpu(*parcel); + + insn &= keep; + insn |= set; + + *parcel = cpu_to_le16(insn); + return 0; +} + +static int apply_r_riscv_32_rela(struct module *me, void *location, Elf_Addr v) { if (v != (u32)v) { pr_err("%s: value %016llx out of range for 32-bit field\n", me->name, (long long)v); return -EINVAL; } - *location = v; + *(u32 *)location = v; return 0; } -static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v) +static int apply_r_riscv_64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location = v; return 0; } -static int apply_r_riscv_branch_rela(struct module *me, u32 *location, +static int apply_r_riscv_branch_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 imm12 = (offset & 0x1000) << (31 - 12); u32 imm11 = (offset & 0x800) >> (11 - 7); u32 imm10_5 = (offset & 0x7e0) << (30 - 10); u32 imm4_1 = (offset & 0x1e) << (11 - 4); - *location = (*location & 0x1fff07f) | imm12 | imm11 | imm10_5 | imm4_1; - return 0; + return riscv_insn_rmw(location, 0x1fff07f, imm12 | imm11 | imm10_5 | imm4_1); } -static int apply_r_riscv_jal_rela(struct module *me, u32 *location, +static int apply_r_riscv_jal_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 imm20 = (offset & 0x100000) << (31 - 20); u32 imm19_12 = (offset & 0xff000); u32 imm11 = (offset & 0x800) << (20 - 11); u32 imm10_1 = (offset & 0x7fe) << (30 - 10); - *location = (*location & 0xfff) | imm20 | imm19_12 | imm11 | imm10_1; - return 0; + return riscv_insn_rmw(location, 0xfff, imm20 | imm19_12 | imm11 | imm10_1); } -static int apply_r_riscv_rvc_branch_rela(struct module *me, u32 *location, +static int apply_r_riscv_rvc_branch_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u16 imm8 = (offset & 0x100) << (12 - 8); u16 imm7_6 = (offset & 0xc0) >> (6 - 5); u16 imm5 = (offset & 0x20) >> (5 - 2); u16 imm4_3 = (offset & 0x18) << (12 - 5); u16 imm2_1 = (offset & 0x6) << (12 - 10); - *(u16 *)location = (*(u16 *)location & 0xe383) | - imm8 | imm7_6 | imm5 | imm4_3 | imm2_1; - return 0; + return riscv_insn_rvc_rmw(location, 0xe383, + imm8 | imm7_6 | imm5 | imm4_3 | imm2_1); } -static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, +static int apply_r_riscv_rvc_jump_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u16 imm11 = (offset & 0x800) << (12 - 11); u16 imm10 = (offset & 0x400) >> (10 - 8); u16 imm9_8 = (offset & 0x300) << (12 - 11); @@ -98,16 +155,14 @@ static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, u16 imm4 = (offset & 0x10) << (12 - 5); u16 imm3_1 = (offset & 0xe) << (12 - 10); - *(u16 *)location = (*(u16 *)location & 0xe003) | - imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1; - return 0; + return riscv_insn_rvc_rmw(location, 0xe003, + imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1); } -static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_hi20_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; - s32 hi20; + ptrdiff_t offset = (void *)v - location; if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( @@ -116,23 +171,20 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = (offset + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, (offset + 0x800) & 0xfffff000); } -static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, void *location, Elf_Addr v) { /* * v is the lo12 value to fill. It is calculated before calling this * handler. */ - *location = (*location & 0xfffff) | ((v & 0xfff) << 20); - return 0; + return riscv_insn_rmw(location, 0xfffff, (v & 0xfff) << 20); } -static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, void *location, Elf_Addr v) { /* @@ -142,15 +194,12 @@ static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location, u32 imm11_5 = (v & 0xfe0) << (31 - 11); u32 imm4_0 = (v & 0x1f) << (11 - 4); - *location = (*location & 0x1fff07f) | imm11_5 | imm4_0; - return 0; + return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0); } -static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_hi20_rela(struct module *me, void *location, Elf_Addr v) { - s32 hi20; - if (IS_ENABLED(CONFIG_CMODEL_MEDLOW)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", @@ -158,22 +207,20 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = ((s32)v + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, ((s32)v + 0x800) & 0xfffff000); } -static int apply_r_riscv_lo12_i_rela(struct module *me, u32 *location, +static int apply_r_riscv_lo12_i_rela(struct module *me, void *location, Elf_Addr v) { /* Skip medlow checking because of filtering by HI20 already */ s32 hi20 = ((s32)v + 0x800) & 0xfffff000; s32 lo12 = ((s32)v - hi20); - *location = (*location & 0xfffff) | ((lo12 & 0xfff) << 20); - return 0; + + return riscv_insn_rmw(location, 0xfffff, (lo12 & 0xfff) << 20); } -static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, +static int apply_r_riscv_lo12_s_rela(struct module *me, void *location, Elf_Addr v) { /* Skip medlow checking because of filtering by HI20 already */ @@ -181,20 +228,18 @@ static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, s32 lo12 = ((s32)v - hi20); u32 imm11_5 = (lo12 & 0xfe0) << (31 - 11); u32 imm4_0 = (lo12 & 0x1f) << (11 - 4); - *location = (*location & 0x1fff07f) | imm11_5 | imm4_0; - return 0; + + return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0); } -static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_got_hi20_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; - s32 hi20; + ptrdiff_t offset = (void *)v - location; /* Always emit the got entry */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { - offset = module_emit_got_entry(me, v); - offset = (void *)offset - (void *)location; + offset = (void *)module_emit_got_entry(me, v) - location; } else { pr_err( "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n", @@ -202,22 +247,19 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = (offset + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, (offset + 0x800) & 0xfffff000); } -static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, +static int apply_r_riscv_call_plt_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 hi20, lo12; if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { - offset = module_emit_plt_entry(me, v); - offset = (void *)offset - (void *)location; + offset = (void *)module_emit_plt_entry(me, v) - location; } else { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", @@ -228,15 +270,14 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, hi20 = (offset + 0x800) & 0xfffff000; lo12 = (offset - hi20) & 0xfff; - *location = (*location & 0xfff) | hi20; - *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20); - return 0; + riscv_insn_rmw(location, 0xfff, hi20); + return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20); } -static int apply_r_riscv_call_rela(struct module *me, u32 *location, +static int apply_r_riscv_call_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 hi20, lo12; if (!riscv_insn_valid_32bit_offset(offset)) { @@ -248,18 +289,17 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, hi20 = (offset + 0x800) & 0xfffff000; lo12 = (offset - hi20) & 0xfff; - *location = (*location & 0xfff) | hi20; - *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20); - return 0; + riscv_insn_rmw(location, 0xfff, hi20); + return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20); } -static int apply_r_riscv_relax_rela(struct module *me, u32 *location, +static int apply_r_riscv_relax_rela(struct module *me, void *location, Elf_Addr v) { return 0; } -static int apply_r_riscv_align_rela(struct module *me, u32 *location, +static int apply_r_riscv_align_rela(struct module *me, void *location, Elf_Addr v) { pr_err( @@ -268,91 +308,446 @@ static int apply_r_riscv_align_rela(struct module *me, u32 *location, return -EINVAL; } -static int apply_r_riscv_add16_rela(struct module *me, u32 *location, +static int apply_r_riscv_add8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location += (u8)v; + return 0; +} + +static int apply_r_riscv_add16_rela(struct module *me, void *location, Elf_Addr v) { *(u16 *)location += (u16)v; return 0; } -static int apply_r_riscv_add32_rela(struct module *me, u32 *location, +static int apply_r_riscv_add32_rela(struct module *me, void *location, Elf_Addr v) { *(u32 *)location += (u32)v; return 0; } -static int apply_r_riscv_add64_rela(struct module *me, u32 *location, +static int apply_r_riscv_add64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location += (u64)v; return 0; } -static int apply_r_riscv_sub16_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location -= (u8)v; + return 0; +} + +static int apply_r_riscv_sub16_rela(struct module *me, void *location, Elf_Addr v) { *(u16 *)location -= (u16)v; return 0; } -static int apply_r_riscv_sub32_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub32_rela(struct module *me, void *location, Elf_Addr v) { *(u32 *)location -= (u32)v; return 0; } -static int apply_r_riscv_sub64_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location -= (u64)v; return 0; } -static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, - Elf_Addr v) = { - [R_RISCV_32] = apply_r_riscv_32_rela, - [R_RISCV_64] = apply_r_riscv_64_rela, - [R_RISCV_BRANCH] = apply_r_riscv_branch_rela, - [R_RISCV_JAL] = apply_r_riscv_jal_rela, - [R_RISCV_RVC_BRANCH] = apply_r_riscv_rvc_branch_rela, - [R_RISCV_RVC_JUMP] = apply_r_riscv_rvc_jump_rela, - [R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela, - [R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela, - [R_RISCV_PCREL_LO12_S] = apply_r_riscv_pcrel_lo12_s_rela, - [R_RISCV_HI20] = apply_r_riscv_hi20_rela, - [R_RISCV_LO12_I] = apply_r_riscv_lo12_i_rela, - [R_RISCV_LO12_S] = apply_r_riscv_lo12_s_rela, - [R_RISCV_GOT_HI20] = apply_r_riscv_got_hi20_rela, - [R_RISCV_CALL_PLT] = apply_r_riscv_call_plt_rela, - [R_RISCV_CALL] = apply_r_riscv_call_rela, - [R_RISCV_RELAX] = apply_r_riscv_relax_rela, - [R_RISCV_ALIGN] = apply_r_riscv_align_rela, - [R_RISCV_ADD16] = apply_r_riscv_add16_rela, - [R_RISCV_ADD32] = apply_r_riscv_add32_rela, - [R_RISCV_ADD64] = apply_r_riscv_add64_rela, - [R_RISCV_SUB16] = apply_r_riscv_sub16_rela, - [R_RISCV_SUB32] = apply_r_riscv_sub32_rela, - [R_RISCV_SUB64] = apply_r_riscv_sub64_rela, +static int dynamic_linking_not_supported(struct module *me, void *location, + Elf_Addr v) +{ + pr_err("%s: Dynamic linking not supported in kernel modules PC = %p\n", + me->name, location); + return -EINVAL; +} + +static int tls_not_supported(struct module *me, void *location, Elf_Addr v) +{ + pr_err("%s: Thread local storage not supported in kernel modules PC = %p\n", + me->name, location); + return -EINVAL; +} + +static int apply_r_riscv_sub6_rela(struct module *me, void *location, Elf_Addr v) +{ + u8 *byte = location; + u8 value = v; + + *byte = (*byte - (value & 0x3f)) & 0x3f; + return 0; +} + +static int apply_r_riscv_set6_rela(struct module *me, void *location, Elf_Addr v) +{ + u8 *byte = location; + u8 value = v; + + *byte = (*byte & 0xc0) | (value & 0x3f); + return 0; +} + +static int apply_r_riscv_set8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location = (u8)v; + return 0; +} + +static int apply_r_riscv_set16_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u16 *)location = (u16)v; + return 0; +} + +static int apply_r_riscv_set32_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u32 *)location = (u32)v; + return 0; +} + +static int apply_r_riscv_32_pcrel_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u32 *)location = v - (uintptr_t)location; + return 0; +} + +static int apply_r_riscv_plt32_rela(struct module *me, void *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - location; + + if (!riscv_insn_valid_32bit_offset(offset)) { + /* Only emit the plt entry if offset over 32-bit range */ + if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { + offset = (void *)module_emit_plt_entry(me, v) - location; + } else { + pr_err("%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", + me->name, (long long)v, location); + return -EINVAL; + } + } + + *(u32 *)location = (u32)offset; + return 0; +} + +static int apply_r_riscv_set_uleb128(struct module *me, void *location, Elf_Addr v) +{ + *(long *)location = v; + return 0; +} + +static int apply_r_riscv_sub_uleb128(struct module *me, void *location, Elf_Addr v) +{ + *(long *)location -= v; + return 0; +} + +static int apply_6_bit_accumulation(struct module *me, void *location, long buffer) +{ + u8 *byte = location; + u8 value = buffer; + + if (buffer > 0x3f) { + pr_err("%s: value %ld out of range for 6-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + + *byte = (*byte & 0xc0) | (value & 0x3f); + return 0; +} + +static int apply_8_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U8_MAX) { + pr_err("%s: value %ld out of range for 8-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u8 *)location = (u8)buffer; + return 0; +} + +static int apply_16_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U16_MAX) { + pr_err("%s: value %ld out of range for 16-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u16 *)location = (u16)buffer; + return 0; +} + +static int apply_32_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U32_MAX) { + pr_err("%s: value %ld out of range for 32-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u32 *)location = (u32)buffer; + return 0; +} + +static int apply_64_bit_accumulation(struct module *me, void *location, long buffer) +{ + *(u64 *)location = (u64)buffer; + return 0; +} + +static int apply_uleb128_accumulation(struct module *me, void *location, long buffer) +{ + /* + * ULEB128 is a variable length encoding. Encode the buffer into + * the ULEB128 data format. + */ + u8 *p = location; + + while (buffer != 0) { + u8 value = buffer & 0x7f; + + buffer >>= 7; + value |= (!!buffer) << 7; + + *p++ = value; + } + return 0; +} + +/* + * Relocations defined in the riscv-elf-psabi-doc. + * This handles static linking only. + */ +static const struct relocation_handlers reloc_handlers[] = { + [R_RISCV_32] = { .reloc_handler = apply_r_riscv_32_rela }, + [R_RISCV_64] = { .reloc_handler = apply_r_riscv_64_rela }, + [R_RISCV_RELATIVE] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_COPY] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_JUMP_SLOT] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPMOD32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPMOD64] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPREL32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPREL64] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_TPREL32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_TPREL64] = { .reloc_handler = dynamic_linking_not_supported }, + /* 12-15 undefined */ + [R_RISCV_BRANCH] = { .reloc_handler = apply_r_riscv_branch_rela }, + [R_RISCV_JAL] = { .reloc_handler = apply_r_riscv_jal_rela }, + [R_RISCV_CALL] = { .reloc_handler = apply_r_riscv_call_rela }, + [R_RISCV_CALL_PLT] = { .reloc_handler = apply_r_riscv_call_plt_rela }, + [R_RISCV_GOT_HI20] = { .reloc_handler = apply_r_riscv_got_hi20_rela }, + [R_RISCV_TLS_GOT_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TLS_GD_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_PCREL_HI20] = { .reloc_handler = apply_r_riscv_pcrel_hi20_rela }, + [R_RISCV_PCREL_LO12_I] = { .reloc_handler = apply_r_riscv_pcrel_lo12_i_rela }, + [R_RISCV_PCREL_LO12_S] = { .reloc_handler = apply_r_riscv_pcrel_lo12_s_rela }, + [R_RISCV_HI20] = { .reloc_handler = apply_r_riscv_hi20_rela }, + [R_RISCV_LO12_I] = { .reloc_handler = apply_r_riscv_lo12_i_rela }, + [R_RISCV_LO12_S] = { .reloc_handler = apply_r_riscv_lo12_s_rela }, + [R_RISCV_TPREL_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_LO12_I] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_LO12_S] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_ADD] = { .reloc_handler = tls_not_supported }, + [R_RISCV_ADD8] = { .reloc_handler = apply_r_riscv_add8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_ADD16] = { .reloc_handler = apply_r_riscv_add16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_ADD32] = { .reloc_handler = apply_r_riscv_add32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_ADD64] = { .reloc_handler = apply_r_riscv_add64_rela, + .accumulate_handler = apply_64_bit_accumulation }, + [R_RISCV_SUB8] = { .reloc_handler = apply_r_riscv_sub8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_SUB16] = { .reloc_handler = apply_r_riscv_sub16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_SUB32] = { .reloc_handler = apply_r_riscv_sub32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_SUB64] = { .reloc_handler = apply_r_riscv_sub64_rela, + .accumulate_handler = apply_64_bit_accumulation }, + /* 41-42 reserved for future standard use */ + [R_RISCV_ALIGN] = { .reloc_handler = apply_r_riscv_align_rela }, + [R_RISCV_RVC_BRANCH] = { .reloc_handler = apply_r_riscv_rvc_branch_rela }, + [R_RISCV_RVC_JUMP] = { .reloc_handler = apply_r_riscv_rvc_jump_rela }, + /* 46-50 reserved for future standard use */ + [R_RISCV_RELAX] = { .reloc_handler = apply_r_riscv_relax_rela }, + [R_RISCV_SUB6] = { .reloc_handler = apply_r_riscv_sub6_rela, + .accumulate_handler = apply_6_bit_accumulation }, + [R_RISCV_SET6] = { .reloc_handler = apply_r_riscv_set6_rela, + .accumulate_handler = apply_6_bit_accumulation }, + [R_RISCV_SET8] = { .reloc_handler = apply_r_riscv_set8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_SET16] = { .reloc_handler = apply_r_riscv_set16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_SET32] = { .reloc_handler = apply_r_riscv_set32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_32_PCREL] = { .reloc_handler = apply_r_riscv_32_pcrel_rela }, + [R_RISCV_IRELATIVE] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_PLT32] = { .reloc_handler = apply_r_riscv_plt32_rela }, + [R_RISCV_SET_ULEB128] = { .reloc_handler = apply_r_riscv_set_uleb128, + .accumulate_handler = apply_uleb128_accumulation }, + [R_RISCV_SUB_ULEB128] = { .reloc_handler = apply_r_riscv_sub_uleb128, + .accumulate_handler = apply_uleb128_accumulation }, + /* 62-191 reserved for future standard use */ + /* 192-255 nonstandard ABI extensions */ }; +void process_accumulated_relocations(struct module *me) +{ + /* + * Only ADD/SUB/SET/ULEB128 should end up here. + * + * Each bucket may have more than one relocation location. All + * relocations for a location are stored in a list in a bucket. + * + * Relocations are applied to a temp variable before being stored to the + * provided location to check for overflow. This also allows ULEB128 to + * properly decide how many entries are needed before storing to + * location. The final value is stored into location using the handler + * for the last relocation to an address. + * + * Three layers of indexing: + * - Each of the buckets in use + * - Groups of relocations in each bucket by location address + * - Each relocation entry for a location address + */ + struct used_bucket *bucket_iter; + struct relocation_head *rel_head_iter; + struct relocation_entry *rel_entry_iter; + int curr_type; + void *location; + long buffer; + + list_for_each_entry(bucket_iter, &used_buckets_list, head) { + hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) { + buffer = 0; + location = rel_head_iter->location; + list_for_each_entry(rel_entry_iter, + rel_head_iter->rel_entry, head) { + curr_type = rel_entry_iter->type; + reloc_handlers[curr_type].reloc_handler( + me, &buffer, rel_entry_iter->value); + kfree(rel_entry_iter); + } + reloc_handlers[curr_type].accumulate_handler( + me, location, buffer); + kfree(rel_head_iter); + } + kfree(bucket_iter); + } + + kfree(relocation_hashtable); +} + +int add_relocation_to_accumulate(struct module *me, int type, void *location, + unsigned int hashtable_bits, Elf_Addr v) +{ + struct relocation_entry *entry; + struct relocation_head *rel_head; + struct hlist_head *current_head; + struct used_bucket *bucket; + unsigned long hash; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + INIT_LIST_HEAD(&entry->head); + entry->type = type; + entry->value = v; + + hash = hash_min((uintptr_t)location, hashtable_bits); + + current_head = &relocation_hashtable[hash]; + + /* Find matching location (if any) */ + bool found = false; + struct relocation_head *rel_head_iter; + + hlist_for_each_entry(rel_head_iter, current_head, node) { + if (rel_head_iter->location == location) { + found = true; + rel_head = rel_head_iter; + break; + } + } + + if (!found) { + rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL); + rel_head->rel_entry = + kmalloc(sizeof(struct list_head), GFP_KERNEL); + INIT_LIST_HEAD(rel_head->rel_entry); + rel_head->location = location; + INIT_HLIST_NODE(&rel_head->node); + if (!current_head->first) { + bucket = + kmalloc(sizeof(struct used_bucket), GFP_KERNEL); + INIT_LIST_HEAD(&bucket->head); + bucket->bucket = current_head; + list_add(&bucket->head, &used_buckets_list); + } + hlist_add_head(&rel_head->node, current_head); + } + + /* Add relocation to head of discovered rel_head */ + list_add_tail(&entry->head, rel_head->rel_entry); + + return 0; +} + +unsigned int initialize_relocation_hashtable(unsigned int num_relocations) +{ + /* Can safely assume that bits is not greater than sizeof(long) */ + unsigned long hashtable_size = roundup_pow_of_two(num_relocations); + unsigned int hashtable_bits = ilog2(hashtable_size); + + /* + * Double size of hashtable if num_relocations * 1.25 is greater than + * hashtable_size. + */ + int should_double_size = ((num_relocations + (num_relocations >> 2)) > (hashtable_size)); + + hashtable_bits += should_double_size; + + hashtable_size <<= should_double_size; + + relocation_hashtable = kmalloc_array(hashtable_size, + sizeof(*relocation_hashtable), + GFP_KERNEL); + __hash_init(relocation_hashtable, hashtable_size); + + INIT_LIST_HEAD(&used_buckets_list); + + return hashtable_bits; +} + int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relsec, struct module *me) { Elf_Rela *rel = (void *) sechdrs[relsec].sh_addr; - int (*handler)(struct module *me, u32 *location, Elf_Addr v); + int (*handler)(struct module *me, void *location, Elf_Addr v); Elf_Sym *sym; - u32 *location; + void *location; unsigned int i, type; Elf_Addr v; int res; + unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); + unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations); pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); - for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + for (i = 0; i < num_relocations; i++) { /* This is where to make the change */ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; @@ -370,8 +765,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, type = ELF_RISCV_R_TYPE(rel[i].r_info); - if (type < ARRAY_SIZE(reloc_handlers_rela)) - handler = reloc_handlers_rela[type]; + if (type < ARRAY_SIZE(reloc_handlers)) + handler = reloc_handlers[type].reloc_handler; else handler = NULL; @@ -427,11 +822,16 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } } - res = handler(me, location, v); + if (reloc_handlers[type].accumulate_handler) + res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v); + else + res = handler(me, location, v); if (res) return res; } + process_accumulated_relocations(me); + return 0; } diff --git a/arch/riscv/kernel/probes/rethook_trampoline.S b/arch/riscv/kernel/probes/rethook_trampoline.S index 21bac92a170a..f2cd83d9b0f0 100644 --- a/arch/riscv/kernel/probes/rethook_trampoline.S +++ b/arch/riscv/kernel/probes/rethook_trampoline.S @@ -75,7 +75,7 @@ REG_L x31, PT_T6(sp) .endm -ENTRY(arch_rethook_trampoline) +SYM_CODE_START(arch_rethook_trampoline) addi sp, sp, -(PT_SIZE_ON_STACK) save_all_base_regs @@ -90,4 +90,4 @@ ENTRY(arch_rethook_trampoline) addi sp, sp, PT_SIZE_ON_STACK ret -ENDPROC(arch_rethook_trampoline) +SYM_CODE_END(arch_rethook_trampoline) diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c index d3099d67816d..6c166029079c 100644 --- a/arch/riscv/kernel/probes/simulate-insn.c +++ b/arch/riscv/kernel/probes/simulate-insn.c @@ -24,7 +24,7 @@ static inline bool rv_insn_reg_set_val(struct pt_regs *regs, u32 index, unsigned long val) { if (index == 0) - return false; + return true; else if (index <= 31) *((unsigned long *)regs + index) = val; else diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 194f166b2cc4..4b3dc8beaf77 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -3,6 +3,7 @@ #include <linux/highmem.h> #include <linux/ptrace.h> #include <linux/uprobes.h> +#include <asm/insn.h> #include "decode-insn.h" @@ -17,6 +18,11 @@ bool is_swbp_insn(uprobe_opcode_t *insn) #endif } +bool is_trap_insn(uprobe_opcode_t *insn) +{ + return riscv_insn_is_ebreak(*insn) || riscv_insn_is_c_ebreak(*insn); +} + unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) { return instruction_pointer(regs); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index e32d737e039f..4f21d970a129 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -25,6 +25,7 @@ #include <asm/thread_info.h> #include <asm/cpuidle.h> #include <asm/vector.h> +#include <asm/cpufeature.h> register unsigned long gp_in_global __asm__("gp"); @@ -41,6 +42,23 @@ void arch_cpu_idle(void) cpu_do_idle(); } +int set_unalign_ctl(struct task_struct *tsk, unsigned int val) +{ + if (!unaligned_ctl_available()) + return -EINVAL; + + tsk->thread.align_ctl = val; + return 0; +} + +int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) +{ + if (!unaligned_ctl_available()) + return -EINVAL; + + return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr); +} + void __show_regs(struct pt_regs *regs) { show_regs_print_info(KERN_DEFAULT); diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index c672c8ba9a2a..5a62ed1da453 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -11,6 +11,7 @@ #include <linux/reboot.h> #include <asm/sbi.h> #include <asm/smp.h> +#include <asm/tlbflush.h> /* default SBI version is 0.1 */ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT; @@ -377,31 +378,14 @@ int sbi_remote_fence_i(const struct cpumask *cpu_mask) EXPORT_SYMBOL(sbi_remote_fence_i); /** - * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote - * harts for the specified virtual address range. - * @cpu_mask: A cpu mask containing all the target harts. - * @start: Start of the virtual address - * @size: Total size of the virtual address range. - * - * Return: 0 on success, appropriate linux error code otherwise. - */ -int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, - unsigned long start, - unsigned long size) -{ - return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, - cpu_mask, start, size, 0, 0); -} -EXPORT_SYMBOL(sbi_remote_sfence_vma); - -/** * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given - * remote harts for a virtual address range belonging to a specific ASID. + * remote harts for a virtual address range belonging to a specific ASID or not. * * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the virtual address * @size: Total size of the virtual address range. - * @asid: The value of address space identifier (ASID). + * @asid: The value of address space identifier (ASID), or FLUSH_TLB_NO_ASID + * for flushing all address spaces. * * Return: 0 on success, appropriate linux error code otherwise. */ @@ -410,8 +394,12 @@ int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long size, unsigned long asid) { - return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, - cpu_mask, start, size, asid, 0); + if (asid == FLUSH_TLB_NO_ASID) + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, + cpu_mask, start, size, 0, 0); + else + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, + cpu_mask, start, size, asid, 0); } EXPORT_SYMBOL(sbi_remote_sfence_vma_asid); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index aac853ae4eb7..535a837de55d 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -15,7 +15,6 @@ #include <linux/memblock.h> #include <linux/sched.h> #include <linux/console.h> -#include <linux/screen_info.h> #include <linux/of_fdt.h> #include <linux/sched/task.h> #include <linux/smp.h> @@ -26,6 +25,7 @@ #include <asm/acpi.h> #include <asm/alternative.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/early_ioremap.h> #include <asm/pgtable.h> @@ -40,17 +40,6 @@ #include "head.h" -#if defined(CONFIG_DUMMY_CONSOLE) || defined(CONFIG_EFI) -struct screen_info screen_info __section(".data") = { - .orig_video_lines = 30, - .orig_video_cols = 80, - .orig_video_mode = 0, - .orig_video_ega_bx = 0, - .orig_video_isVGA = 1, - .orig_video_points = 8 -}; -#endif - /* * The lucky hart to first increment this variable will boot the other cores. * This is used before the kernel initializes the BSS so it can't be in the @@ -301,10 +290,13 @@ void __init setup_arch(char **cmdline_p) riscv_fill_hwcap(); init_rt_signal_env(); apply_boot_alternatives(); + if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) riscv_noncoherent_supported(); riscv_set_dma_cache_alignment(); + + riscv_user_isa_enable(); } static int __init topology_init(void) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 21a4d0e111bc..88b6220b2608 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -384,30 +384,6 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) sigset_t *oldset = sigmask_to_save(); int ret; - /* Are we from a system call? */ - if (regs->cause == EXC_SYSCALL) { - /* Avoid additional syscall restarting via ret_from_exception */ - regs->cause = -1UL; - /* If so, check system call restarting.. */ - switch (regs->a0) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - regs->a0 = -EINTR; - break; - - case -ERESTARTSYS: - if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { - regs->a0 = -EINTR; - break; - } - fallthrough; - case -ERESTARTNOINTR: - regs->a0 = regs->orig_a0; - regs->epc -= 0x4; - break; - } - } - rseq_signal_deliver(ksig, regs); /* Set up the stack frame */ @@ -421,36 +397,67 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) void arch_do_signal_or_restart(struct pt_regs *regs) { + unsigned long continue_addr = 0, restart_addr = 0; + int retval = 0; struct ksignal ksig; + bool syscall = (regs->cause == EXC_SYSCALL); - if (get_signal(&ksig)) { - /* Actually deliver the signal */ - handle_signal(&ksig, regs); - return; - } + /* If we were from a system call, check for system call restarting */ + if (syscall) { + continue_addr = regs->epc; + restart_addr = continue_addr - 4; + retval = regs->a0; - /* Did we come from a system call? */ - if (regs->cause == EXC_SYSCALL) { /* Avoid additional syscall restarting via ret_from_exception */ regs->cause = -1UL; - /* Restart the system call - no handlers present */ - switch (regs->a0) { + /* + * Prepare for system call restart. We do this here so that a + * debugger will see the already changed PC. + */ + switch (retval) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: - regs->a0 = regs->orig_a0; - regs->epc -= 0x4; - break; case -ERESTART_RESTARTBLOCK: - regs->a0 = regs->orig_a0; - regs->a7 = __NR_restart_syscall; - regs->epc -= 0x4; + regs->a0 = regs->orig_a0; + regs->epc = restart_addr; break; } } /* + * Get the signal to deliver. When running under ptrace, at this point + * the debugger may change all of our registers. + */ + if (get_signal(&ksig)) { + /* + * Depending on the signal settings, we may need to revert the + * decision to restart the system call, but skip this if a + * debugger has chosen to restart at a different PC. + */ + if (regs->epc == restart_addr && + (retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK || + (retval == -ERESTARTSYS && + !(ksig.ka.sa.sa_flags & SA_RESTART)))) { + regs->a0 = -EINTR; + regs->epc = continue_addr; + } + + /* Actually deliver the signal */ + handle_signal(&ksig, regs); + return; + } + + /* + * Handle restarting a different system call. As above, if a debugger + * has chosen to restart at a different PC, ignore the restart. + */ + if (syscall && regs->epc == restart_addr && retval == -ERESTART_RESTARTBLOCK) + regs->a7 = __NR_restart_syscall; + + /* * If there is no signal to deliver, we just put the saved * sigmask back. */ diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 1b8da4e40a4d..d162bf339beb 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -25,6 +25,8 @@ #include <linux/of.h> #include <linux/sched/task_stack.h> #include <linux/sched/mm.h> + +#include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/cpufeature.h> #include <asm/irq.h> @@ -246,13 +248,14 @@ asmlinkage __visible void smp_callin(void) numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, 1); - check_unaligned_access(curr_cpuid); if (has_vector()) { if (riscv_v_setup_vsize()) elf_hwcap &= ~COMPAT_HWCAP_ISA_V; } + riscv_user_isa_enable(); + /* * Remote TLB flushes are ignored while the CPU is offline, so emit * a local TLB flush right now just in case. diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S index f7960c7c5f9e..2d54f309c140 100644 --- a/arch/riscv/kernel/suspend_entry.S +++ b/arch/riscv/kernel/suspend_entry.S @@ -16,7 +16,7 @@ .altmacro .option norelax -ENTRY(__cpu_suspend_enter) +SYM_FUNC_START(__cpu_suspend_enter) /* Save registers (except A0 and T0-T6) */ REG_S ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) REG_S sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) @@ -57,14 +57,11 @@ ENTRY(__cpu_suspend_enter) /* Return to C code */ ret -END(__cpu_suspend_enter) +SYM_FUNC_END(__cpu_suspend_enter) SYM_TYPED_FUNC_START(__cpu_resume_enter) /* Load the global pointer */ - .option push - .option norelax - la gp, __global_pointer$ - .option pop + load_global_pointer #ifdef CONFIG_MMU /* Save A0 and A1 */ diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index b651ec698a91..c712037dbe10 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -145,26 +145,38 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, for_each_cpu(cpu, cpus) { struct riscv_isainfo *isainfo = &hart_isa[cpu]; - if (riscv_isa_extension_available(isainfo->isa, ZBA)) - pair->value |= RISCV_HWPROBE_EXT_ZBA; - else - missing |= RISCV_HWPROBE_EXT_ZBA; - - if (riscv_isa_extension_available(isainfo->isa, ZBB)) - pair->value |= RISCV_HWPROBE_EXT_ZBB; - else - missing |= RISCV_HWPROBE_EXT_ZBB; - - if (riscv_isa_extension_available(isainfo->isa, ZBS)) - pair->value |= RISCV_HWPROBE_EXT_ZBS; - else - missing |= RISCV_HWPROBE_EXT_ZBS; +#define EXT_KEY(ext) \ + do { \ + if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_EXT_##ext)) \ + pair->value |= RISCV_HWPROBE_EXT_##ext; \ + else \ + missing |= RISCV_HWPROBE_EXT_##ext; \ + } while (false) + + /* + * Only use EXT_KEY() for extensions which can be exposed to userspace, + * regardless of the kernel's configuration, as no other checks, besides + * presence in the hart_isa bitmap, are made. + */ + EXT_KEY(ZBA); + EXT_KEY(ZBB); + EXT_KEY(ZBS); + EXT_KEY(ZICBOZ); +#undef EXT_KEY } /* Now turn off reporting features if any CPU is missing it. */ pair->value &= ~missing; } +static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +{ + struct riscv_hwprobe pair; + + hwprobe_isa_ext0(&pair, cpus); + return (pair.value & ext); +} + static u64 hwprobe_misaligned(const struct cpumask *cpus) { int cpu; @@ -215,6 +227,12 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = hwprobe_misaligned(cpus); break; + case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE: + pair->value = 0; + if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) + pair->value = riscv_cboz_block_size; + break; + /* * For forward compatibility, unknown keys don't fail the whole * call, but get their element key set to -1 and value set to 0 diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug new file mode 100644 index 000000000000..5dba64e8e977 --- /dev/null +++ b/arch/riscv/kernel/tests/Kconfig.debug @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: GPL-2.0-only +menu "arch/riscv/kernel Testing and Coverage" + +config AS_HAS_ULEB128 + def_bool $(as-instr,.reloc label$(comma) R_RISCV_SET_ULEB128$(comma) 127\n.reloc label$(comma) R_RISCV_SUB_ULEB128$(comma) 127\nlabel:\n.word 0) + +menuconfig RUNTIME_KERNEL_TESTING_MENU + bool "arch/riscv/kernel runtime Testing" + def_bool y + help + Enable riscv kernel runtime testing. + +if RUNTIME_KERNEL_TESTING_MENU + +config RISCV_MODULE_LINKING_KUNIT + bool "KUnit test riscv module linking at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test riscv module linking at boot. This will + enable a module called "test_module_linking". + + KUnit tests run during boot and output the results to the debug log + in TAP format (http://testanything.org/). Only useful for kernel devs + running the KUnit test harness, and not intended for inclusion into a + production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + +endif # RUNTIME_TESTING_MENU + +endmenu # "arch/riscv/kernel runtime Testing" diff --git a/arch/riscv/kernel/tests/Makefile b/arch/riscv/kernel/tests/Makefile new file mode 100644 index 000000000000..7d6c76cffe20 --- /dev/null +++ b/arch/riscv/kernel/tests/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_RISCV_MODULE_LINKING_KUNIT) += module_test/ diff --git a/arch/riscv/kernel/tests/module_test/Makefile b/arch/riscv/kernel/tests/module_test/Makefile new file mode 100644 index 000000000000..d7a6fd8943de --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/Makefile @@ -0,0 +1,15 @@ +obj-m += test_module_linking.o + +test_sub := test_sub6.o test_sub8.o test_sub16.o test_sub32.o test_sub64.o + +test_set := test_set6.o test_set8.o test_set16.o test_set32.o + +test_module_linking-objs += $(test_sub) + +test_module_linking-objs += $(test_set) + +ifeq ($(CONFIG_AS_HAS_ULEB128),y) +test_module_linking-objs += test_uleb128.o +endif + +test_module_linking-objs += test_module_linking_main.o diff --git a/arch/riscv/kernel/tests/module_test/test_module_linking_main.c b/arch/riscv/kernel/tests/module_test/test_module_linking_main.c new file mode 100644 index 000000000000..8df5fa5b834e --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_module_linking_main.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Rivos Inc. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <kunit/test.h> + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Test module linking"); + +extern int test_set32(void); +extern int test_set16(void); +extern int test_set8(void); +extern int test_set6(void); +extern long test_sub64(void); +extern int test_sub32(void); +extern int test_sub16(void); +extern int test_sub8(void); +extern int test_sub6(void); + +#ifdef CONFIG_AS_HAS_ULEB128 +extern int test_uleb_basic(void); +extern int test_uleb_large(void); +#endif + +#define CHECK_EQ(lhs, rhs) KUNIT_ASSERT_EQ(test, lhs, rhs) + +void run_test_set(struct kunit *test); +void run_test_sub(struct kunit *test); +void run_test_uleb(struct kunit *test); + +void run_test_set(struct kunit *test) +{ + int val32 = test_set32(); + int val16 = test_set16(); + int val8 = test_set8(); + int val6 = test_set6(); + + CHECK_EQ(val32, 0); + CHECK_EQ(val16, 0); + CHECK_EQ(val8, 0); + CHECK_EQ(val6, 0); +} + +void run_test_sub(struct kunit *test) +{ + int val64 = test_sub64(); + int val32 = test_sub32(); + int val16 = test_sub16(); + int val8 = test_sub8(); + int val6 = test_sub6(); + + CHECK_EQ(val64, 0); + CHECK_EQ(val32, 0); + CHECK_EQ(val16, 0); + CHECK_EQ(val8, 0); + CHECK_EQ(val6, 0); +} + +#ifdef CONFIG_AS_HAS_ULEB128 +void run_test_uleb(struct kunit *test) +{ + int val_uleb = test_uleb_basic(); + int val_uleb2 = test_uleb_large(); + + CHECK_EQ(val_uleb, 0); + CHECK_EQ(val_uleb2, 0); +} +#endif + +static struct kunit_case __refdata riscv_module_linking_test_cases[] = { + KUNIT_CASE(run_test_set), + KUNIT_CASE(run_test_sub), +#ifdef CONFIG_AS_HAS_ULEB128 + KUNIT_CASE(run_test_uleb), +#endif + {} +}; + +static struct kunit_suite riscv_module_linking_test_suite = { + .name = "riscv_checksum", + .test_cases = riscv_module_linking_test_cases, +}; + +kunit_test_suites(&riscv_module_linking_test_suite); diff --git a/arch/riscv/kernel/tests/module_test/test_set16.S b/arch/riscv/kernel/tests/module_test/test_set16.S new file mode 100644 index 000000000000..2be0e441a12e --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set16.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set16 +test_set16: + lw a0, set16 + la t0, set16 +#ifdef CONFIG_32BIT + slli t0, t0, 16 + srli t0, t0, 16 +#else + slli t0, t0, 48 + srli t0, t0, 48 +#endif + sub a0, a0, t0 + ret +.data +set16: + .reloc set16, R_RISCV_SET16, set16 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set32.S b/arch/riscv/kernel/tests/module_test/test_set32.S new file mode 100644 index 000000000000..de0444537e67 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set32.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set32 +test_set32: + lw a0, set32 + la t0, set32 +#ifndef CONFIG_32BIT + slli t0, t0, 32 + srli t0, t0, 32 +#endif + sub a0, a0, t0 + ret +.data +set32: + .reloc set32, R_RISCV_SET32, set32 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set6.S b/arch/riscv/kernel/tests/module_test/test_set6.S new file mode 100644 index 000000000000..c39ce4c219eb --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set6.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set6 +test_set6: + lw a0, set6 + la t0, set6 +#ifdef CONFIG_32BIT + slli t0, t0, 26 + srli t0, t0, 26 +#else + slli t0, t0, 58 + srli t0, t0, 58 +#endif + sub a0, a0, t0 + ret +.data +set6: + .reloc set6, R_RISCV_SET6, set6 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set8.S b/arch/riscv/kernel/tests/module_test/test_set8.S new file mode 100644 index 000000000000..a656173f6f99 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set8.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set8 +test_set8: + lw a0, set8 + la t0, set8 +#ifdef CONFIG_32BIT + slli t0, t0, 24 + srli t0, t0, 24 +#else + slli t0, t0, 56 + srli t0, t0, 56 +#endif + sub a0, a0, t0 + ret +.data +set8: + .reloc set8, R_RISCV_SET8, set8 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub16.S b/arch/riscv/kernel/tests/module_test/test_sub16.S new file mode 100644 index 000000000000..80f731d599ba --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub16.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub16 +test_sub16: + lh a0, sub16 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub16: + .reloc sub16, R_RISCV_ADD16, second + .reloc sub16, R_RISCV_SUB16, first + .half 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub32.S b/arch/riscv/kernel/tests/module_test/test_sub32.S new file mode 100644 index 000000000000..a341686e12df --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub32.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub32 +test_sub32: + lw a0, sub32 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub32: + .reloc sub32, R_RISCV_ADD32, second + .reloc sub32, R_RISCV_SUB32, first + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub6.S b/arch/riscv/kernel/tests/module_test/test_sub6.S new file mode 100644 index 000000000000..e8b61c1ec527 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub6.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub6 +test_sub6: + lb a0, sub6 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub6: + .reloc sub6, R_RISCV_SET6, second + .reloc sub6, R_RISCV_SUB6, first + .byte 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub64.S b/arch/riscv/kernel/tests/module_test/test_sub64.S new file mode 100644 index 000000000000..a59e8afa88fd --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub64.S @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub64 +test_sub64: +#ifdef CONFIG_32BIT + lw a0, sub64 +#else + ld a0, sub64 +#endif + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub64: + .reloc sub64, R_RISCV_ADD64, second + .reloc sub64, R_RISCV_SUB64, first + .word 0 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub8.S b/arch/riscv/kernel/tests/module_test/test_sub8.S new file mode 100644 index 000000000000..ac5d0ec98de3 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub8.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub8 +test_sub8: + lb a0, sub8 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub8: + .reloc sub8, R_RISCV_ADD8, second + .reloc sub8, R_RISCV_SUB8, first + .byte 0 diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S new file mode 100644 index 000000000000..90f22049d553 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_uleb128.S @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_uleb_basic +test_uleb_basic: + ld a0, second + addi a0, a0, -127 + ret + +.global test_uleb_large +test_uleb_large: + ld a0, fourth + addi a0, a0, -0x07e8 + ret + +.data +first: + .space 127 +second: + .reloc second, R_RISCV_SET_ULEB128, second + .reloc second, R_RISCV_SUB_ULEB128, first + .dword 0 +third: + .space 1000 +fourth: + .reloc fourth, R_RISCV_SET_ULEB128, fourth + .reloc fourth, R_RISCV_SUB_ULEB128, third + .dword 0 diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index fae8f610d867..a1b9be3c4332 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -36,7 +36,21 @@ int show_unhandled_signals = 1; static DEFINE_SPINLOCK(die_lock); -static void dump_kernel_instr(const char *loglvl, struct pt_regs *regs) +static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns) +{ + const void __user *uaddr = (__force const void __user *)insns; + + if (!user_mode(regs)) + return get_kernel_nofault(*val, insns); + + /* The user space code from other tasks cannot be accessed. */ + if (regs != task_pt_regs(current)) + return -EPERM; + + return copy_from_user_nofault(val, uaddr, sizeof(*val)); +} + +static void dump_instr(const char *loglvl, struct pt_regs *regs) { char str[sizeof("0000 ") * 12 + 2 + 1], *p = str; const u16 *insns = (u16 *)instruction_pointer(regs); @@ -45,7 +59,7 @@ static void dump_kernel_instr(const char *loglvl, struct pt_regs *regs) int i; for (i = -10; i < 2; i++) { - bad = get_kernel_nofault(val, &insns[i]); + bad = copy_code(regs, &val, &insns[i]); if (!bad) { p += sprintf(p, i == 0 ? "(%04hx) " : "%04hx ", val); } else { @@ -74,7 +88,7 @@ void die(struct pt_regs *regs, const char *str) print_modules(); if (regs) { show_regs(regs); - dump_kernel_instr(KERN_EMERG, regs); + dump_instr(KERN_EMERG, regs); } cause = regs ? regs->cause : -1; @@ -107,6 +121,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); pr_cont("\n"); __show_regs(regs); + dump_instr(KERN_EMERG, regs); } force_sig_fault(signo, code, (void __user *)addr); @@ -181,14 +196,6 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re DO_ERROR_INFO(do_trap_load_fault, SIGSEGV, SEGV_ACCERR, "load access fault"); -#ifndef CONFIG_RISCV_M_MODE -DO_ERROR_INFO(do_trap_load_misaligned, - SIGBUS, BUS_ADRALN, "Oops - load address misaligned"); -DO_ERROR_INFO(do_trap_store_misaligned, - SIGBUS, BUS_ADRALN, "Oops - store (or AMO) address misaligned"); -#else -int handle_misaligned_load(struct pt_regs *regs); -int handle_misaligned_store(struct pt_regs *regs); asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) { @@ -231,7 +238,6 @@ asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs irqentry_nmi_exit(regs, state); } } -#endif DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); DO_ERROR_INFO(do_trap_ecall_s, @@ -360,34 +366,10 @@ static void noinstr handle_riscv_irq(struct pt_regs *regs) asmlinkage void noinstr do_irq(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); -#ifdef CONFIG_IRQ_STACKS - if (on_thread_stack()) { - ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id()) - + IRQ_STACK_SIZE/sizeof(ulong); - __asm__ __volatile( - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" ra, (sp) \n" - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" s0, (sp) \n" - "addi s0, sp, 2*"RISCV_SZPTR "\n" - "move sp, %[sp] \n" - "move a0, %[regs] \n" - "call handle_riscv_irq \n" - "addi sp, s0, -2*"RISCV_SZPTR"\n" - REG_L" s0, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - REG_L" ra, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - : - : [sp] "r" (sp), [regs] "r" (regs) - : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "t0", "t1", "t2", "t3", "t4", "t5", "t6", -#ifndef CONFIG_FRAME_POINTER - "s0", -#endif - "memory"); - } else -#endif + + if (IS_ENABLED(CONFIG_IRQ_STACKS) && on_thread_stack()) + call_on_irq_stack(regs, handle_riscv_irq); + else handle_riscv_irq(regs); irqentry_exit(regs, state); @@ -410,48 +392,14 @@ int is_valid_bugaddr(unsigned long pc) #endif /* CONFIG_GENERIC_BUG */ #ifdef CONFIG_VMAP_STACK -/* - * Extra stack space that allows us to provide panic messages when the kernel - * has overflowed its stack. - */ -static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], +DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)__aligned(16); -/* - * A temporary stack for use by handle_kernel_stack_overflow. This is used so - * we can call into C code to get the per-hart overflow stack. Usage of this - * stack must be protected by spin_shadow_stack. - */ -long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16); - -/* - * A pseudo spinlock to protect the shadow stack from being used by multiple - * harts concurrently. This isn't a real spinlock because the lock side must - * be taken without a valid stack and only a single register, it's only taken - * while in the process of panicing anyway so the performance and error - * checking a proper spinlock gives us doesn't matter. - */ -unsigned long spin_shadow_stack; - -asmlinkage unsigned long get_overflow_stack(void) -{ - return (unsigned long)this_cpu_ptr(overflow_stack) + - OVERFLOW_STACK_SIZE; -} asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); - /* - * We're done with the shadow stack by this point, as we're on the - * overflow stack. Tell any other concurrent overflowing harts that - * they can proceed with panicing by releasing the pseudo-spinlock. - * - * This pairs with an amoswap.aq in handle_kernel_stack_overflow. - */ - smp_store_release(&spin_shadow_stack, 0); - console_verbose(); pr_emerg("Insufficient stack space to handle exception!\n"); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 378f5b151443..5eba37147caa 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -6,12 +6,16 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/perf_event.h> #include <linux/irq.h> #include <linux/stringify.h> #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/csr.h> +#include <asm/entry-common.h> +#include <asm/hwprobe.h> +#include <asm/cpufeature.h> #define INSN_MATCH_LB 0x3 #define INSN_MASK_LB 0x707f @@ -151,53 +155,134 @@ #define PRECISION_S 0 #define PRECISION_D 1 -#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn) \ -static inline type load_##type(const type *addr) \ -{ \ - type val; \ - asm (#insn " %0, %1" \ - : "=&r" (val) : "m" (*addr)); \ - return val; \ +#ifdef CONFIG_FPU + +#define FP_GET_RD(insn) (insn >> 7 & 0x1F) + +extern void put_f32_reg(unsigned long fp_reg, unsigned long value); + +static int set_f32_rd(unsigned long insn, struct pt_regs *regs, + unsigned long val) +{ + unsigned long fp_reg = FP_GET_RD(insn); + + put_f32_reg(fp_reg, val); + regs->status |= SR_FS_DIRTY; + + return 0; } -#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn) \ -static inline void store_##type(type *addr, type val) \ -{ \ - asm volatile (#insn " %0, %1\n" \ - : : "r" (val), "m" (*addr)); \ +extern void put_f64_reg(unsigned long fp_reg, unsigned long value); + +static int set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) +{ + unsigned long fp_reg = FP_GET_RD(insn); + unsigned long value; + +#if __riscv_xlen == 32 + value = (unsigned long) &val; +#else + value = val; +#endif + put_f64_reg(fp_reg, value); + regs->status |= SR_FS_DIRTY; + + return 0; } -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u8, lbu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u16, lhu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s8, lb) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s16, lh) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s32, lw) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u8, sb) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u16, sh) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u32, sw) -#if defined(CONFIG_64BIT) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lwu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u64, ld) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u64, sd) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, ld) +#if __riscv_xlen == 32 +extern void get_f64_reg(unsigned long fp_reg, u64 *value); + +static u64 get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + u64 val; + + get_f64_reg(fp_reg, &val); + regs->status |= SR_FS_DIRTY; + + return val; +} #else -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lw) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, lw) -static inline u64 load_u64(const u64 *addr) +extern unsigned long get_f64_reg(unsigned long fp_reg); + +static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) { - return load_u32((u32 *)addr) - + ((u64)load_u32((u32 *)addr + 1) << 32); + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + unsigned long val; + + val = get_f64_reg(fp_reg); + regs->status |= SR_FS_DIRTY; + + return val; } -static inline void store_u64(u64 *addr, u64 val) +#endif + +extern unsigned long get_f32_reg(unsigned long fp_reg); + +static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) { - store_u32((u32 *)addr, val); - store_u32((u32 *)addr + 1, val >> 32); + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + unsigned long val; + + val = get_f32_reg(fp_reg); + regs->status |= SR_FS_DIRTY; + + return val; } + +#else /* CONFIG_FPU */ +static void set_f32_rd(unsigned long insn, struct pt_regs *regs, + unsigned long val) {} + +static void set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) {} + +static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + return 0; +} + +static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + return 0; +} + #endif -static inline ulong get_insn(ulong mepc) +#define GET_F64_RS2(insn, regs) (get_f64_rs(insn, 20, regs)) +#define GET_F64_RS2C(insn, regs) (get_f64_rs(insn, 2, regs)) +#define GET_F64_RS2S(insn, regs) (get_f64_rs(RVC_RS2S(insn), 0, regs)) + +#define GET_F32_RS2(insn, regs) (get_f32_rs(insn, 20, regs)) +#define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs)) +#define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs)) + +#ifdef CONFIG_RISCV_M_MODE +static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) +{ + u8 val; + + asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr)); + *r_val = val; + + return 0; +} + +static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) +{ + asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr)); + + return 0; +} + +static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn) { register ulong __mepc asm ("a2") = mepc; ulong val, rvc_mask = 3, tmp; @@ -226,23 +311,119 @@ static inline ulong get_insn(ulong mepc) : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask), [xlen_minus_16] "i" (XLEN_MINUS_16)); - return val; + *r_insn = val; + + return 0; +} +#else +static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) +{ + if (user_mode(regs)) { + return __get_user(*r_val, addr); + } else { + *r_val = *addr; + return 0; + } +} + +static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) +{ + if (user_mode(regs)) { + return __put_user(val, addr); + } else { + *addr = val; + return 0; + } } +#define __read_insn(regs, insn, insn_addr) \ +({ \ + int __ret; \ + \ + if (user_mode(regs)) { \ + __ret = __get_user(insn, insn_addr); \ + } else { \ + insn = *insn_addr; \ + __ret = 0; \ + } \ + \ + __ret; \ +}) + +static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) +{ + ulong insn = 0; + + if (epc & 0x2) { + ulong tmp = 0; + u16 __user *insn_addr = (u16 __user *)epc; + + if (__read_insn(regs, insn, insn_addr)) + return -EFAULT; + /* __get_user() uses regular "lw" which sign extend the loaded + * value make sure to clear higher order bits in case we "or" it + * below with the upper 16 bits half. + */ + insn &= GENMASK(15, 0); + if ((insn & __INSN_LENGTH_MASK) != __INSN_LENGTH_32) { + *r_insn = insn; + return 0; + } + insn_addr++; + if (__read_insn(regs, tmp, insn_addr)) + return -EFAULT; + *r_insn = (tmp << 16) | insn; + + return 0; + } else { + u32 __user *insn_addr = (u32 __user *)epc; + + if (__read_insn(regs, insn, insn_addr)) + return -EFAULT; + if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) { + *r_insn = insn; + return 0; + } + insn &= GENMASK(15, 0); + *r_insn = insn; + + return 0; + } +} +#endif + union reg_data { u8 data_bytes[8]; ulong data_ulong; u64 data_u64; }; +static bool unaligned_ctl __read_mostly; + +/* sysctl hooks */ +int unaligned_enabled __read_mostly = 1; /* Enabled by default */ + int handle_misaligned_load(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; - unsigned long insn = get_insn(epc); - unsigned long addr = csr_read(mtval); + unsigned long insn; + unsigned long addr = regs->badaddr; int i, fp = 0, shift = 0, len = 0; + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; + + if (!unaligned_enabled) + return -1; + + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS)) + return -1; + + if (get_insn(regs, epc, &insn)) + return -1; + regs->epc = 0; if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) { @@ -305,13 +486,21 @@ int handle_misaligned_load(struct pt_regs *regs) return -1; } + if (!IS_ENABLED(CONFIG_FPU) && fp) + return -EOPNOTSUPP; + val.data_u64 = 0; - for (i = 0; i < len; i++) - val.data_bytes[i] = load_u8((void *)(addr + i)); + for (i = 0; i < len; i++) { + if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i])) + return -1; + } - if (fp) - return -1; - SET_RD(insn, regs, val.data_ulong << shift >> shift); + if (!fp) + SET_RD(insn, regs, val.data_ulong << shift >> shift); + else if (len == 8) + set_f64_rd(insn, regs, val.data_u64); + else + set_f32_rd(insn, regs, val.data_ulong); regs->epc = epc + INSN_LEN(insn); @@ -322,9 +511,20 @@ int handle_misaligned_store(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; - unsigned long insn = get_insn(epc); - unsigned long addr = csr_read(mtval); - int i, len = 0; + unsigned long insn; + unsigned long addr = regs->badaddr; + int i, len = 0, fp = 0; + + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + + if (!unaligned_enabled) + return -1; + + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS)) + return -1; + + if (get_insn(regs, epc, &insn)) + return -1; regs->epc = 0; @@ -336,6 +536,14 @@ int handle_misaligned_store(struct pt_regs *regs) } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) { len = 8; #endif + } else if ((insn & INSN_MASK_FSD) == INSN_MATCH_FSD) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2(insn, regs); + } else if ((insn & INSN_MASK_FSW) == INSN_MATCH_FSW) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2(insn, regs); } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) { len = 2; #if defined(CONFIG_64BIT) @@ -354,15 +562,88 @@ int handle_misaligned_store(struct pt_regs *regs) ((insn >> SH_RD) & 0x1f)) { len = 4; val.data_ulong = GET_RS2C(insn, regs); + } else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_FSDSP) == INSN_MATCH_C_FSDSP) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2C(insn, regs); +#if !defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_C_FSW) == INSN_MATCH_C_FSW) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_FSWSP) == INSN_MATCH_C_FSWSP) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2C(insn, regs); +#endif } else { regs->epc = epc; return -1; } - for (i = 0; i < len; i++) - store_u8((void *)(addr + i), val.data_bytes[i]); + if (!IS_ENABLED(CONFIG_FPU) && fp) + return -EOPNOTSUPP; + + for (i = 0; i < len; i++) { + if (store_u8(regs, (void *)(addr + i), val.data_bytes[i])) + return -1; + } regs->epc = epc + INSN_LEN(insn); return 0; } + +bool check_unaligned_access_emulated(int cpu) +{ + long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); + unsigned long tmp_var, tmp_val; + bool misaligned_emu_detected; + + *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + + __asm__ __volatile__ ( + " "REG_L" %[tmp], 1(%[ptr])\n" + : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); + + misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); + /* + * If unaligned_ctl is already set, this means that we detected that all + * CPUS uses emulated misaligned access at boot time. If that changed + * when hotplugging the new cpu, this is something we don't handle. + */ + if (unlikely(unaligned_ctl && !misaligned_emu_detected)) { + pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n"); + while (true) + cpu_relax(); + } + + return misaligned_emu_detected; +} + +void unaligned_emulation_finish(void) +{ + int cpu; + + /* + * We can only support PR_UNALIGN controls if all CPUs have misaligned + * accesses emulated since tasks requesting such control can run on any + * CPU. + */ + for_each_present_cpu(cpu) { + if (per_cpu(misaligned_access_speed, cpu) != + RISCV_HWPROBE_MISALIGNED_EMULATED) { + return; + } + } + unaligned_ctl = true; +} + +bool unaligned_ctl_available(void) +{ + return unaligned_ctl; +} diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 6b1dba11bf6d..9b517fe1b8a8 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -36,7 +36,7 @@ CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY endif # Disable -pg to prevent insert call site -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n @@ -73,13 +73,3 @@ quiet_cmd_vdsold = VDSOLD $@ cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ rm $@.tmp - -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso.so: $(obj)/vdso.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso.so diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S index 82f97d67c23e..8f884227e8bc 100644 --- a/arch/riscv/kernel/vdso/flush_icache.S +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -8,7 +8,7 @@ .text /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ -ENTRY(__vdso_flush_icache) +SYM_FUNC_START(__vdso_flush_icache) .cfi_startproc #ifdef CONFIG_SMP li a7, __NR_riscv_flush_icache @@ -19,4 +19,4 @@ ENTRY(__vdso_flush_icache) #endif ret .cfi_endproc -ENDPROC(__vdso_flush_icache) +SYM_FUNC_END(__vdso_flush_icache) diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S index bb0c05e2ffba..9c1bd531907f 100644 --- a/arch/riscv/kernel/vdso/getcpu.S +++ b/arch/riscv/kernel/vdso/getcpu.S @@ -8,11 +8,11 @@ .text /* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */ -ENTRY(__vdso_getcpu) +SYM_FUNC_START(__vdso_getcpu) .cfi_startproc /* For now, just do the syscall. */ li a7, __NR_getcpu ecall ret .cfi_endproc -ENDPROC(__vdso_getcpu) +SYM_FUNC_END(__vdso_getcpu) diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index d40bec6ac078..cadf725ef798 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -37,7 +37,7 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, /* This is something we can handle, fill out the pairs. */ while (p < end) { - if (p->key <= RISCV_HWPROBE_MAX_KEY) { + if (riscv_hwprobe_key_is_valid(p->key)) { p->value = avd->all_cpu_hwprobe_values[p->key]; } else { diff --git a/arch/riscv/kernel/vdso/rt_sigreturn.S b/arch/riscv/kernel/vdso/rt_sigreturn.S index 10438c7c626a..3dc022aa8931 100644 --- a/arch/riscv/kernel/vdso/rt_sigreturn.S +++ b/arch/riscv/kernel/vdso/rt_sigreturn.S @@ -7,10 +7,10 @@ #include <asm/unistd.h> .text -ENTRY(__vdso_rt_sigreturn) +SYM_FUNC_START(__vdso_rt_sigreturn) .cfi_startproc .cfi_signal_frame li a7, __NR_rt_sigreturn ecall .cfi_endproc -ENDPROC(__vdso_rt_sigreturn) +SYM_FUNC_END(__vdso_rt_sigreturn) diff --git a/arch/riscv/kernel/vdso/sys_hwprobe.S b/arch/riscv/kernel/vdso/sys_hwprobe.S index 4e704146c77a..77e57f830521 100644 --- a/arch/riscv/kernel/vdso/sys_hwprobe.S +++ b/arch/riscv/kernel/vdso/sys_hwprobe.S @@ -5,11 +5,11 @@ #include <asm/unistd.h> .text -ENTRY(riscv_hwprobe) +SYM_FUNC_START(riscv_hwprobe) .cfi_startproc li a7, __NR_riscv_hwprobe ecall ret .cfi_endproc -ENDPROC(riscv_hwprobe) +SYM_FUNC_END(riscv_hwprobe) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 82ce64900f3d..cbe2a179331d 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -23,35 +23,31 @@ SECTIONS .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) } - .note : { *(.note.*) } :text :note .dynamic : { *(.dynamic) } :text :dynamic + .rodata : { + *(.rodata .rodata.* .gnu.linkonce.r.*) + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + } + + .note : { *(.note.*) } :text :note + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } - /* - * This linker script is used both with -r and with -shared. - * For the layouts to match, we need to skip more than enough - * space for the dynamic symbol table, etc. If this amount is - * insufficient, ld -shared will error; simply increase it here. + * Text is well-separated from actual data: there's plenty of + * stuff that isn't used at runtime in between. */ - . = 0x800; + . = ALIGN(16); .text : { *(.text .text.*) } :text . = ALIGN(4); .alternative : { - __alt_start = .; *(.alternative) - __alt_end = .; - } - - .data : { - *(.got.plt) *(.got) - *(.data .data.* .gnu.linkonce.d.*) - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) } } diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 74bb27440527..a944294f6f23 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -14,7 +14,7 @@ #include <linux/kvm_host.h> #include <linux/percpu.h> #include <linux/spinlock.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/kvm_aia_imsic.h> struct aia_hgei_control { diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 48ae0d4b3932..225a435d9c9a 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -11,7 +11,7 @@ #include <linux/module.h> #include <linux/kvm_host.h> #include <asm/csr.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/sbi.h> long kvm_arch_dev_ioctl(struct file *filp, diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c index 44bc324aeeb0..23c0e82b5103 100644 --- a/arch/riscv/kvm/tlb.c +++ b/arch/riscv/kvm/tlb.c @@ -12,7 +12,7 @@ #include <linux/kvm_host.h> #include <asm/cacheflush.h> #include <asm/csr.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/insn-def.h> #define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL) diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index 08ba48a395aa..030904d82b58 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c @@ -11,7 +11,7 @@ #include <linux/err.h> #include <linux/kvm_host.h> #include <linux/uaccess.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #ifdef CONFIG_FPU void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index c6ebce6126b5..f8c9fa0c03c5 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -13,7 +13,7 @@ #include <linux/uaccess.h> #include <linux/kvm_host.h> #include <asm/cacheflush.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/kvm_vcpu_vector.h> #include <asm/vector.h> diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index b430cbb69521..b339a2682f25 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -11,7 +11,7 @@ #include <linux/err.h> #include <linux/kvm_host.h> #include <linux/uaccess.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/kvm_vcpu_vector.h> #include <asm/vector.h> diff --git a/arch/riscv/lib/clear_page.S b/arch/riscv/lib/clear_page.S index d7a256eb53f4..b22de1231144 100644 --- a/arch/riscv/lib/clear_page.S +++ b/arch/riscv/lib/clear_page.S @@ -29,41 +29,41 @@ SYM_FUNC_START(clear_page) lw a1, riscv_cboz_block_size add a2, a0, a2 .Lzero_loop: - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(11, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(10, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(9, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(8, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 bltu a0, a2, .Lzero_loop ret diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S index 1a40d01a9543..44e009ec5fef 100644 --- a/arch/riscv/lib/memcpy.S +++ b/arch/riscv/lib/memcpy.S @@ -7,8 +7,7 @@ #include <asm/asm.h> /* void *memcpy(void *, const void *, size_t) */ -ENTRY(__memcpy) -WEAK(memcpy) +SYM_FUNC_START(__memcpy) move t6, a0 /* Preserve return value */ /* Defer to byte-oriented copy for small sizes */ @@ -105,6 +104,7 @@ WEAK(memcpy) bltu a1, a3, 5b 6: ret -END(__memcpy) +SYM_FUNC_END(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) SYM_FUNC_ALIAS(__pi_memcpy, __memcpy) SYM_FUNC_ALIAS(__pi___memcpy, __memcpy) diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S index 838ff2022fe3..cb3e2e7ef0ba 100644 --- a/arch/riscv/lib/memmove.S +++ b/arch/riscv/lib/memmove.S @@ -7,7 +7,6 @@ #include <asm/asm.h> SYM_FUNC_START(__memmove) -SYM_FUNC_START_WEAK(memmove) /* * Returns * a0 - dest @@ -26,8 +25,8 @@ SYM_FUNC_START_WEAK(memmove) */ /* Return if nothing to do */ - beq a0, a1, return_from_memmove - beqz a2, return_from_memmove + beq a0, a1, .Lreturn_from_memmove + beqz a2, .Lreturn_from_memmove /* * Register Uses @@ -60,7 +59,7 @@ SYM_FUNC_START_WEAK(memmove) * small enough not to bother. */ andi t0, a2, -(2 * SZREG) - beqz t0, byte_copy + beqz t0, .Lbyte_copy /* * Now solve for t5 and t6. @@ -87,14 +86,14 @@ SYM_FUNC_START_WEAK(memmove) */ xor t0, a0, a1 andi t1, t0, (SZREG - 1) - beqz t1, coaligned_copy + beqz t1, .Lcoaligned_copy /* Fall through to misaligned fixup copy */ -misaligned_fixup_copy: - bltu a1, a0, misaligned_fixup_copy_reverse +.Lmisaligned_fixup_copy: + bltu a1, a0, .Lmisaligned_fixup_copy_reverse -misaligned_fixup_copy_forward: - jal t0, byte_copy_until_aligned_forward +.Lmisaligned_fixup_copy_forward: + jal t0, .Lbyte_copy_until_aligned_forward andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ @@ -153,10 +152,10 @@ misaligned_fixup_copy_forward: mv t3, t6 /* Fix the dest pointer in case the loop was broken */ add a1, t3, a5 /* Restore the src pointer */ - j byte_copy_forward /* Copy any remaining bytes */ + j .Lbyte_copy_forward /* Copy any remaining bytes */ -misaligned_fixup_copy_reverse: - jal t0, byte_copy_until_aligned_reverse +.Lmisaligned_fixup_copy_reverse: + jal t0, .Lbyte_copy_until_aligned_reverse andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ @@ -215,18 +214,18 @@ misaligned_fixup_copy_reverse: mv t4, t5 /* Fix the dest pointer in case the loop was broken */ add a4, t4, a5 /* Restore the src pointer */ - j byte_copy_reverse /* Copy any remaining bytes */ + j .Lbyte_copy_reverse /* Copy any remaining bytes */ /* * Simple copy loops for SZREG co-aligned memory locations. * These also make calls to do byte copies for any unaligned * data at their terminations. */ -coaligned_copy: - bltu a1, a0, coaligned_copy_reverse +.Lcoaligned_copy: + bltu a1, a0, .Lcoaligned_copy_reverse -coaligned_copy_forward: - jal t0, byte_copy_until_aligned_forward +.Lcoaligned_copy_forward: + jal t0, .Lbyte_copy_until_aligned_forward 1: REG_L t1, ( 0 * SZREG)(a1) @@ -235,10 +234,10 @@ coaligned_copy_forward: REG_S t1, (-1 * SZREG)(t3) bne t3, t6, 1b - j byte_copy_forward /* Copy any remaining bytes */ + j .Lbyte_copy_forward /* Copy any remaining bytes */ -coaligned_copy_reverse: - jal t0, byte_copy_until_aligned_reverse +.Lcoaligned_copy_reverse: + jal t0, .Lbyte_copy_until_aligned_reverse 1: REG_L t1, (-1 * SZREG)(a4) @@ -247,7 +246,7 @@ coaligned_copy_reverse: REG_S t1, ( 0 * SZREG)(t4) bne t4, t5, 1b - j byte_copy_reverse /* Copy any remaining bytes */ + j .Lbyte_copy_reverse /* Copy any remaining bytes */ /* * These are basically sub-functions within the function. They @@ -258,7 +257,7 @@ coaligned_copy_reverse: * up from where they were left and we avoid code duplication * without any overhead except the call in and return jumps. */ -byte_copy_until_aligned_forward: +.Lbyte_copy_until_aligned_forward: beq t3, t5, 2f 1: lb t1, 0(a1) @@ -269,7 +268,7 @@ byte_copy_until_aligned_forward: 2: jalr zero, 0x0(t0) /* Return to multibyte copy loop */ -byte_copy_until_aligned_reverse: +.Lbyte_copy_until_aligned_reverse: beq t4, t6, 2f 1: lb t1, -1(a4) @@ -285,10 +284,10 @@ byte_copy_until_aligned_reverse: * These will byte copy until they reach the end of data to copy. * At that point, they will call to return from memmove. */ -byte_copy: - bltu a1, a0, byte_copy_reverse +.Lbyte_copy: + bltu a1, a0, .Lbyte_copy_reverse -byte_copy_forward: +.Lbyte_copy_forward: beq t3, t4, 2f 1: lb t1, 0(a1) @@ -299,7 +298,7 @@ byte_copy_forward: 2: ret -byte_copy_reverse: +.Lbyte_copy_reverse: beq t4, t3, 2f 1: lb t1, -1(a4) @@ -309,10 +308,10 @@ byte_copy_reverse: bne t4, t3, 1b 2: -return_from_memmove: +.Lreturn_from_memmove: ret -SYM_FUNC_END(memmove) SYM_FUNC_END(__memmove) +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) SYM_FUNC_ALIAS(__pi_memmove, __memmove) SYM_FUNC_ALIAS(__pi___memmove, __memmove) diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S index 34c5360c6705..35f358e70bdb 100644 --- a/arch/riscv/lib/memset.S +++ b/arch/riscv/lib/memset.S @@ -8,8 +8,7 @@ #include <asm/asm.h> /* void *memset(void *, int, size_t) */ -ENTRY(__memset) -WEAK(memset) +SYM_FUNC_START(__memset) move t0, a0 /* Preserve return value */ /* Defer to byte-oriented fill for small sizes */ @@ -110,4 +109,5 @@ WEAK(memset) bltu t0, a3, 5b 6: ret -END(__memset) +SYM_FUNC_END(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 09b47ebacf2e..3ab438f30d13 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -10,8 +10,7 @@ _asm_extable 100b, \lbl .endm -ENTRY(__asm_copy_to_user) -ENTRY(__asm_copy_from_user) +SYM_FUNC_START(__asm_copy_to_user) /* Enable access to user memory */ li t6, SR_SUM @@ -181,13 +180,13 @@ ENTRY(__asm_copy_from_user) csrc CSR_STATUS, t6 sub a0, t5, a0 ret -ENDPROC(__asm_copy_to_user) -ENDPROC(__asm_copy_from_user) +SYM_FUNC_END(__asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_to_user) +SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_from_user) -ENTRY(__clear_user) +SYM_FUNC_START(__clear_user) /* Enable access to user memory */ li t6, SR_SUM @@ -233,5 +232,5 @@ ENTRY(__clear_user) csrc CSR_STATUS, t6 sub a0, a3, a0 ret -ENDPROC(__clear_user) +SYM_FUNC_END(__clear_user) EXPORT_SYMBOL(__clear_user) diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 9c454f90fd3d..3a4dfc8babcf 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -36,3 +36,4 @@ endif obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_RISCV_DMA_NONCOHERENT) += dma-noncoherent.o +obj-$(CONFIG_RISCV_NONSTANDARD_CACHE_OPS) += cache-ops.o diff --git a/arch/riscv/mm/cache-ops.c b/arch/riscv/mm/cache-ops.c new file mode 100644 index 000000000000..a993ad11d0ec --- /dev/null +++ b/arch/riscv/mm/cache-ops.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + */ + +#include <asm/dma-noncoherent.h> + +struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init; + +void +riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops) +{ + if (!ops) + return; + noncoherent_cache_ops = *ops; +} +EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops); diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index f1387272a551..55a34f2020a8 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -3,7 +3,9 @@ * Copyright (C) 2017 SiFive */ +#include <linux/acpi.h> #include <linux/of.h> +#include <asm/acpi.h> #include <asm/cacheflush.h> #ifdef CONFIG_SMP @@ -124,13 +126,24 @@ void __init riscv_init_cbo_blocksizes(void) unsigned long cbom_hartid, cboz_hartid; u32 cbom_block_size = 0, cboz_block_size = 0; struct device_node *node; + struct acpi_table_header *rhct; + acpi_status status; + + if (acpi_disabled) { + for_each_of_cpu_node(node) { + /* set block-size for cbom and/or cboz extension if available */ + cbo_get_block_size(node, "riscv,cbom-block-size", + &cbom_block_size, &cbom_hartid); + cbo_get_block_size(node, "riscv,cboz-block-size", + &cboz_block_size, &cboz_hartid); + } + } else { + status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct); + if (ACPI_FAILURE(status)) + return; - for_each_of_cpu_node(node) { - /* set block-size for cbom and/or cboz extension if available */ - cbo_get_block_size(node, "riscv,cbom-block-size", - &cbom_block_size, &cbom_hartid); - cbo_get_block_size(node, "riscv,cboz-block-size", - &cboz_block_size, &cboz_hartid); + acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL); + acpi_put_table((struct acpi_table_header *)rhct); } if (cbom_block_size) diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index b76e7e192eb1..4e4e469b8dd6 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -15,12 +15,6 @@ static bool noncoherent_supported __ro_after_init; int dma_cache_alignment __ro_after_init = ARCH_DMA_MINALIGN; EXPORT_SYMBOL_GPL(dma_cache_alignment); -struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init = { - .wback = NULL, - .inv = NULL, - .wback_inv = NULL, -}; - static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) { void *vaddr = phys_to_virt(paddr); @@ -31,7 +25,7 @@ static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) return; } #endif - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(CLEAN, vaddr, size, riscv_cbom_block_size); } static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) @@ -45,7 +39,7 @@ static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) } #endif - ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(INVAL, vaddr, size, riscv_cbom_block_size); } static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size) @@ -59,7 +53,7 @@ static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size) } #endif - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(FLUSH, vaddr, size, riscv_cbom_block_size); } static inline bool arch_sync_dma_clean_before_fromdevice(void) @@ -131,7 +125,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) } #endif - ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size); + ALT_CMO_OP(FLUSH, flush_addr, size, riscv_cbom_block_size); } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, @@ -162,12 +156,3 @@ void __init riscv_set_dma_cache_alignment(void) if (!noncoherent_supported) dma_cache_alignment = 1; } - -void riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops) -{ - if (!ops) - return; - - noncoherent_cache_ops = *ops; -} -EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index d9a4e8702864..2e011cbddf3a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -49,10 +49,12 @@ u64 satp_mode __ro_after_init = SATP_MODE_32; #endif EXPORT_SYMBOL(satp_mode); +#ifdef CONFIG_64BIT bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); EXPORT_SYMBOL(pgtable_l4_enabled); EXPORT_SYMBOL(pgtable_l5_enabled); +#endif phys_addr_t phys_ram_base __ro_after_init; EXPORT_SYMBOL(phys_ram_base); @@ -664,16 +666,16 @@ void __init create_pgd_mapping(pgd_t *pgdp, static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va, phys_addr_t size) { - if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE) - return PGDIR_SIZE; - - if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE) + if (pgtable_l5_enabled && + !(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE) return P4D_SIZE; - if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE) + if (pgtable_l4_enabled && + !(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE) return PUD_SIZE; - if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE) + if (IS_ENABLED(CONFIG_64BIT) && + !(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE) return PMD_SIZE; return PAGE_SIZE; diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 161d0b34c2cb..fc5fc4f785c4 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -5,6 +5,7 @@ #include <linux/pagewalk.h> #include <linux/pgtable.h> +#include <linux/vmalloc.h> #include <asm/tlbflush.h> #include <asm/bitops.h> #include <asm/set_memory.h> @@ -25,19 +26,6 @@ static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk) return new_val; } -static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - pgd_t val = READ_ONCE(*pgd); - - if (pgd_leaf(val)) { - val = __pgd(set_pageattr_masks(pgd_val(val), walk)); - set_pgd(pgd, val); - } - - return 0; -} - static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr, unsigned long next, struct mm_walk *walk) { @@ -96,7 +84,6 @@ static int pageattr_pte_hole(unsigned long addr, unsigned long next, } static const struct mm_walk_ops pageattr_ops = { - .pgd_entry = pageattr_pgd_entry, .p4d_entry = pageattr_p4d_entry, .pud_entry = pageattr_pud_entry, .pmd_entry = pageattr_pmd_entry, @@ -105,12 +92,181 @@ static const struct mm_walk_ops pageattr_ops = { .walk_lock = PGWALK_RDLOCK, }; +#ifdef CONFIG_64BIT +static int __split_linear_mapping_pmd(pud_t *pudp, + unsigned long vaddr, unsigned long end) +{ + pmd_t *pmdp; + unsigned long next; + + pmdp = pmd_offset(pudp, vaddr); + + do { + next = pmd_addr_end(vaddr, end); + + if (next - vaddr >= PMD_SIZE && + vaddr <= (vaddr & PMD_MASK) && end >= next) + continue; + + if (pmd_leaf(*pmdp)) { + struct page *pte_page; + unsigned long pfn = _pmd_pfn(*pmdp); + pgprot_t prot = __pgprot(pmd_val(*pmdp) & ~_PAGE_PFN_MASK); + pte_t *ptep_new; + int i; + + pte_page = alloc_page(GFP_KERNEL); + if (!pte_page) + return -ENOMEM; + + ptep_new = (pte_t *)page_address(pte_page); + for (i = 0; i < PTRS_PER_PTE; ++i, ++ptep_new) + set_pte(ptep_new, pfn_pte(pfn + i, prot)); + + smp_wmb(); + + set_pmd(pmdp, pfn_pmd(page_to_pfn(pte_page), PAGE_TABLE)); + } + } while (pmdp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_pud(p4d_t *p4dp, + unsigned long vaddr, unsigned long end) +{ + pud_t *pudp; + unsigned long next; + int ret; + + pudp = pud_offset(p4dp, vaddr); + + do { + next = pud_addr_end(vaddr, end); + + if (next - vaddr >= PUD_SIZE && + vaddr <= (vaddr & PUD_MASK) && end >= next) + continue; + + if (pud_leaf(*pudp)) { + struct page *pmd_page; + unsigned long pfn = _pud_pfn(*pudp); + pgprot_t prot = __pgprot(pud_val(*pudp) & ~_PAGE_PFN_MASK); + pmd_t *pmdp_new; + int i; + + pmd_page = alloc_page(GFP_KERNEL); + if (!pmd_page) + return -ENOMEM; + + pmdp_new = (pmd_t *)page_address(pmd_page); + for (i = 0; i < PTRS_PER_PMD; ++i, ++pmdp_new) + set_pmd(pmdp_new, + pfn_pmd(pfn + ((i * PMD_SIZE) >> PAGE_SHIFT), prot)); + + smp_wmb(); + + set_pud(pudp, pfn_pud(page_to_pfn(pmd_page), PAGE_TABLE)); + } + + ret = __split_linear_mapping_pmd(pudp, vaddr, next); + if (ret) + return ret; + } while (pudp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_p4d(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + p4d_t *p4dp; + unsigned long next; + int ret; + + p4dp = p4d_offset(pgdp, vaddr); + + do { + next = p4d_addr_end(vaddr, end); + + /* + * If [vaddr; end] contains [vaddr & P4D_MASK; next], we don't + * need to split, we'll change the protections on the whole P4D. + */ + if (next - vaddr >= P4D_SIZE && + vaddr <= (vaddr & P4D_MASK) && end >= next) + continue; + + if (p4d_leaf(*p4dp)) { + struct page *pud_page; + unsigned long pfn = _p4d_pfn(*p4dp); + pgprot_t prot = __pgprot(p4d_val(*p4dp) & ~_PAGE_PFN_MASK); + pud_t *pudp_new; + int i; + + pud_page = alloc_page(GFP_KERNEL); + if (!pud_page) + return -ENOMEM; + + /* + * Fill the pud level with leaf puds that have the same + * protections as the leaf p4d. + */ + pudp_new = (pud_t *)page_address(pud_page); + for (i = 0; i < PTRS_PER_PUD; ++i, ++pudp_new) + set_pud(pudp_new, + pfn_pud(pfn + ((i * PUD_SIZE) >> PAGE_SHIFT), prot)); + + /* + * Make sure the pud filling is not reordered with the + * p4d store which could result in seeing a partially + * filled pud level. + */ + smp_wmb(); + + set_p4d(p4dp, pfn_p4d(page_to_pfn(pud_page), PAGE_TABLE)); + } + + ret = __split_linear_mapping_pud(p4dp, vaddr, next); + if (ret) + return ret; + } while (p4dp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_pgd(pgd_t *pgdp, + unsigned long vaddr, + unsigned long end) +{ + unsigned long next; + int ret; + + do { + next = pgd_addr_end(vaddr, end); + /* We never use PGD mappings for the linear mapping */ + ret = __split_linear_mapping_p4d(pgdp, vaddr, next); + if (ret) + return ret; + } while (pgdp++, vaddr = next, vaddr != end); + + return 0; +} + +static int split_linear_mapping(unsigned long start, unsigned long end) +{ + return __split_linear_mapping_pgd(pgd_offset_k(start), start, end); +} +#endif /* CONFIG_64BIT */ + static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) { int ret; unsigned long start = addr; unsigned long end = start + PAGE_SIZE * numpages; + unsigned long __maybe_unused lm_start; + unsigned long __maybe_unused lm_end; struct pageattr_masks masks = { .set_mask = set_mask, .clear_mask = clear_mask @@ -120,11 +276,67 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, return 0; mmap_write_lock(&init_mm); + +#ifdef CONFIG_64BIT + /* + * We are about to change the permissions of a kernel mapping, we must + * apply the same changes to its linear mapping alias, which may imply + * splitting a huge mapping. + */ + + if (is_vmalloc_or_module_addr((void *)start)) { + struct vm_struct *area = NULL; + int i, page_start; + + area = find_vm_area((void *)start); + page_start = (start - (unsigned long)area->addr) >> PAGE_SHIFT; + + for (i = page_start; i < page_start + numpages; ++i) { + lm_start = (unsigned long)page_address(area->pages[i]); + lm_end = lm_start + PAGE_SIZE; + + ret = split_linear_mapping(lm_start, lm_end); + if (ret) + goto unlock; + + ret = walk_page_range_novma(&init_mm, lm_start, lm_end, + &pageattr_ops, NULL, &masks); + if (ret) + goto unlock; + } + } else if (is_kernel_mapping(start) || is_linear_mapping(start)) { + lm_start = (unsigned long)lm_alias(start); + lm_end = (unsigned long)lm_alias(end); + + ret = split_linear_mapping(lm_start, lm_end); + if (ret) + goto unlock; + + ret = walk_page_range_novma(&init_mm, lm_start, lm_end, + &pageattr_ops, NULL, &masks); + if (ret) + goto unlock; + } + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks); + +unlock: + mmap_write_unlock(&init_mm); + + /* + * We can't use flush_tlb_kernel_range() here as we may have split a + * hugepage that is larger than that, so let's flush everything. + */ + flush_tlb_all(); +#else + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, + &masks); + mmap_write_unlock(&init_mm); flush_tlb_kernel_range(start, end); +#endif return ret; } @@ -159,36 +371,14 @@ int set_memory_nx(unsigned long addr, int numpages) int set_direct_map_invalid_noflush(struct page *page) { - int ret; - unsigned long start = (unsigned long)page_address(page); - unsigned long end = start + PAGE_SIZE; - struct pageattr_masks masks = { - .set_mask = __pgprot(0), - .clear_mask = __pgprot(_PAGE_PRESENT) - }; - - mmap_read_lock(&init_mm); - ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); - mmap_read_unlock(&init_mm); - - return ret; + return __set_memory((unsigned long)page_address(page), 1, + __pgprot(0), __pgprot(_PAGE_PRESENT)); } int set_direct_map_default_noflush(struct page *page) { - int ret; - unsigned long start = (unsigned long)page_address(page); - unsigned long end = start + PAGE_SIZE; - struct pageattr_masks masks = { - .set_mask = PAGE_KERNEL, - .clear_mask = __pgprot(0) - }; - - mmap_read_lock(&init_mm); - ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); - mmap_read_unlock(&init_mm); - - return ret; + return __set_memory((unsigned long)page_address(page), 1, + PAGE_KERNEL, __pgprot(0)); } #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/arch/riscv/mm/pmem.c b/arch/riscv/mm/pmem.c index c5fc5ec96f6d..370a422ede11 100644 --- a/arch/riscv/mm/pmem.c +++ b/arch/riscv/mm/pmem.c @@ -17,7 +17,7 @@ void arch_wb_cache_pmem(void *addr, size_t size) return; } #endif - ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size); + ALT_CMO_OP(CLEAN, addr, size, riscv_cbom_block_size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); @@ -29,6 +29,6 @@ void arch_invalidate_pmem(void *addr, size_t size) return; } #endif - ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size); + ALT_CMO_OP(INVAL, addr, size, riscv_cbom_block_size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 20a9f991a6d7..657c27bc07a7 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -129,55 +129,55 @@ static struct ptd_mm_info efi_ptd_info = { /* Page Table Entry */ struct prot_bits { u64 mask; - u64 val; const char *set; const char *clear; }; static const struct prot_bits pte_bits[] = { { +#ifdef CONFIG_64BIT + .mask = _PAGE_NAPOT, + .set = "N", + .clear = ".", + }, { + .mask = _PAGE_MTMASK_SVPBMT, + .set = "MT(%s)", + .clear = " .. ", + }, { +#endif .mask = _PAGE_SOFT, - .val = _PAGE_SOFT, - .set = "RSW", - .clear = " ", + .set = "RSW(%d)", + .clear = " .. ", }, { .mask = _PAGE_DIRTY, - .val = _PAGE_DIRTY, .set = "D", .clear = ".", }, { .mask = _PAGE_ACCESSED, - .val = _PAGE_ACCESSED, .set = "A", .clear = ".", }, { .mask = _PAGE_GLOBAL, - .val = _PAGE_GLOBAL, .set = "G", .clear = ".", }, { .mask = _PAGE_USER, - .val = _PAGE_USER, .set = "U", .clear = ".", }, { .mask = _PAGE_EXEC, - .val = _PAGE_EXEC, .set = "X", .clear = ".", }, { .mask = _PAGE_WRITE, - .val = _PAGE_WRITE, .set = "W", .clear = ".", }, { .mask = _PAGE_READ, - .val = _PAGE_READ, .set = "R", .clear = ".", }, { .mask = _PAGE_PRESENT, - .val = _PAGE_PRESENT, .set = "V", .clear = ".", } @@ -208,15 +208,30 @@ static void dump_prot(struct pg_state *st) unsigned int i; for (i = 0; i < ARRAY_SIZE(pte_bits); i++) { - const char *s; + char s[7]; + unsigned long val; - if ((st->current_prot & pte_bits[i].mask) == pte_bits[i].val) - s = pte_bits[i].set; - else - s = pte_bits[i].clear; + val = st->current_prot & pte_bits[i].mask; + if (val) { + if (pte_bits[i].mask == _PAGE_SOFT) + sprintf(s, pte_bits[i].set, val >> 8); +#ifdef CONFIG_64BIT + else if (pte_bits[i].mask == _PAGE_MTMASK_SVPBMT) { + if (val == _PAGE_NOCACHE_SVPBMT) + sprintf(s, pte_bits[i].set, "NC"); + else if (val == _PAGE_IO_SVPBMT) + sprintf(s, pte_bits[i].set, "IO"); + else + sprintf(s, pte_bits[i].set, "??"); + } +#endif + else + sprintf(s, "%s", pte_bits[i].set); + } else { + sprintf(s, "%s", pte_bits[i].clear); + } - if (s) - pt_dump_seq_printf(st->seq, " %s", s); + pt_dump_seq_printf(st->seq, " %s", s); } } @@ -384,6 +399,9 @@ static int __init ptdump_init(void) kernel_ptd_info.base_addr = KERN_VIRT_START; + pg_level[1].name = pgtable_l5_enabled ? "P4D" : "PGD"; + pg_level[2].name = pgtable_l4_enabled ? "PUD" : "PGD"; + for (i = 0; i < ARRAY_SIZE(pg_level); i++) for (j = 0; j < ARRAY_SIZE(pte_bits); j++) pg_level[i].mask |= pte_bits[j].mask; diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 77be59aadc73..e6659d7368b3 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -3,33 +3,56 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/sched.h> +#include <linux/hugetlb.h> #include <asm/sbi.h> #include <asm/mmu_context.h> static inline void local_flush_tlb_all_asid(unsigned long asid) { - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); + else + local_flush_tlb_all(); } static inline void local_flush_tlb_page_asid(unsigned long addr, unsigned long asid) { - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); + else + local_flush_tlb_page(addr); } -static inline void local_flush_tlb_range(unsigned long start, - unsigned long size, unsigned long stride) +/* + * Flush entire TLB if number of entries to be flushed is greater + * than the threshold below. + */ +static unsigned long tlb_flush_all_threshold __read_mostly = 64; + +static void local_flush_tlb_range_threshold_asid(unsigned long start, + unsigned long size, + unsigned long stride, + unsigned long asid) { - if (size <= stride) - local_flush_tlb_page(start); - else - local_flush_tlb_all(); + unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride); + int i; + + if (nr_ptes_in_range > tlb_flush_all_threshold) { + local_flush_tlb_all_asid(asid); + return; + } + + for (i = 0; i < nr_ptes_in_range; ++i) { + local_flush_tlb_page_asid(start, asid); + start += stride; + } } static inline void local_flush_tlb_range_asid(unsigned long start, @@ -37,8 +60,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start, { if (size <= stride) local_flush_tlb_page_asid(start, asid); - else + else if (size == FLUSH_TLB_MAX_SIZE) local_flush_tlb_all_asid(asid); + else + local_flush_tlb_range_threshold_asid(start, size, stride, asid); } static void __ipi_flush_tlb_all(void *info) @@ -51,7 +76,7 @@ void flush_tlb_all(void) if (riscv_use_ipi_for_rfence()) on_each_cpu(__ipi_flush_tlb_all, NULL, 1); else - sbi_remote_sfence_vma(NULL, 0, -1); + sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); } struct flush_tlb_range_data { @@ -68,68 +93,62 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __ipi_flush_tlb_range(void *info) -{ - struct flush_tlb_range_data *d = info; - - local_flush_tlb_range(d->start, d->size, d->stride); -} - static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { struct flush_tlb_range_data ftd; - struct cpumask *cmask = mm_cpumask(mm); - unsigned int cpuid; + const struct cpumask *cmask; + unsigned long asid = FLUSH_TLB_NO_ASID; bool broadcast; - if (cpumask_empty(cmask)) - return; + if (mm) { + unsigned int cpuid; - cpuid = get_cpu(); - /* check if the tlbflush needs to be sent to other CPUs */ - broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; - if (static_branch_unlikely(&use_asid_allocator)) { - unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask; - - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = asid; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range_asid, - &ftd, 1); - } else - sbi_remote_sfence_vma_asid(cmask, - start, size, asid); - } else { - local_flush_tlb_range_asid(start, size, stride, asid); - } + cmask = mm_cpumask(mm); + if (cpumask_empty(cmask)) + return; + + cpuid = get_cpu(); + /* check if the tlbflush needs to be sent to other CPUs */ + broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; + + if (static_branch_unlikely(&use_asid_allocator)) + asid = atomic_long_read(&mm->context.id) & asid_mask; } else { - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = 0; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range, - &ftd, 1); - } else - sbi_remote_sfence_vma(cmask, start, size); - } else { - local_flush_tlb_range(start, size, stride); - } + cmask = cpu_online_mask; + broadcast = true; } - put_cpu(); + if (broadcast) { + if (riscv_use_ipi_for_rfence()) { + ftd.asid = asid; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, + __ipi_flush_tlb_range_asid, + &ftd, 1); + } else + sbi_remote_sfence_vma_asid(cmask, + start, size, asid); + } else { + local_flush_tlb_range_asid(start, size, stride, asid); + } + + if (mm) + put_cpu(); } void flush_tlb_mm(struct mm_struct *mm) { - __flush_tlb_range(mm, 0, -1, PAGE_SIZE); + __flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); +} + +void flush_tlb_mm_range(struct mm_struct *mm, + unsigned long start, unsigned long end, + unsigned int page_size) +{ + __flush_tlb_range(mm, start, end - start, page_size); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) @@ -140,8 +159,40 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE); + unsigned long stride_size; + + if (!is_vm_hugetlb_page(vma)) { + stride_size = PAGE_SIZE; + } else { + stride_size = huge_page_size(hstate_vma(vma)); + + /* + * As stated in the privileged specification, every PTE in a + * NAPOT region must be invalidated, so reset the stride in that + * case. + */ + if (has_svnapot()) { + if (stride_size >= PGDIR_SIZE) + stride_size = PGDIR_SIZE; + else if (stride_size >= P4D_SIZE) + stride_size = P4D_SIZE; + else if (stride_size >= PUD_SIZE) + stride_size = PUD_SIZE; + else if (stride_size >= PMD_SIZE) + stride_size = PMD_SIZE; + else + stride_size = PAGE_SIZE; + } + } + + __flush_tlb_range(vma->vm_mm, start, end - start, stride_size); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + __flush_tlb_range(NULL, start, end - start, PAGE_SIZE); } + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index 9e6476719abb..280b0eb352b8 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -81,6 +81,14 @@ ifdef CONFIG_CFI_CLANG PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_CFI) endif +ifdef CONFIG_RELOCATABLE +PURGATORY_CFLAGS_REMOVE += -fPIE +endif + +ifdef CONFIG_SHADOW_CALL_STACK +PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_SCS) +endif + CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_purgatory.o += $(PURGATORY_CFLAGS) diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 0194f4554130..5bcf3af903da 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -7,15 +7,11 @@ * Author: Li Zhengyu (lizhengyu3@huawei.com) * */ - -.macro size, sym:req - .size \sym, . - \sym -.endm +#include <linux/linkage.h> .text -.globl purgatory_start -purgatory_start: +SYM_CODE_START(purgatory_start) lla sp, .Lstack mv s0, a0 /* The hartid of the current hart */ @@ -28,8 +24,7 @@ purgatory_start: mv a1, s1 ld a2, riscv_kernel_entry jr a2 - -size purgatory_start +SYM_CODE_END(purgatory_start) .align 4 .rept 256 @@ -39,9 +34,6 @@ size purgatory_start .data -.globl riscv_kernel_entry -riscv_kernel_entry: - .quad 0 -size riscv_kernel_entry +SYM_DATA(riscv_kernel_entry, .quad 0) .end diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ae29e4392664..3bec98d20283 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -131,6 +131,7 @@ config S390 select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 + select DCACHE_WORD_ACCESS if !KMSAN select DMA_OPS if PCI select DYNAMIC_FTRACE if FUNCTION_TRACER select FUNCTION_ALIGNMENT_8B if CC_IS_GCC @@ -235,6 +236,7 @@ config S390 select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT select TTY + select USER_STACKTRACE_SUPPORT select VIRT_CPU_ACCOUNTING select ZONE_DMA # Note: keep the above list sorted alphabetically diff --git a/arch/s390/Makefile b/arch/s390/Makefile index a53a36ee0731..73873e451686 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -138,9 +138,6 @@ bzImage: vmlinux zfcpdump: $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -vdso_install: - $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ - archheaders: $(Q)$(MAKE) $(build)=$(syscalls) uapi @@ -160,6 +157,9 @@ vdso_prepare: prepare0 $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ $(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h) +vdso-install-y += arch/s390/kernel/vdso64/vdso64.so.dbg +vdso-install-$(CONFIG_COMPAT) += arch/s390/kernel/vdso32/vdso32.so.dbg + ifdef CONFIG_EXPOLINE_EXTERN modules_prepare: expoline_prepare expoline_prepare: scripts diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 7b7521762633..2ab4872fbee1 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/ctype.h> #include <linux/pgtable.h> +#include <asm/page-states.h> #include <asm/ebcdic.h> #include <asm/sclp.h> #include <asm/sections.h> @@ -24,6 +25,7 @@ unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; struct ipl_parameter_block __bootdata_preserved(ipl_block); int __bootdata_preserved(ipl_block_valid); int __bootdata_preserved(__kaslr_enabled); +int __bootdata_preserved(cmma_flag) = 1; unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE; unsigned long memory_limit; @@ -295,6 +297,12 @@ void parse_boot_command_line(void) if (!strcmp(param, "nokaslr")) __kaslr_enabled = 0; + if (!strcmp(param, "cmma")) { + rc = kstrtobool(val, &enabled); + if (!rc && !enabled) + cmma_flag = 0; + } + #if IS_ENABLED(CONFIG_KVM) if (!strcmp(param, "prot_virt")) { rc = kstrtobool(val, &enabled); diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index d3e48bd9c394..8104e0e3d188 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/string.h> #include <linux/elf.h> +#include <asm/page-states.h> #include <asm/boot_data.h> #include <asm/sections.h> #include <asm/maccess.h> @@ -49,7 +50,7 @@ static void detect_facilities(void) { if (test_facility(8)) { machine.has_edat1 = 1; - __ctl_set_bit(0, 23); + local_ctl_set_bit(0, CR0_EDAT_BIT); } if (test_facility(78)) machine.has_edat2 = 1; @@ -57,6 +58,48 @@ static void detect_facilities(void) machine.has_nx = 1; } +static int cmma_test_essa(void) +{ + unsigned long reg1, reg2, tmp = 0; + int rc = 1; + psw_t old; + + /* Test ESSA_GET_STATE */ + asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" + " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" + " la %[rc],0\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + [tmp] "=&d" (tmp), + "+Q" (S390_lowcore.program_new_psw), + "=Q" (old) + : [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw), + [cmd] "i" (ESSA_GET_STATE) + : "cc", "memory"); + return rc; +} + +static void cmma_init(void) +{ + if (!cmma_flag) + return; + if (cmma_test_essa()) { + cmma_flag = 0; + return; + } + if (test_facility(147)) + cmma_flag = 2; +} + static void setup_lpp(void) { S390_lowcore.current_pid = 0; @@ -306,6 +349,7 @@ void startup_kernel(void) setup_boot_command_line(); parse_boot_command_line(); detect_facilities(); + cmma_init(); sanitize_prot_virt_host(); max_physmem_end = detect_max_physmem_end(); setup_ident_map_size(max_physmem_end); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 442a74f113cb..e3a4500a5a75 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -2,16 +2,18 @@ #include <linux/sched/task.h> #include <linux/pgtable.h> #include <linux/kasan.h> +#include <asm/page-states.h> #include <asm/pgalloc.h> #include <asm/facility.h> #include <asm/sections.h> +#include <asm/ctlreg.h> #include <asm/physmem_info.h> #include <asm/maccess.h> #include <asm/abs_lowcore.h> #include "decompressor.h" #include "boot.h" -unsigned long __bootdata_preserved(s390_invalid_asce); +struct ctlreg __bootdata_preserved(s390_invalid_asce); #ifdef CONFIG_PROC_FS atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); @@ -69,6 +71,10 @@ static void kasan_populate_shadow(void) crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); + __arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pte, 1); /* * Current memory layout: @@ -166,8 +172,6 @@ static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) { - pte_t entry; - if (mode == POPULATE_KASAN_ZERO_SHADOW) { set_pte(pte, pte_z); return true; @@ -224,6 +228,7 @@ static void *boot_crst_alloc(unsigned long val) table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); crst_table_init(table, val); + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; } @@ -239,6 +244,7 @@ static pte_t *boot_pte_alloc(void) if (!pte_leftover) { pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); pte = pte_leftover + _PAGE_TABLE_SIZE; + __arch_set_page_dat(pte, 1); } else { pte = pte_leftover; pte_leftover = NULL; @@ -419,6 +425,14 @@ void setup_vmem(unsigned long asce_limit) unsigned long asce_bits; int i; + /* + * Mark whole memory as no-dat. This must be done before any + * page tables are allocated, or kernel image builtin pages + * are marked as dat tables. + */ + for_each_physmem_online_range(i, &start, &end) + __arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT); + if (asce_limit == _REGION1_SIZE) { asce_type = _REGION2_ENTRY_EMPTY; asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; @@ -426,10 +440,12 @@ void setup_vmem(unsigned long asce_limit) asce_type = _REGION3_ENTRY_EMPTY; asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; } - s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; crst_table_init((unsigned long *)swapper_pg_dir, asce_type); crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); + __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); /* * To allow prefixing the lowcore must be mapped with 4KB pages. @@ -447,12 +463,12 @@ void setup_vmem(unsigned long asce_limit) kasan_populate_shadow(); - S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits; + S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits; S390_lowcore.user_asce = s390_invalid_asce; - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - __ctl_load(S390_lowcore.user_asce, 7, 7); - __ctl_load(S390_lowcore.kernel_asce, 13, 13); + local_ctl_load(1, &S390_lowcore.kernel_asce); + local_ctl_load(7, &S390_lowcore.user_asce); + local_ctl_load(13, &S390_lowcore.kernel_asce); - init_mm.context.asce = S390_lowcore.kernel_asce; + init_mm.context.asce = S390_lowcore.kernel_asce.val; } diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index e6532477f126..4a6b0a8b6412 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -13,6 +13,7 @@ #define EX_TYPE_UA_LOAD_MEM 4 #define EX_TYPE_UA_LOAD_REG 5 #define EX_TYPE_UA_LOAD_REGPAIR 6 +#define EX_TYPE_ZEROPAD 7 #define EX_DATA_REG_ERR_SHIFT 0 #define EX_DATA_REG_ERR GENMASK(3, 0) @@ -23,16 +24,7 @@ #define EX_DATA_LEN_SHIFT 8 #define EX_DATA_LEN GENMASK(11, 8) -#define __EX_TABLE(_section, _fault, _target, _type) \ - stringify_in_c(.section _section,"a";) \ - stringify_in_c(.balign 4;) \ - stringify_in_c(.long (_fault) - .;) \ - stringify_in_c(.long (_target) - .;) \ - stringify_in_c(.short (_type);) \ - stringify_in_c(.short 0;) \ - stringify_in_c(.previous) - -#define __EX_TABLE_UA(_section, _fault, _target, _type, _regerr, _regaddr, _len)\ +#define __EX_TABLE(_section, _fault, _target, _type, _regerr, _regaddr, _len) \ stringify_in_c(.section _section,"a";) \ stringify_in_c(.balign 4;) \ stringify_in_c(.long (_fault) - .;) \ @@ -72,21 +64,24 @@ stringify_in_c(.previous) #define EX_TABLE(_fault, _target) \ - __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP) + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0) #define EX_TABLE_AMODE31(_fault, _target) \ - __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP) + __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0) #define EX_TABLE_UA_STORE(_fault, _target, _regerr) \ - __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0) + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0) #define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \ - __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len) + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len) #define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \ - __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0) + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0) #define EX_TABLE_UA_LOAD_REGPAIR(_fault, _target, _regerr, _regzero) \ - __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0) + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0) + +#define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0) #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h deleted file mode 100644 index adf7d8cdac7e..000000000000 --- a/arch/s390/include/asm/ctl_reg.h +++ /dev/null @@ -1,146 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright IBM Corp. 1999, 2009 - * - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> - */ - -#ifndef __ASM_CTL_REG_H -#define __ASM_CTL_REG_H - -#include <linux/bits.h> - -#define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10) -#define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35) -#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38) -#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39) -#define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49) -#define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50) -#define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52) -#define CR0_CPU_TIMER_SUBMASK BIT(63 - 53) -#define CR0_SERVICE_SIGNAL_SUBMASK BIT(63 - 54) -#define CR0_UNUSED_56 BIT(63 - 56) -#define CR0_INTERRUPT_KEY_SUBMASK BIT(63 - 57) -#define CR0_MEASUREMENT_ALERT_SUBMASK BIT(63 - 58) - -#define CR14_UNUSED_32 BIT(63 - 32) -#define CR14_UNUSED_33 BIT(63 - 33) -#define CR14_CHANNEL_REPORT_SUBMASK BIT(63 - 35) -#define CR14_RECOVERY_SUBMASK BIT(63 - 36) -#define CR14_DEGRADATION_SUBMASK BIT(63 - 37) -#define CR14_EXTERNAL_DAMAGE_SUBMASK BIT(63 - 38) -#define CR14_WARNING_SUBMASK BIT(63 - 39) - -#ifndef __ASSEMBLY__ - -#include <linux/bug.h> - -#define __ctl_load(array, low, high) do { \ - typedef struct { char _[sizeof(array)]; } addrtype; \ - \ - BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\ - asm volatile( \ - " lctlg %1,%2,%0\n" \ - : \ - : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high) \ - : "memory"); \ -} while (0) - -#define __ctl_store(array, low, high) do { \ - typedef struct { char _[sizeof(array)]; } addrtype; \ - \ - BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\ - asm volatile( \ - " stctg %1,%2,%0\n" \ - : "=Q" (*(addrtype *)(&array)) \ - : "i" (low), "i" (high)); \ -} while (0) - -static __always_inline void __ctl_set_bit(unsigned int cr, unsigned int bit) -{ - unsigned long reg; - - __ctl_store(reg, cr, cr); - reg |= 1UL << bit; - __ctl_load(reg, cr, cr); -} - -static __always_inline void __ctl_clear_bit(unsigned int cr, unsigned int bit) -{ - unsigned long reg; - - __ctl_store(reg, cr, cr); - reg &= ~(1UL << bit); - __ctl_load(reg, cr, cr); -} - -void smp_ctl_set_clear_bit(int cr, int bit, bool set); - -static inline void ctl_set_bit(int cr, int bit) -{ - smp_ctl_set_clear_bit(cr, bit, true); -} - -static inline void ctl_clear_bit(int cr, int bit) -{ - smp_ctl_set_clear_bit(cr, bit, false); -} - -union ctlreg0 { - unsigned long val; - struct { - unsigned long : 8; - unsigned long tcx : 1; /* Transactional-Execution control */ - unsigned long pifo : 1; /* Transactional-Execution Program- - Interruption-Filtering Override */ - unsigned long : 3; - unsigned long ccc : 1; /* Cryptography counter control */ - unsigned long pec : 1; /* PAI extension control */ - unsigned long : 17; - unsigned long : 3; - unsigned long lap : 1; /* Low-address-protection control */ - unsigned long : 4; - unsigned long edat : 1; /* Enhanced-DAT-enablement control */ - unsigned long : 2; - unsigned long iep : 1; /* Instruction-Execution-Protection */ - unsigned long : 1; - unsigned long afp : 1; /* AFP-register control */ - unsigned long vx : 1; /* Vector enablement control */ - unsigned long : 7; - unsigned long sssm : 1; /* Service signal subclass mask */ - unsigned long : 9; - }; -}; - -union ctlreg2 { - unsigned long val; - struct { - unsigned long : 33; - unsigned long ducto : 25; - unsigned long : 1; - unsigned long gse : 1; - unsigned long : 1; - unsigned long tds : 1; - unsigned long tdc : 2; - }; -}; - -union ctlreg5 { - unsigned long val; - struct { - unsigned long : 33; - unsigned long pasteo: 25; - unsigned long : 6; - }; -}; - -union ctlreg15 { - unsigned long val; - struct { - unsigned long lsea : 61; - unsigned long : 3; - }; -}; - -#endif /* __ASSEMBLY__ */ -#endif /* __ASM_CTL_REG_H */ diff --git a/arch/s390/include/asm/ctlreg.h b/arch/s390/include/asm/ctlreg.h new file mode 100644 index 000000000000..6d4b85f2b541 --- /dev/null +++ b/arch/s390/include/asm/ctlreg.h @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_S390_CTLREG_H +#define __ASM_S390_CTLREG_H + +#include <linux/bits.h> + +#define CR0_TRANSACTIONAL_EXECUTION_BIT (63 - 8) +#define CR0_CLOCK_COMPARATOR_SIGN_BIT (63 - 10) +#define CR0_CRYPTOGRAPHY_COUNTER_BIT (63 - 13) +#define CR0_PAI_EXTENSION_BIT (63 - 14) +#define CR0_CPUMF_EXTRACTION_AUTH_BIT (63 - 15) +#define CR0_WARNING_TRACK_BIT (63 - 30) +#define CR0_LOW_ADDRESS_PROTECTION_BIT (63 - 35) +#define CR0_FETCH_PROTECTION_OVERRIDE_BIT (63 - 38) +#define CR0_STORAGE_PROTECTION_OVERRIDE_BIT (63 - 39) +#define CR0_EDAT_BIT (63 - 40) +#define CR0_INSTRUCTION_EXEC_PROTECTION_BIT (63 - 43) +#define CR0_VECTOR_BIT (63 - 46) +#define CR0_MALFUNCTION_ALERT_SUBMASK_BIT (63 - 48) +#define CR0_EMERGENCY_SIGNAL_SUBMASK_BIT (63 - 49) +#define CR0_EXTERNAL_CALL_SUBMASK_BIT (63 - 50) +#define CR0_CLOCK_COMPARATOR_SUBMASK_BIT (63 - 52) +#define CR0_CPU_TIMER_SUBMASK_BIT (63 - 53) +#define CR0_SERVICE_SIGNAL_SUBMASK_BIT (63 - 54) +#define CR0_UNUSED_56_BIT (63 - 56) +#define CR0_INTERRUPT_KEY_SUBMASK_BIT (63 - 57) +#define CR0_MEASUREMENT_ALERT_SUBMASK_BIT (63 - 58) +#define CR0_ETR_SUBMASK_BIT (63 - 59) +#define CR0_IUCV_BIT (63 - 62) + +#define CR0_TRANSACTIONAL_EXECUTION BIT(CR0_TRANSACTIONAL_EXECUTION_BIT) +#define CR0_CLOCK_COMPARATOR_SIGN BIT(CR0_CLOCK_COMPARATOR_SIGN_BIT) +#define CR0_CRYPTOGRAPHY_COUNTER BIT(CR0_CRYPTOGRAPHY_COUNTER_BIT) +#define CR0_PAI_EXTENSION BIT(CR0_PAI_EXTENSION_BIT) +#define CR0_CPUMF_EXTRACTION_AUTH BIT(CR0_CPUMF_EXTRACTION_AUTH_BIT) +#define CR0_WARNING_TRACK BIT(CR0_WARNING_TRACK_BIT) +#define CR0_LOW_ADDRESS_PROTECTION BIT(CR0_LOW_ADDRESS_PROTECTION_BIT) +#define CR0_FETCH_PROTECTION_OVERRIDE BIT(CR0_FETCH_PROTECTION_OVERRIDE_BIT) +#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(CR0_STORAGE_PROTECTION_OVERRIDE_BIT) +#define CR0_EDAT BIT(CR0_EDAT_BIT) +#define CR0_INSTRUCTION_EXEC_PROTECTION BIT(CR0_INSTRUCTION_EXEC_PROTECTION_BIT) +#define CR0_VECTOR BIT(CR0_VECTOR_BIT) +#define CR0_MALFUNCTION_ALERT_SUBMASK BIT(CR0_MALFUNCTION_ALERT_SUBMASK_BIT) +#define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(CR0_EMERGENCY_SIGNAL_SUBMASK_BIT) +#define CR0_EXTERNAL_CALL_SUBMASK BIT(CR0_EXTERNAL_CALL_SUBMASK_BIT) +#define CR0_CLOCK_COMPARATOR_SUBMASK BIT(CR0_CLOCK_COMPARATOR_SUBMASK_BIT) +#define CR0_CPU_TIMER_SUBMASK BIT(CR0_CPU_TIMER_SUBMASK_BIT) +#define CR0_SERVICE_SIGNAL_SUBMASK BIT(CR0_SERVICE_SIGNAL_SUBMASK_BIT) +#define CR0_UNUSED_56 BIT(CR0_UNUSED_56_BIT) +#define CR0_INTERRUPT_KEY_SUBMASK BIT(CR0_INTERRUPT_KEY_SUBMASK_BIT) +#define CR0_MEASUREMENT_ALERT_SUBMASK BIT(CR0_MEASUREMENT_ALERT_SUBMASK_BIT) +#define CR0_ETR_SUBMASK BIT(CR0_ETR_SUBMASK_BIT) +#define CR0_IUCV BIT(CR0_IUCV_BIT) + +#define CR2_MIO_ADDRESSING_BIT (63 - 58) +#define CR2_GUARDED_STORAGE_BIT (63 - 59) + +#define CR2_MIO_ADDRESSING BIT(CR2_MIO_ADDRESSING_BIT) +#define CR2_GUARDED_STORAGE BIT(CR2_GUARDED_STORAGE_BIT) + +#define CR14_UNUSED_32_BIT (63 - 32) +#define CR14_UNUSED_33_BIT (63 - 33) +#define CR14_CHANNEL_REPORT_SUBMASK_BIT (63 - 35) +#define CR14_RECOVERY_SUBMASK_BIT (63 - 36) +#define CR14_DEGRADATION_SUBMASK_BIT (63 - 37) +#define CR14_EXTERNAL_DAMAGE_SUBMASK_BIT (63 - 38) +#define CR14_WARNING_SUBMASK_BIT (63 - 39) + +#define CR14_UNUSED_32 BIT(CR14_UNUSED_32_BIT) +#define CR14_UNUSED_33 BIT(CR14_UNUSED_33_BIT) +#define CR14_CHANNEL_REPORT_SUBMASK BIT(CR14_CHANNEL_REPORT_SUBMASK_BIT) +#define CR14_RECOVERY_SUBMASK BIT(CR14_RECOVERY_SUBMASK_BIT) +#define CR14_DEGRADATION_SUBMASK BIT(CR14_DEGRADATION_SUBMASK_BIT) +#define CR14_EXTERNAL_DAMAGE_SUBMASK BIT(CR14_EXTERNAL_DAMAGE_SUBMASK_BIT) +#define CR14_WARNING_SUBMASK BIT(CR14_WARNING_SUBMASK_BIT) + +#ifndef __ASSEMBLY__ + +#include <linux/bug.h> + +struct ctlreg { + unsigned long val; +}; + +#define __local_ctl_load(low, high, array) do { \ + struct addrtype { \ + char _[sizeof(array)]; \ + }; \ + int _high = high; \ + int _low = low; \ + int _esize; \ + \ + _esize = (_high - _low + 1) * sizeof(struct ctlreg); \ + BUILD_BUG_ON(sizeof(struct addrtype) != _esize); \ + typecheck(struct ctlreg, array[0]); \ + asm volatile( \ + " lctlg %[_low],%[_high],%[_arr]\n" \ + : \ + : [_arr] "Q" (*(struct addrtype *)(&array)), \ + [_low] "i" (low), [_high] "i" (high) \ + : "memory"); \ +} while (0) + +#define __local_ctl_store(low, high, array) do { \ + struct addrtype { \ + char _[sizeof(array)]; \ + }; \ + int _high = high; \ + int _low = low; \ + int _esize; \ + \ + _esize = (_high - _low + 1) * sizeof(struct ctlreg); \ + BUILD_BUG_ON(sizeof(struct addrtype) != _esize); \ + typecheck(struct ctlreg, array[0]); \ + asm volatile( \ + " stctg %[_low],%[_high],%[_arr]\n" \ + : [_arr] "=Q" (*(struct addrtype *)(&array)) \ + : [_low] "i" (low), [_high] "i" (high)); \ +} while (0) + +static __always_inline void local_ctl_load(unsigned int cr, struct ctlreg *reg) +{ + asm volatile( + " lctlg %[cr],%[cr],%[reg]\n" + : + : [reg] "Q" (*reg), [cr] "i" (cr) + : "memory"); +} + +static __always_inline void local_ctl_store(unsigned int cr, struct ctlreg *reg) +{ + asm volatile( + " stctg %[cr],%[cr],%[reg]\n" + : [reg] "=Q" (*reg) + : [cr] "i" (cr)); +} + +static __always_inline void local_ctl_set_bit(unsigned int cr, unsigned int bit) +{ + struct ctlreg reg; + + local_ctl_store(cr, ®); + reg.val |= 1UL << bit; + local_ctl_load(cr, ®); +} + +static __always_inline void local_ctl_clear_bit(unsigned int cr, unsigned int bit) +{ + struct ctlreg reg; + + local_ctl_store(cr, ®); + reg.val &= ~(1UL << bit); + local_ctl_load(cr, ®); +} + +struct lowcore; + +void system_ctlreg_lock(void); +void system_ctlreg_unlock(void); +void system_ctlreg_init_save_area(struct lowcore *lc); +void system_ctlreg_modify(unsigned int cr, unsigned long data, int request); + +enum { + CTLREG_SET_BIT, + CTLREG_CLEAR_BIT, + CTLREG_LOAD, +}; + +static inline void system_ctl_set_bit(unsigned int cr, unsigned int bit) +{ + system_ctlreg_modify(cr, bit, CTLREG_SET_BIT); +} + +static inline void system_ctl_clear_bit(unsigned int cr, unsigned int bit) +{ + system_ctlreg_modify(cr, bit, CTLREG_CLEAR_BIT); +} + +static inline void system_ctl_load(unsigned int cr, struct ctlreg *reg) +{ + system_ctlreg_modify(cr, reg->val, CTLREG_LOAD); +} + +union ctlreg0 { + unsigned long val; + struct ctlreg reg; + struct { + unsigned long : 8; + unsigned long tcx : 1; /* Transactional-Execution control */ + unsigned long pifo : 1; /* Transactional-Execution Program- + Interruption-Filtering Override */ + unsigned long : 3; + unsigned long ccc : 1; /* Cryptography counter control */ + unsigned long pec : 1; /* PAI extension control */ + unsigned long : 17; + unsigned long : 3; + unsigned long lap : 1; /* Low-address-protection control */ + unsigned long : 4; + unsigned long edat : 1; /* Enhanced-DAT-enablement control */ + unsigned long : 2; + unsigned long iep : 1; /* Instruction-Execution-Protection */ + unsigned long : 1; + unsigned long afp : 1; /* AFP-register control */ + unsigned long vx : 1; /* Vector enablement control */ + unsigned long : 7; + unsigned long sssm : 1; /* Service signal subclass mask */ + unsigned long : 9; + }; +}; + +union ctlreg2 { + unsigned long val; + struct ctlreg reg; + struct { + unsigned long : 33; + unsigned long ducto : 25; + unsigned long : 1; + unsigned long gse : 1; + unsigned long : 1; + unsigned long tds : 1; + unsigned long tdc : 2; + }; +}; + +union ctlreg5 { + unsigned long val; + struct ctlreg reg; + struct { + unsigned long : 33; + unsigned long pasteo: 25; + unsigned long : 6; + }; +}; + +union ctlreg15 { + unsigned long val; + struct ctlreg reg; + struct { + unsigned long lsea : 61; + unsigned long : 3; + }; +}; + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_S390_CTLREG_H */ diff --git a/arch/s390/include/asm/fault.h b/arch/s390/include/asm/fault.h new file mode 100644 index 000000000000..d326f56603d6 --- /dev/null +++ b/arch/s390/include/asm/fault.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 1999, 2023 + */ +#ifndef _ASM_S390_FAULT_H +#define _ASM_S390_FAULT_H + +union teid { + unsigned long val; + struct { + unsigned long addr : 52; /* Translation-exception Address */ + unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ + unsigned long : 2; + unsigned long b56 : 1; + unsigned long : 3; + unsigned long b60 : 1; + unsigned long b61 : 1; + unsigned long as : 2; /* ASCE Identifier */ + }; +}; + +enum { + TEID_FSI_UNKNOWN = 0, /* Unknown whether fetch or store */ + TEID_FSI_STORE = 1, /* Exception was due to store operation */ + TEID_FSI_FETCH = 2 /* Exception was due to fetch operation */ +}; + +#endif /* _ASM_S390_FAULT_H */ diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h index bbdadb1c9efc..8634581b9011 100644 --- a/arch/s390/include/asm/fpu/internal.h +++ b/arch/s390/include/asm/fpu/internal.h @@ -10,7 +10,6 @@ #define _ASM_S390_FPU_INTERNAL_H #include <linux/string.h> -#include <asm/ctl_reg.h> #include <asm/fpu/types.h> static inline void save_vx_regs(__vector128 *vxrs) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 89902f754740..54b42817f70a 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -31,6 +31,7 @@ #include <linux/percpu.h> #include <linux/cache.h> #include <linux/types.h> +#include <asm/ctlreg.h> enum interruption_class { IRQEXT_CLK, @@ -101,17 +102,17 @@ enum irq_subclass { }; #define CR0_IRQ_SUBCLASS_MASK \ - ((1UL << (63 - 30)) /* Warning Track */ | \ - (1UL << (63 - 48)) /* Malfunction Alert */ | \ - (1UL << (63 - 49)) /* Emergency Signal */ | \ - (1UL << (63 - 50)) /* External Call */ | \ - (1UL << (63 - 52)) /* Clock Comparator */ | \ - (1UL << (63 - 53)) /* CPU Timer */ | \ - (1UL << (63 - 54)) /* Service Signal */ | \ - (1UL << (63 - 57)) /* Interrupt Key */ | \ - (1UL << (63 - 58)) /* Measurement Alert */ | \ - (1UL << (63 - 59)) /* Timing Alert */ | \ - (1UL << (63 - 62))) /* IUCV */ + (CR0_WARNING_TRACK | \ + CR0_MALFUNCTION_ALERT_SUBMASK | \ + CR0_EMERGENCY_SIGNAL_SUBMASK | \ + CR0_EXTERNAL_CALL_SUBMASK | \ + CR0_CLOCK_COMPARATOR_SUBMASK | \ + CR0_CPU_TIMER_SUBMASK | \ + CR0_SERVICE_SIGNAL_SUBMASK | \ + CR0_INTERRUPT_KEY_SUBMASK | \ + CR0_MEASUREMENT_ALERT_SUBMASK | \ + CR0_ETR_SUBMASK | \ + CR0_IUCV) void irq_subclass_register(enum irq_subclass subclass); void irq_subclass_unregister(enum irq_subclass subclass); diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 83f732ca3af4..21b9e5290c04 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -15,6 +15,7 @@ * <grundym@us.ibm.com> */ #include <linux/types.h> +#include <asm/ctlreg.h> #include <asm-generic/kprobes.h> #define BREAKPOINT_INSTRUCTION 0x0002 @@ -65,7 +66,7 @@ struct prev_kprobe { struct kprobe_ctlblk { unsigned long kprobe_status; unsigned long kprobe_saved_imask; - unsigned long kprobe_saved_ctl[3]; + struct ctlreg kprobe_saved_ctl[3]; struct prev_kprobe prev_kprobe; }; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 69ccc464a430..5dc1b6345006 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -11,6 +11,7 @@ #include <linux/types.h> #include <asm/ptrace.h> +#include <asm/ctlreg.h> #include <asm/cpu.h> #include <asm/types.h> @@ -139,8 +140,8 @@ struct lowcore { __u32 restart_flags; /* 0x0384 */ /* Address space pointer. */ - __u64 kernel_asce; /* 0x0388 */ - __u64 user_asce; /* 0x0390 */ + struct ctlreg kernel_asce; /* 0x0388 */ + struct ctlreg user_asce; /* 0x0390 */ /* * The lpp and current_pid fields form a @@ -199,7 +200,7 @@ struct lowcore { __u32 clock_comp_save_area[2]; /* 0x1330 */ __u64 last_break_save_area; /* 0x1338 */ __u32 access_regs_save_area[16]; /* 0x1340 */ - __u64 cregs_save_area[16]; /* 0x1380 */ + struct ctlreg cregs_save_area[16]; /* 0x1380 */ __u8 pad_0x1400[0x1500-0x1400]; /* 0x1400 */ /* Cryptography-counter designation */ __u64 ccd; /* 0x1500 */ @@ -221,12 +222,4 @@ static inline void set_prefix(__u32 address) asm volatile("spx %0" : : "Q" (address) : "memory"); } -static inline __u32 store_prefix(void) -{ - __u32 address; - - asm volatile("stpx %0" : "=Q" (address)); - return address; -} - #endif /* _ASM_S390_LOWCORE_H */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 829d68e2c685..bb1b4bef1878 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -11,7 +11,6 @@ typedef struct { cpumask_t cpu_attach_mask; atomic_t flush_count; unsigned int flush_mm; - struct list_head pgtable_list; struct list_head gmap_list; unsigned long gmap_asce; unsigned long asce; @@ -39,7 +38,6 @@ typedef struct { #define INIT_MM_CONTEXT(name) \ .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \ - .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 2a38af5a00c2..929af18b0908 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -12,7 +12,7 @@ #include <linux/uaccess.h> #include <linux/mm_types.h> #include <asm/tlbflush.h> -#include <asm/ctl_reg.h> +#include <asm/ctlreg.h> #include <asm-generic/mm_hooks.h> #define init_new_context init_new_context @@ -22,7 +22,6 @@ static inline int init_new_context(struct task_struct *tsk, unsigned long asce_type, init_entry; spin_lock_init(&mm->context.lock); - INIT_LIST_HEAD(&mm->context.pgtable_list); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.flush_count, 0); @@ -78,10 +77,10 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct * if (next == &init_mm) S390_lowcore.user_asce = s390_invalid_asce; else - S390_lowcore.user_asce = next->context.asce; + S390_lowcore.user_asce.val = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); /* Clear previous user-ASCE from CR7 */ - __ctl_load(s390_invalid_asce, 7, 7); + local_ctl_load(7, &s390_invalid_asce); if (prev != next) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } @@ -111,7 +110,7 @@ static inline void finish_arch_post_lock_switch(void) __tlb_flush_mm_lazy(mm); preempt_enable(); } - __ctl_load(S390_lowcore.user_asce, 7, 7); + local_ctl_load(7, &S390_lowcore.user_asce); } #define activate_mm activate_mm @@ -120,7 +119,7 @@ static inline void activate_mm(struct mm_struct *prev, { switch_mm(prev, next, current); cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); - __ctl_load(S390_lowcore.user_asce, 7, 7); + local_ctl_load(7, &S390_lowcore.user_asce); } #include <asm-generic/mmu_context.h> diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h index c33c4deb545f..08fcbd628120 100644 --- a/arch/s390/include/asm/page-states.h +++ b/arch/s390/include/asm/page-states.h @@ -7,6 +7,9 @@ #ifndef PAGE_STATES_H #define PAGE_STATES_H +#include <asm/sections.h> +#include <asm/page.h> + #define ESSA_GET_STATE 0 #define ESSA_SET_STABLE 1 #define ESSA_SET_UNUSED 2 @@ -18,4 +21,60 @@ #define ESSA_MAX ESSA_SET_STABLE_NODAT +extern int __bootdata_preserved(cmma_flag); + +static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd) +{ + unsigned long rc; + + asm volatile( + " .insn rrf,0xb9ab0000,%[rc],%[paddr],%[cmd],0" + : [rc] "=d" (rc) + : [paddr] "d" (paddr), + [cmd] "i" (cmd)); + return rc; +} + +static __always_inline void __set_page_state(void *addr, unsigned long num_pages, unsigned char cmd) +{ + unsigned long paddr = __pa(addr) & PAGE_MASK; + + while (num_pages--) { + essa(paddr, cmd); + paddr += PAGE_SIZE; + } +} + +static inline void __set_page_unused(void *addr, unsigned long num_pages) +{ + __set_page_state(addr, num_pages, ESSA_SET_UNUSED); +} + +static inline void __set_page_stable_dat(void *addr, unsigned long num_pages) +{ + __set_page_state(addr, num_pages, ESSA_SET_STABLE); +} + +static inline void __set_page_stable_nodat(void *addr, unsigned long num_pages) +{ + __set_page_state(addr, num_pages, ESSA_SET_STABLE_NODAT); +} + +static inline void __arch_set_page_nodat(void *addr, unsigned long num_pages) +{ + if (!cmma_flag) + return; + if (cmma_flag < 2) + __set_page_stable_dat(addr, num_pages); + else + __set_page_stable_nodat(addr, num_pages); +} + +static inline void __arch_set_page_dat(void *addr, unsigned long num_pages) +{ + if (!cmma_flag) + return; + __set_page_stable_dat(addr, num_pages); +} + #endif diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index cfec0743314e..73b9c3bf377f 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -164,7 +164,6 @@ static inline int page_reset_referenced(unsigned long addr) struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); -void arch_set_page_dat(struct page *page, int order); static inline int devmem_is_allowed(unsigned long pfn) { diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index b248694e0024..e91cd6bbc330 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -159,13 +159,6 @@ struct zpci_dev { unsigned long *dma_table; int tlb_refresh; - spinlock_t iommu_bitmap_lock; - unsigned long *iommu_bitmap; - unsigned long *lazy_bitmap; - unsigned long iommu_size; - unsigned long iommu_pages; - unsigned int next_bit; - struct iommu_device iommu_dev; /* IOMMU core handle */ char res_name[16]; @@ -180,10 +173,6 @@ struct zpci_dev { struct zpci_fmb *fmb; u16 fmb_update; /* update interval */ u16 fmb_length; - /* software counters */ - atomic64_t allocated_pages; - atomic64_t mapped_pages; - atomic64_t unmapped_pages; u8 version; enum pci_bus_speed max_bus_speed; diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index d6189ed14f84..f0c677ddd270 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -50,6 +50,9 @@ struct clp_fh_list_entry { #define CLP_UTIL_STR_LEN 64 #define CLP_PFIP_NR_SEGMENTS 4 +/* PCI function type numbers */ +#define PCI_FUNC_TYPE_ISM 0x5 /* ISM device */ + extern bool zpci_unique_uid; struct clp_rsp_slpc_pci { diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 7119c04c51c5..42d7cc4262ca 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -82,117 +82,16 @@ enum zpci_ioat_dtype { #define ZPCI_TABLE_VALID_MASK 0x20 #define ZPCI_TABLE_PROT_MASK 0x200 -static inline unsigned int calc_rtx(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; -} - -static inline unsigned int calc_sx(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; -} - -static inline unsigned int calc_px(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; -} - -static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa) -{ - *entry &= ZPCI_PTE_FLAG_MASK; - *entry |= (pfaa & ZPCI_PTE_ADDR_MASK); -} - -static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto) -{ - *entry &= ZPCI_RTE_FLAG_MASK; - *entry |= (sto & ZPCI_RTE_ADDR_MASK); - *entry |= ZPCI_TABLE_TYPE_RTX; -} - -static inline void set_st_pto(unsigned long *entry, phys_addr_t pto) -{ - *entry &= ZPCI_STE_FLAG_MASK; - *entry |= (pto & ZPCI_STE_ADDR_MASK); - *entry |= ZPCI_TABLE_TYPE_SX; -} - -static inline void validate_rt_entry(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_VALID_MASK; - *entry &= ~ZPCI_TABLE_OFFSET_MASK; - *entry |= ZPCI_TABLE_VALID; - *entry |= ZPCI_TABLE_LEN_RTX; -} - -static inline void validate_st_entry(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_VALID_MASK; - *entry |= ZPCI_TABLE_VALID; -} - -static inline void invalidate_pt_entry(unsigned long *entry) -{ - WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); - *entry &= ~ZPCI_PTE_VALID_MASK; - *entry |= ZPCI_PTE_INVALID; -} - -static inline void validate_pt_entry(unsigned long *entry) -{ - WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID); - *entry &= ~ZPCI_PTE_VALID_MASK; - *entry |= ZPCI_PTE_VALID; -} - -static inline void entry_set_protected(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_PROT_MASK; - *entry |= ZPCI_TABLE_PROTECTED; -} - -static inline void entry_clr_protected(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_PROT_MASK; - *entry |= ZPCI_TABLE_UNPROTECTED; -} - -static inline int reg_entry_isvalid(unsigned long entry) -{ - return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID; -} - -static inline int pt_entry_isvalid(unsigned long entry) -{ - return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; -} - -static inline unsigned long *get_rt_sto(unsigned long entry) -{ - if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) - return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK); - else - return NULL; - -} - -static inline unsigned long *get_st_pto(unsigned long entry) -{ - if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) - return phys_to_virt(entry & ZPCI_STE_ADDR_MASK); - else - return NULL; -} - -/* Prototypes */ -void dma_free_seg_table(unsigned long); -unsigned long *dma_alloc_cpu_table(gfp_t gfp); -void dma_cleanup_tables(unsigned long *); -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, - gfp_t gfp); -void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags); - -extern const struct dma_map_ops s390_pci_dma_ops; +struct zpci_iommu_ctrs { + atomic64_t mapped_pages; + atomic64_t unmapped_pages; + atomic64_t global_rpcits; + atomic64_t sync_map_rpcits; + atomic64_t sync_rpcits; +}; + +struct zpci_dev; +struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev); #endif diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 376b4b23bdaa..502d655fe6ae 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -25,7 +25,6 @@ void crst_table_free(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); struct page *page_table_alloc_pgste(struct mm_struct *mm); void page_table_free(struct mm_struct *, unsigned long *); -void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); void page_table_free_pgste(struct page *page); extern int page_table_allocate_pgste; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index fb3ee7758b76..601e87fa8a9a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -18,6 +18,7 @@ #include <linux/radix-tree.h> #include <linux/atomic.h> #include <asm/sections.h> +#include <asm/ctlreg.h> #include <asm/bug.h> #include <asm/page.h> #include <asm/uv.h> @@ -25,7 +26,7 @@ extern pgd_t swapper_pg_dir[]; extern pgd_t invalid_pg_dir[]; extern void paging_init(void); -extern unsigned long s390_invalid_asce; +extern struct ctlreg s390_invalid_asce; enum { PG_DIRECT_MAP_4K = 0, diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 25cadc2b9cff..df316436d2e1 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -125,9 +125,6 @@ static inline void vmcp_cma_reserve(void) { } void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); -void cmma_init(void); -void cmma_init_nodat(void); - extern void (*_machine_restart)(char *command); extern void (*_machine_halt)(void); extern void (*_machine_power_off)(void); diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 73ed2781073b..6e5b1b4b19a9 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -59,7 +59,6 @@ static inline void smp_cpus_done(unsigned int max_cpus) { } -extern int smp_reinit_ipl_cpu(void); extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); extern void __cpu_die(unsigned int cpu); diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 78f7b729b65f..31ec4f545e03 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -6,6 +6,13 @@ #include <linux/ptrace.h> #include <asm/switch_to.h> +struct stack_frame_user { + unsigned long back_chain; + unsigned long empty1[5]; + unsigned long gprs[10]; + unsigned long empty2[4]; +}; + enum stack_type { STACK_TYPE_UNKNOWN, STACK_TYPE_TASK, diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 383b1f91442c..d1455a601adc 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -69,12 +69,9 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_pmds = 1; - /* - * page_table_free_rcu takes care of the allocation bit masks - * of the 2K table fragments in the 4K page table page, - * then calls tlb_remove_table. - */ - page_table_free_rcu(tlb, (unsigned long *) pte, address); + if (mm_alloc_pgste(tlb->mm)) + gmap_unlink(tlb->mm, (unsigned long *)pte, address); + tlb_remove_ptdesc(tlb, pte); } /* @@ -112,7 +109,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; - tlb_remove_table(tlb, p4d); + tlb_remove_ptdesc(tlb, p4d); } /* @@ -130,7 +127,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_p4ds = 1; - tlb_remove_table(tlb, pud); + tlb_remove_ptdesc(tlb, pud); } diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 8a8c64a678c4..81ae8a98e7ec 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -15,7 +15,6 @@ */ #include <asm/asm-extable.h> #include <asm/processor.h> -#include <asm/ctl_reg.h> #include <asm/extable.h> #include <asm/facility.h> #include <asm-generic/access_ok.h> diff --git a/arch/s390/include/asm/word-at-a-time.h b/arch/s390/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..2579f1694b82 --- /dev/null +++ b/arch/s390/include/asm/word-at-a-time.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +#include <linux/kernel.h> +#include <asm/asm-extable.h> +#include <asm/bitsperlong.h> + +struct word_at_a_time { + const unsigned long bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x7f) } + +static inline unsigned long prep_zero_mask(unsigned long val, unsigned long data, const struct word_at_a_time *c) +{ + return data; +} + +static inline unsigned long create_zero_mask(unsigned long data) +{ + return __fls(data); +} + +static inline unsigned long find_zero(unsigned long data) +{ + return (data ^ (BITS_PER_LONG - 1)) >> 3; +} + +static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) +{ + unsigned long mask = (val & c->bits) + c->bits; + + *data = ~(mask | val | c->bits); + return *data; +} + +static inline unsigned long zero_bytemask(unsigned long data) +{ + return ~1UL << data; +} + +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. + */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long data; + + asm volatile( + "0: lg %[data],0(%[addr])\n" + "1: nopr %%r7\n" + EX_TABLE_ZEROPAD(0b, 1b, %[data], %[addr]) + EX_TABLE_ZEROPAD(1b, 1b, %[data], %[addr]) + : [data] "=d" (data) + : [addr] "a" (addr), "m" (*(unsigned long *)addr)); + return data; +} + +#endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 0df2b88cc0da..353def93973b 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -37,7 +37,7 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o -obj-y += sysinfo.o lgr.o os_info.o +obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 7af69948b290..514feadd4c58 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -498,7 +498,7 @@ static int get_mem_chunk_cnt(void) /* * Initialize ELF loads (new kernel) */ -static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) +static void loads_init(Elf64_Phdr *phdr) { phys_addr_t start, end; u64 idx; @@ -507,7 +507,7 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) phdr->p_filesz = end - start; phdr->p_type = PT_LOAD; phdr->p_offset = start; - phdr->p_vaddr = start; + phdr->p_vaddr = (unsigned long)__va(start); phdr->p_paddr = start; phdr->p_memsz = end - start; phdr->p_flags = PF_R | PF_W | PF_X; @@ -612,7 +612,7 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); /* Init loads */ hdr_off = PTR_DIFF(ptr, hdr); - loads_init(phdr_loads, hdr_off); + loads_init(phdr_loads); *addr = (unsigned long long) hdr; *size = (unsigned long long) hdr_off; BUG_ON(elfcorehdr_size > alloc_size); diff --git a/arch/s390/kernel/ctlreg.c b/arch/s390/kernel/ctlreg.c new file mode 100644 index 000000000000..8cc26cf2c64a --- /dev/null +++ b/arch/s390/kernel/ctlreg.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 1999, 2023 + */ + +#include <linux/irqflags.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/cache.h> +#include <asm/abs_lowcore.h> +#include <asm/ctlreg.h> + +/* + * ctl_lock guards access to global control register contents which + * are kept in the control register save area within absolute lowcore + * at physical address zero. + */ +static DEFINE_SPINLOCK(system_ctl_lock); + +void system_ctlreg_lock(void) + __acquires(&system_ctl_lock) +{ + spin_lock(&system_ctl_lock); +} + +void system_ctlreg_unlock(void) + __releases(&system_ctl_lock) +{ + spin_unlock(&system_ctl_lock); +} + +static bool system_ctlreg_area_init __ro_after_init; + +void __init system_ctlreg_init_save_area(struct lowcore *lc) +{ + struct lowcore *abs_lc; + + abs_lc = get_abs_lowcore(); + __local_ctl_store(0, 15, lc->cregs_save_area); + __local_ctl_store(0, 15, abs_lc->cregs_save_area); + put_abs_lowcore(abs_lc); + system_ctlreg_area_init = true; +} + +struct ctlreg_parms { + unsigned long andval; + unsigned long orval; + unsigned long val; + int request; + int cr; +}; + +static void ctlreg_callback(void *info) +{ + struct ctlreg_parms *pp = info; + struct ctlreg regs[16]; + + __local_ctl_store(0, 15, regs); + if (pp->request == CTLREG_LOAD) { + regs[pp->cr].val = pp->val; + } else { + regs[pp->cr].val &= pp->andval; + regs[pp->cr].val |= pp->orval; + } + __local_ctl_load(0, 15, regs); +} + +static void system_ctlreg_update(void *info) +{ + unsigned long flags; + + if (system_state == SYSTEM_BOOTING) { + /* + * For very early calls do not call on_each_cpu() + * since not everything might be setup. + */ + local_irq_save(flags); + ctlreg_callback(info); + local_irq_restore(flags); + } else { + on_each_cpu(ctlreg_callback, info, 1); + } +} + +void system_ctlreg_modify(unsigned int cr, unsigned long data, int request) +{ + struct ctlreg_parms pp = { .cr = cr, .request = request, }; + struct lowcore *abs_lc; + + switch (request) { + case CTLREG_SET_BIT: + pp.orval = 1UL << data; + pp.andval = -1UL; + break; + case CTLREG_CLEAR_BIT: + pp.orval = 0; + pp.andval = ~(1UL << data); + break; + case CTLREG_LOAD: + pp.val = data; + break; + } + if (system_ctlreg_area_init) { + system_ctlreg_lock(); + abs_lc = get_abs_lowcore(); + if (request == CTLREG_LOAD) { + abs_lc->cregs_save_area[cr].val = pp.val; + } else { + abs_lc->cregs_save_area[cr].val &= pp.andval; + abs_lc->cregs_save_area[cr].val |= pp.orval; + } + put_abs_lowcore(abs_lc); + system_ctlreg_update(&pp); + system_ctlreg_unlock(); + } else { + system_ctlreg_update(&pp); + } +} +EXPORT_SYMBOL(system_ctlreg_modify); diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index f9f06cd8fcee..92fdc35f028c 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -245,6 +245,7 @@ EXPORT_SYMBOL(diag8c); int diag224(void *ptr) { + unsigned long addr = __pa(ptr); int rc = -EOPNOTSUPP; diag_stat_inc(DIAG_STAT_X224); @@ -253,7 +254,7 @@ int diag224(void *ptr) "0: lhi %0,0x0\n" "1:\n" EX_TABLE(0b,1b) - : "+d" (rc) :"d" (0), "d" (ptr) : "memory"); + : "+d" (rc) :"d" (0), "d" (addr) : "memory"); return rc; } EXPORT_SYMBOL(diag224); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 442ce0489e1a..eb43e5922a25 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -46,6 +46,7 @@ decompressor_handled_param(vmalloc); decompressor_handled_param(dfltcc); decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); +decompressor_handled_param(cmma); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -216,7 +217,7 @@ static __init void detect_machine_facilities(void) { if (test_facility(8)) { S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1; - __ctl_set_bit(0, 23); + system_ctl_set_bit(0, CR0_EDAT_BIT); } if (test_facility(78)) S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2; @@ -224,13 +225,13 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; if (test_facility(50) && test_facility(73)) { S390_lowcore.machine_flags |= MACHINE_FLAG_TE; - __ctl_set_bit(0, 55); + system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); } if (test_facility(51)) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; if (test_facility(129)) { S390_lowcore.machine_flags |= MACHINE_FLAG_VX; - __ctl_set_bit(0, 17); + system_ctl_set_bit(0, CR0_VECTOR_BIT); } if (test_facility(130)) S390_lowcore.machine_flags |= MACHINE_FLAG_NX; @@ -240,7 +241,7 @@ static __init void detect_machine_facilities(void) /* Enabled signed clock comparator comparisons */ S390_lowcore.machine_flags |= MACHINE_FLAG_SCC; clock_comparator_max = -1ULL >> 1; - __ctl_set_bit(0, 53); + system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); } if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO; @@ -258,15 +259,9 @@ static inline void save_vector_registers(void) #endif } -static inline void setup_control_registers(void) +static inline void setup_low_address_protection(void) { - unsigned long reg; - - __ctl_store(reg, 0, 0); - reg |= CR0_LOW_ADDRESS_PROTECTION; - reg |= CR0_EMERGENCY_SIGNAL_SUBMASK; - reg |= CR0_EXTERNAL_CALL_SUBMASK; - __ctl_load(reg, 0, 0); + system_ctl_set_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT); } static inline void setup_access_registers(void) @@ -279,7 +274,7 @@ static inline void setup_access_registers(void) static int __init disable_vector_extension(char *str) { S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; - __ctl_clear_bit(0, 17); + system_ctl_clear_bit(0, CR0_VECTOR_BIT); return 0; } early_param("novx", disable_vector_extension); @@ -314,7 +309,7 @@ void __init startup_init(void) save_vector_registers(); setup_topology(); sclp_early_detect(); - setup_control_registers(); + setup_low_address_protection(); setup_access_registers(); lockdep_on(); } diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c index d14dd1c2e524..0b68168d9566 100644 --- a/arch/s390/kernel/guarded_storage.c +++ b/arch/s390/kernel/guarded_storage.c @@ -28,7 +28,7 @@ static int gs_enable(void) return -ENOMEM; gs_cb->gsd = 25; preempt_disable(); - __ctl_set_bit(2, 4); + local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); load_gs_cb(gs_cb); current->thread.gs_cb = gs_cb; preempt_enable(); @@ -42,7 +42,7 @@ static int gs_disable(void) preempt_disable(); kfree(current->thread.gs_cb); current->thread.gs_cb = NULL; - __ctl_clear_bit(2, 4); + local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT); preempt_enable(); } return 0; @@ -84,7 +84,7 @@ void gs_load_bc_cb(struct pt_regs *regs) if (gs_cb) { kfree(current->thread.gs_cb); current->thread.gs_bc_cb = NULL; - __ctl_set_bit(2, 4); + local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); load_gs_cb(gs_cb); current->thread.gs_cb = gs_cb; } diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 05e51666db03..cc364fce6aa9 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2381,7 +2381,7 @@ void s390_reset_system(void) set_prefix(0); /* Disable lowcore protection */ - __ctl_clear_bit(0, 28); + local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT); diag_amode31_ops.diag308_reset(); } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index b020ff17d206..6f71b0ce1068 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -385,7 +385,7 @@ void irq_subclass_register(enum irq_subclass subclass) { spin_lock(&irq_subclass_lock); if (!irq_subclass_refcount[subclass]) - ctl_set_bit(0, subclass); + system_ctl_set_bit(0, subclass); irq_subclass_refcount[subclass]++; spin_unlock(&irq_subclass_lock); } @@ -396,7 +396,7 @@ void irq_subclass_unregister(enum irq_subclass subclass) spin_lock(&irq_subclass_lock); irq_subclass_refcount[subclass]--; if (!irq_subclass_refcount[subclass]) - ctl_clear_bit(0, subclass); + system_ctl_clear_bit(0, subclass); spin_unlock(&irq_subclass_lock); } EXPORT_SYMBOL(irq_subclass_unregister); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index d4b863ed0aa7..f0cf20d4b3c5 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -224,20 +224,27 @@ static void enable_singlestep(struct kprobe_ctlblk *kcb, struct pt_regs *regs, unsigned long ip) { - struct per_regs per_kprobe; + union { + struct ctlreg regs[3]; + struct { + struct ctlreg control; + struct ctlreg start; + struct ctlreg end; + }; + } per_kprobe; /* Set up the PER control registers %cr9-%cr11 */ - per_kprobe.control = PER_EVENT_IFETCH; - per_kprobe.start = ip; - per_kprobe.end = ip; + per_kprobe.control.val = PER_EVENT_IFETCH; + per_kprobe.start.val = ip; + per_kprobe.end.val = ip; /* Save control regs and psw mask */ - __ctl_store(kcb->kprobe_saved_ctl, 9, 11); + __local_ctl_store(9, 11, kcb->kprobe_saved_ctl); kcb->kprobe_saved_imask = regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT); /* Set PER control regs, turns on single step for the given address */ - __ctl_load(per_kprobe, 9, 11); + __local_ctl_load(9, 11, per_kprobe.regs); regs->psw.mask |= PSW_MASK_PER; regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); regs->psw.addr = ip; @@ -249,7 +256,7 @@ static void disable_singlestep(struct kprobe_ctlblk *kcb, unsigned long ip) { /* Restore control regs and psw mask, set new psw address */ - __ctl_load(kcb->kprobe_saved_ctl, 9, 11); + __local_ctl_load(9, 11, kcb->kprobe_saved_ctl); regs->psw.mask &= ~PSW_MASK_PER; regs->psw.mask |= kcb->kprobe_saved_imask; regs->psw.addr = ip; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index ce65fc01671f..bb0d4d68fcbe 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -94,12 +94,12 @@ static noinline void __machine_kdump(void *image) if (MACHINE_HAS_VX) save_vx_regs((__vector128 *) mcesa->vector_save_area); if (MACHINE_HAS_GS) { - __ctl_store(cr2_old.val, 2, 2); + local_ctl_store(2, &cr2_old.reg); cr2_new = cr2_old; cr2_new.gse = 1; - __ctl_load(cr2_new.val, 2, 2); + local_ctl_load(2, &cr2_new.reg); save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area); - __ctl_load(cr2_old.val, 2, 2); + local_ctl_load(2, &cr2_old.reg); } /* * To create a good backchain for this CPU in the dump store_status diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 38ec0487521c..0daf0f1cdfc9 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -22,13 +22,13 @@ #include <linux/kvm_host.h> #include <linux/export.h> #include <asm/lowcore.h> +#include <asm/ctlreg.h> #include <asm/smp.h> #include <asm/stp.h> #include <asm/cputime.h> #include <asm/nmi.h> #include <asm/crw.h> #include <asm/switch_to.h> -#include <asm/ctl_reg.h> #include <asm/asm-offsets.h> #include <asm/pai.h> #include <asm/vx-insn.h> @@ -131,10 +131,10 @@ static notrace void s390_handle_damage(void) * Disable low address protection and make machine check new PSW a * disabled wait PSW. Any additional machine check cannot be handled. */ - __ctl_store(cr0.val, 0, 0); + local_ctl_store(0, &cr0.reg); cr0_new = cr0; cr0_new.lap = 0; - __ctl_load(cr0_new.val, 0, 0); + local_ctl_load(0, &cr0_new.reg); psw_save = S390_lowcore.mcck_new_psw; psw_bits(S390_lowcore.mcck_new_psw).io = 0; psw_bits(S390_lowcore.mcck_new_psw).ext = 0; @@ -146,7 +146,7 @@ static notrace void s390_handle_damage(void) * values. This makes possible system dump analysis easier. */ S390_lowcore.mcck_new_psw = psw_save; - __ctl_load(cr0.val, 0, 0); + local_ctl_load(0, &cr0.reg); disabled_wait(); while (1); } @@ -185,7 +185,7 @@ void s390_handle_mcck(void) static int mchchk_wng_posted = 0; /* Use single cpu clear, as we cannot handle smp here. */ - __ctl_clear_bit(14, 24); /* Disable WARNING MCH */ + local_ctl_clear_bit(14, CR14_WARNING_SUBMASK_BIT); if (xchg(&mchchk_wng_posted, 1) == 0) kill_cad_pid(SIGPWR, 1); } @@ -269,9 +269,9 @@ static int notrace s390_validate_registers(union mci mci) */ if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) kill_task = 1; - cr0.val = S390_lowcore.cregs_save_area[0]; + cr0.reg = S390_lowcore.cregs_save_area[0]; cr0.afp = cr0.vx = 1; - __ctl_load(cr0.val, 0, 0); + local_ctl_load(0, &cr0.reg); asm volatile( " la 1,%0\n" " VLM 0,15,0,1\n" @@ -279,7 +279,7 @@ static int notrace s390_validate_registers(union mci mci) : : "Q" (*(struct vx_array *)mcesa->vector_save_area) : "1"); - __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); + local_ctl_load(0, &S390_lowcore.cregs_save_area[0]); } /* Validate access registers */ asm volatile( @@ -290,7 +290,7 @@ static int notrace s390_validate_registers(union mci mci) if (!mci.ar) kill_task = 1; /* Validate guarded storage registers */ - cr2.val = S390_lowcore.cregs_save_area[2]; + cr2.reg = S390_lowcore.cregs_save_area[2]; if (cr2.gse) { if (!mci.gs) { /* @@ -505,9 +505,9 @@ NOKPROBE_SYMBOL(s390_do_machine_check); static int __init machine_check_init(void) { - ctl_set_bit(14, 25); /* enable external damage MCH */ - ctl_set_bit(14, 27); /* enable system recovery MCH */ - ctl_set_bit(14, 24); /* enable warning MCH */ + system_ctl_set_bit(14, CR14_EXTERNAL_DAMAGE_SUBMASK_BIT); + system_ctl_set_bit(14, CR14_RECOVERY_SUBMASK_BIT); + system_ctl_set_bit(14, CR14_WARNING_SUBMASK_BIT); return 0; } early_initcall(machine_check_init); diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 850c11ea631a..41ed6e0f0a2a 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1193,7 +1193,7 @@ static int __init cpumf_pmu_init(void) * Clear bit 15 of cr0 to unauthorize problem-state to * extract measurement counters */ - ctl_clear_bit(0, 48); + system_ctl_clear_bit(0, CR0_CPUMF_EXTRACTION_AUTH_BIT); /* register handler for measurement-alert interruptions */ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index c27321cb0969..dfa77da2fd2e 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -15,7 +15,10 @@ #include <linux/export.h> #include <linux/seq_file.h> #include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/compat.h> #include <linux/sysfs.h> +#include <asm/stacktrace.h> #include <asm/irq.h> #include <asm/cpu_mf.h> #include <asm/lowcore.h> @@ -212,6 +215,44 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, } } +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct stack_frame_user __user *sf; + unsigned long ip, sp; + bool first = true; + + if (is_compat_task()) + return; + perf_callchain_store(entry, instruction_pointer(regs)); + sf = (void __user *)user_stack_pointer(regs); + pagefault_disable(); + while (entry->nr < entry->max_stack) { + if (__get_user(sp, &sf->back_chain)) + break; + if (__get_user(ip, &sf->gprs[8])) + break; + if (ip & 0x1) { + /* + * If the instruction address is invalid, and this + * is the first stack frame, assume r14 has not + * been written to the stack yet. Otherwise exit. + */ + if (first && !(regs->gprs[14] & 0x1)) + ip = regs->gprs[14]; + else + break; + } + perf_callchain_store(entry, ip); + /* Sanity check: ABI requires SP to be aligned 8 bytes. */ + if (!sp || sp & 0x7) + break; + sf = (void __user *)sp; + first = false; + } + pagefault_enable(); +} + /* Perf definitions for PMU event attributes in sysfs */ ssize_t cpumf_events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index fe7d1774ded1..77fd24e6cbb6 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -16,8 +16,7 @@ #include <linux/export.h> #include <linux/io.h> #include <linux/perf_event.h> - -#include <asm/ctl_reg.h> +#include <asm/ctlreg.h> #include <asm/pai.h> #include <asm/debug.h> @@ -41,7 +40,43 @@ struct paicrypt_map { struct perf_event *event; /* Perf event for sampling */ }; -static DEFINE_PER_CPU(struct paicrypt_map, paicrypt_map); +struct paicrypt_mapptr { + struct paicrypt_map *mapptr; +}; + +static struct paicrypt_root { /* Anchor to per CPU data */ + refcount_t refcnt; /* Overall active events */ + struct paicrypt_mapptr __percpu *mapptr; +} paicrypt_root; + +/* Free per CPU data when the last event is removed. */ +static void paicrypt_root_free(void) +{ + if (refcount_dec_and_test(&paicrypt_root.refcnt)) { + free_percpu(paicrypt_root.mapptr); + paicrypt_root.mapptr = NULL; + } + debug_sprintf_event(cfm_dbg, 5, "%s root.refcount %d\n", __func__, + refcount_read(&paicrypt_root.refcnt)); +} + +/* + * On initialization of first event also allocate per CPU data dynamically. + * Start with an array of pointers, the array size is the maximum number of + * CPUs possible, which might be larger than the number of CPUs currently + * online. + */ +static int paicrypt_root_alloc(void) +{ + if (!refcount_inc_not_zero(&paicrypt_root.refcnt)) { + /* The memory is already zeroed. */ + paicrypt_root.mapptr = alloc_percpu(struct paicrypt_mapptr); + if (!paicrypt_root.mapptr) + return -ENOMEM; + refcount_set(&paicrypt_root.refcnt, 1); + } + return 0; +} /* Release the PMU if event is the last perf event */ static DEFINE_MUTEX(pai_reserve_mutex); @@ -51,7 +86,9 @@ static DEFINE_MUTEX(pai_reserve_mutex); */ static void paicrypt_event_destroy(struct perf_event *event) { - struct paicrypt_map *cpump = per_cpu_ptr(&paicrypt_map, event->cpu); + struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr, + event->cpu); + struct paicrypt_map *cpump = mp->mapptr; cpump->event = NULL; static_branch_dec(&pai_key); @@ -66,11 +103,11 @@ static void paicrypt_event_destroy(struct perf_event *event) __func__, (unsigned long)cpump->page, cpump->save); free_page((unsigned long)cpump->page); - cpump->page = NULL; kvfree(cpump->save); - cpump->save = NULL; - cpump->mode = PAI_MODE_NONE; + kfree(cpump); + mp->mapptr = NULL; } + paicrypt_root_free(); mutex_unlock(&pai_reserve_mutex); } @@ -86,7 +123,8 @@ static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel) */ static u64 paicrypt_getdata(struct perf_event *event, bool kernel) { - struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map); + struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); + struct paicrypt_map *cpump = mp->mapptr; u64 sum = 0; int i; @@ -132,11 +170,31 @@ static u64 paicrypt_getall(struct perf_event *event) * * Allocate the memory for the event. */ -static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) +static struct paicrypt_map *paicrypt_busy(struct perf_event *event) { - int rc = 0; + struct perf_event_attr *a = &event->attr; + struct paicrypt_map *cpump = NULL; + struct paicrypt_mapptr *mp; + int rc; mutex_lock(&pai_reserve_mutex); + + /* Allocate root node */ + rc = paicrypt_root_alloc(); + if (rc) + goto unlock; + + /* Allocate node for this event */ + mp = per_cpu_ptr(paicrypt_root.mapptr, event->cpu); + cpump = mp->mapptr; + if (!cpump) { /* Paicrypt_map allocated? */ + cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); + if (!cpump) { + rc = -ENOMEM; + goto free_root; + } + } + if (a->sample_period) { /* Sampling requested */ if (cpump->mode != PAI_MODE_NONE) rc = -EBUSY; /* ... sampling/counting active */ @@ -144,8 +202,15 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) if (cpump->mode == PAI_MODE_SAMPLING) rc = -EBUSY; /* ... and sampling active */ } + /* + * This error case triggers when there is a conflict: + * Either sampling requested and counting already active, or visa + * versa. Therefore the struct paicrypto_map for this CPU is + * needed or the error could not have occurred. Only adjust root + * node refcount. + */ if (rc) - goto unlock; + goto free_root; /* Allocate memory for counter page and counter extraction. * Only the first counting event has to allocate a page. @@ -158,30 +223,36 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) rc = -ENOMEM; cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (!cpump->page) - goto unlock; + goto free_paicrypt_map; cpump->save = kvmalloc_array(paicrypt_cnt + 1, sizeof(struct pai_userdata), GFP_KERNEL); if (!cpump->save) { free_page((unsigned long)cpump->page); cpump->page = NULL; - goto unlock; + goto free_paicrypt_map; } + + /* Set mode and reference count */ rc = 0; refcount_set(&cpump->refcnt, 1); - -unlock: - /* If rc is non-zero, do not set mode and reference count */ - if (!rc) { - cpump->mode = a->sample_period ? PAI_MODE_SAMPLING - : PAI_MODE_COUNTING; - } + cpump->mode = a->sample_period ? PAI_MODE_SAMPLING : PAI_MODE_COUNTING; + mp->mapptr = cpump; debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d" " mode %d refcnt %u page %#lx save %p rc %d\n", __func__, a->sample_period, cpump->active_events, cpump->mode, refcount_read(&cpump->refcnt), (unsigned long)cpump->page, cpump->save, rc); + goto unlock; + +free_paicrypt_map: + kfree(cpump); + mp->mapptr = NULL; +free_root: + paicrypt_root_free(); + +unlock: mutex_unlock(&pai_reserve_mutex); - return rc; + return rc ? ERR_PTR(rc) : cpump; } /* Might be called on different CPU than the one the event is intended for. */ @@ -189,7 +260,6 @@ static int paicrypt_event_init(struct perf_event *event) { struct perf_event_attr *a = &event->attr; struct paicrypt_map *cpump; - int rc; /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) @@ -199,16 +269,15 @@ static int paicrypt_event_init(struct perf_event *event) a->config > PAI_CRYPTO_BASE + paicrypt_cnt) return -EINVAL; /* Allow only CPU wide operation, no process context for now. */ - if (event->hw.target || event->cpu == -1) + if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1) return -ENOENT; /* Allow only CRYPTO_ALL for sampling. */ if (a->sample_period && a->config != PAI_CRYPTO_BASE) return -EINVAL; - cpump = per_cpu_ptr(&paicrypt_map, event->cpu); - rc = paicrypt_busy(a, cpump); - if (rc) - return rc; + cpump = paicrypt_busy(event); + if (IS_ERR(cpump)) + return PTR_ERR(cpump); /* Event initialization sets last_tag to 0. When later on the events * are deleted and re-added, do not reset the event count value to zero. @@ -216,7 +285,6 @@ static int paicrypt_event_init(struct perf_event *event) * are active at the same time. */ event->hw.last_tag = 0; - cpump->event = event; event->destroy = paicrypt_event_destroy; if (a->sample_period) { @@ -253,20 +321,20 @@ static void paicrypt_start(struct perf_event *event, int flags) if (!event->hw.last_tag) { event->hw.last_tag = 1; sum = paicrypt_getall(event); /* Get current value */ - local64_set(&event->count, 0); local64_set(&event->hw.prev_count, sum); } } static int paicrypt_add(struct perf_event *event, int flags) { - struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map); + struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); + struct paicrypt_map *cpump = mp->mapptr; unsigned long ccd; if (++cpump->active_events == 1) { ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET; WRITE_ONCE(S390_lowcore.ccd, ccd); - __ctl_set_bit(0, 50); + local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); } cpump->event = event; if (flags & PERF_EF_START && !event->attr.sample_period) { @@ -287,7 +355,8 @@ static void paicrypt_stop(struct perf_event *event, int flags) static void paicrypt_del(struct perf_event *event, int flags) { - struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map); + struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); + struct paicrypt_map *cpump = mp->mapptr; if (event->attr.sample_period) perf_sched_cb_dec(event->pmu); @@ -295,7 +364,7 @@ static void paicrypt_del(struct perf_event *event, int flags) /* Only counting needs to read counter */ paicrypt_stop(event, PERF_EF_UPDATE); if (--cpump->active_events == 0) { - __ctl_clear_bit(0, 50); + local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); WRITE_ONCE(S390_lowcore.ccd, 0); } } @@ -329,7 +398,8 @@ static size_t paicrypt_copy(struct pai_userdata *userdata, static int paicrypt_push_sample(void) { - struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map); + struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); + struct paicrypt_map *cpump = mp->mapptr; struct perf_event *event = cpump->event; struct perf_sample_data data; struct perf_raw_record raw; diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index c57c1a203256..8ba0f1a3a39d 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -17,8 +17,7 @@ #include <linux/export.h> #include <linux/io.h> #include <linux/perf_event.h> - -#include <asm/ctl_reg.h> +#include <asm/ctlreg.h> #include <asm/pai.h> #include <asm/debug.h> @@ -249,7 +248,7 @@ static int paiext_event_init(struct perf_event *event) if (rc) return rc; /* Allow only CPU wide operation, no process context for now. */ - if (event->hw.target || event->cpu == -1) + if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1) return -ENOENT; /* Allow only event NNPA_ALL for sampling. */ if (a->sample_period && a->config != PAI_NNPA_BASE) @@ -327,7 +326,6 @@ static void paiext_start(struct perf_event *event, int flags) event->hw.last_tag = 1; sum = paiext_getall(event); /* Get current value */ local64_set(&event->hw.prev_count, sum); - local64_set(&event->count, 0); } static int paiext_add(struct perf_event *event, int flags) @@ -340,7 +338,7 @@ static int paiext_add(struct perf_event *event, int flags) S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb); pcb->acc = virt_to_phys(cpump->area) | 0x1; /* Enable CPU instruction lookup for PAIE1 control block */ - __ctl_set_bit(0, 49); + local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT); debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", __func__, S390_lowcore.aicd, pcb->acc); } @@ -376,7 +374,7 @@ static void paiext_del(struct perf_event *event, int flags) } if (--cpump->active_events == 0) { /* Disable CPU instruction lookup for PAIE1 control block */ - __ctl_clear_bit(0, 49); + local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT); pcb->acc = 0; S390_lowcore.aicd = 0; debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ea244a73efad..046403471c5d 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -41,13 +41,20 @@ void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; - struct per_regs old, new; union ctlreg0 cr0_old, cr0_new; union ctlreg2 cr2_old, cr2_new; int cr0_changed, cr2_changed; - - __ctl_store(cr0_old.val, 0, 0); - __ctl_store(cr2_old.val, 2, 2); + union { + struct ctlreg regs[3]; + struct { + struct ctlreg control; + struct ctlreg start; + struct ctlreg end; + }; + } old, new; + + local_ctl_store(0, &cr0_old.reg); + local_ctl_store(2, &cr2_old.reg); cr0_new = cr0_old; cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ @@ -75,38 +82,38 @@ void update_cr_regs(struct task_struct *task) cr0_changed = cr0_new.val != cr0_old.val; cr2_changed = cr2_new.val != cr2_old.val; if (cr0_changed) - __ctl_load(cr0_new.val, 0, 0); + local_ctl_load(0, &cr0_new.reg); if (cr2_changed) - __ctl_load(cr2_new.val, 2, 2); + local_ctl_load(2, &cr2_new.reg); /* Copy user specified PER registers */ - new.control = thread->per_user.control; - new.start = thread->per_user.start; - new.end = thread->per_user.end; + new.control.val = thread->per_user.control; + new.start.val = thread->per_user.start; + new.end.val = thread->per_user.end; /* merge TIF_SINGLE_STEP into user specified PER registers. */ if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) || test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) { if (test_tsk_thread_flag(task, TIF_BLOCK_STEP)) - new.control |= PER_EVENT_BRANCH; + new.control.val |= PER_EVENT_BRANCH; else - new.control |= PER_EVENT_IFETCH; - new.control |= PER_CONTROL_SUSPENSION; - new.control |= PER_EVENT_TRANSACTION_END; + new.control.val |= PER_EVENT_IFETCH; + new.control.val |= PER_CONTROL_SUSPENSION; + new.control.val |= PER_EVENT_TRANSACTION_END; if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) - new.control |= PER_EVENT_IFETCH; - new.start = 0; - new.end = -1UL; + new.control.val |= PER_EVENT_IFETCH; + new.start.val = 0; + new.end.val = -1UL; } /* Take care of the PER enablement bit in the PSW. */ - if (!(new.control & PER_EVENT_MASK)) { + if (!(new.control.val & PER_EVENT_MASK)) { regs->psw.mask &= ~PSW_MASK_PER; return; } regs->psw.mask |= PSW_MASK_PER; - __ctl_store(old, 9, 11); + __local_ctl_store(9, 11, old.regs); if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) - __ctl_load(new, 9, 11); + __local_ctl_load(9, 11, new.regs); } void user_enable_single_step(struct task_struct *task) @@ -1107,7 +1114,7 @@ static int s390_gs_cb_set(struct task_struct *target, target->thread.gs_cb = data; *target->thread.gs_cb = gs_cb; if (target == current) { - __ctl_set_bit(2, 4); + local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); restore_gs_cb(target->thread.gs_cb); } preempt_enable(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e555b576d3c8..5701356f4f33 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -305,7 +305,7 @@ static void __init setup_zfcpdump(void) return; if (oldmem_data.start) return; - strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); + strlcat(boot_command_line, " cio_ignore=all,!ipldev,!condev", COMMAND_LINE_SIZE); console_loglevel = 2; } #else @@ -381,12 +381,6 @@ void stack_free(unsigned long stack) #endif } -void __init __noreturn arch_call_rest_init(void) -{ - smp_reinit_ipl_cpu(); - rest_init(); -} - static unsigned long __init stack_alloc_early(void) { unsigned long stack; @@ -455,7 +449,6 @@ static void __init setup_lowcore(void) lc->restart_fn = (unsigned long) do_restart; lc->restart_data = 0; lc->restart_source = -1U; - __ctl_store(lc->cregs_save_area, 0, 15); lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; arch_spin_lock_setup(0); @@ -465,6 +458,7 @@ static void __init setup_lowcore(void) lc->kernel_asce = S390_lowcore.kernel_asce; lc->user_asce = S390_lowcore.user_asce; + system_ctlreg_init_save_area(lc); abs_lc = get_abs_lowcore(); abs_lc->restart_stack = lc->restart_stack; abs_lc->restart_fn = lc->restart_fn; @@ -472,7 +466,6 @@ static void __init setup_lowcore(void) abs_lc->restart_source = lc->restart_source; abs_lc->restart_psw = lc->restart_psw; abs_lc->restart_flags = RESTART_FLAG_CTLREGS; - memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area)); abs_lc->program_new_psw = lc->program_new_psw; abs_lc->mcesad = lc->mcesad; put_abs_lowcore(abs_lc); @@ -797,15 +790,15 @@ static void __init setup_cr(void) __ctl_duct[4] = (unsigned long)__ctl_duald; /* Update control registers CR2, CR5 and CR15 */ - __ctl_store(cr2.val, 2, 2); - __ctl_store(cr5.val, 5, 5); - __ctl_store(cr15.val, 15, 15); + local_ctl_store(2, &cr2.reg); + local_ctl_store(5, &cr5.reg); + local_ctl_store(15, &cr15.reg); cr2.ducto = (unsigned long)__ctl_duct >> 6; cr5.pasteo = (unsigned long)__ctl_duct >> 6; cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3; - __ctl_load(cr2.val, 2, 2); - __ctl_load(cr5.val, 5, 5); - __ctl_load(cr15.val, 15, 15); + system_ctl_load(2, &cr2.reg); + system_ctl_load(5, &cr5.reg); + system_ctl_load(15, &cr15.reg); } /* diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 214a1b67f80a..f7fcfff09acf 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -37,6 +37,7 @@ #include <linux/crash_dump.h> #include <linux/kprobes.h> #include <asm/asm-offsets.h> +#include <asm/ctlreg.h> #include <asm/pfault.h> #include <asm/diag.h> #include <asm/switch_to.h> @@ -567,54 +568,6 @@ void arch_irq_work_raise(void) } #endif -/* - * parameter area for the set/clear control bit callbacks - */ -struct ec_creg_mask_parms { - unsigned long orval; - unsigned long andval; - int cr; -}; - -/* - * callback for setting/clearing control bits - */ -static void smp_ctl_bit_callback(void *info) -{ - struct ec_creg_mask_parms *pp = info; - unsigned long cregs[16]; - - __ctl_store(cregs, 0, 15); - cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval; - __ctl_load(cregs, 0, 15); -} - -static DEFINE_SPINLOCK(ctl_lock); - -void smp_ctl_set_clear_bit(int cr, int bit, bool set) -{ - struct ec_creg_mask_parms parms = { .cr = cr, }; - struct lowcore *abs_lc; - u64 ctlreg; - - if (set) { - parms.orval = 1UL << bit; - parms.andval = -1UL; - } else { - parms.orval = 0; - parms.andval = ~(1UL << bit); - } - spin_lock(&ctl_lock); - abs_lc = get_abs_lowcore(); - ctlreg = abs_lc->cregs_save_area[cr]; - ctlreg = (ctlreg & parms.andval) | parms.orval; - abs_lc->cregs_save_area[cr] = ctlreg; - put_abs_lowcore(abs_lc); - on_each_cpu(smp_ctl_bit_callback, &parms, 1); - spin_unlock(&ctl_lock); -} -EXPORT_SYMBOL(smp_ctl_set_clear_bit); - #ifdef CONFIG_CRASH_DUMP int smp_store_status(int cpu) @@ -935,14 +888,14 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) * Make sure global control register contents do not change * until new CPU has initialized control registers. */ - spin_lock(&ctl_lock); + system_ctlreg_lock(); pcpu_prepare_secondary(pcpu, cpu); pcpu_attach_task(pcpu, tidle); pcpu_start_fn(pcpu, smp_start_secondary, NULL); /* Wait until cpu puts itself in the online & active maps */ while (!cpu_online(cpu)) cpu_relax(); - spin_unlock(&ctl_lock); + system_ctlreg_unlock(); return 0; } @@ -957,7 +910,7 @@ early_param("possible_cpus", _setup_possible_cpus); int __cpu_disable(void) { - unsigned long cregs[16]; + struct ctlreg cregs[16]; int cpu; /* Handle possible pending IPIs */ @@ -969,11 +922,11 @@ int __cpu_disable(void) /* Disable pseudo page faults on this cpu. */ pfault_fini(); /* Disable interrupt sources via control register. */ - __ctl_store(cregs, 0, 15); - cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */ - cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ - cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ - __ctl_load(cregs, 0, 15); + __local_ctl_store(0, 15, cregs); + cregs[0].val &= ~0x0000ee70UL; /* disable all external interrupts */ + cregs[6].val &= ~0xff000000UL; /* disable all I/O interrupts */ + cregs[14].val &= ~0x1f000000UL; /* disable most machine checks */ + __local_ctl_load(0, 15, cregs); clear_cpu_flag(CIF_NOHZ_DELAY); return 0; } @@ -1013,12 +966,12 @@ void __init smp_fill_possible_mask(void) void __init smp_prepare_cpus(unsigned int max_cpus) { - /* request the 0x1201 emergency signal external interrupt */ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); - /* request the 0x1202 external call external interrupt */ + system_ctl_set_bit(0, 14); if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1202"); + system_ctl_set_bit(0, 13); } void __init smp_prepare_boot_cpu(void) @@ -1076,11 +1029,9 @@ static ssize_t cpu_configure_store(struct device *dev, cpus_read_lock(); mutex_lock(&smp_cpu_state_mutex); rc = -EBUSY; - /* disallow configuration changes of online cpus and cpu 0 */ + /* disallow configuration changes of online cpus */ cpu = dev->id; cpu = smp_get_base_cpu(cpu); - if (cpu == 0) - goto out; for (i = 0; i <= smp_cpu_mtid; i++) if (cpu_online(cpu + i)) goto out; @@ -1180,7 +1131,7 @@ static int smp_add_present_cpu(int cpu) return -ENOMEM; per_cpu(cpu_device, cpu) = c; s = &c->dev; - c->hotpluggable = 1; + c->hotpluggable = !!cpu; rc = register_cpu(c, cpu); if (rc) goto out; @@ -1258,60 +1209,3 @@ out: return rc; } subsys_initcall(s390_smp_init); - -static __always_inline void set_new_lowcore(struct lowcore *lc) -{ - union register_pair dst, src; - u32 pfx; - - src.even = (unsigned long) &S390_lowcore; - src.odd = sizeof(S390_lowcore); - dst.even = (unsigned long) lc; - dst.odd = sizeof(*lc); - pfx = __pa(lc); - - asm volatile( - " mvcl %[dst],%[src]\n" - " spx %[pfx]\n" - : [dst] "+&d" (dst.pair), [src] "+&d" (src.pair) - : [pfx] "Q" (pfx) - : "memory", "cc"); -} - -int __init smp_reinit_ipl_cpu(void) -{ - unsigned long async_stack, nodat_stack, mcck_stack; - struct lowcore *lc, *lc_ipl; - unsigned long flags, cr0; - u64 mcesad; - - lc_ipl = lowcore_ptr[0]; - lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); - async_stack = stack_alloc(); - mcck_stack = stack_alloc(); - if (!lc || !nodat_stack || !async_stack || !mcck_stack || nmi_alloc_mcesa(&mcesad)) - panic("Couldn't allocate memory"); - - local_irq_save(flags); - local_mcck_disable(); - set_new_lowcore(lc); - S390_lowcore.nodat_stack = nodat_stack + STACK_INIT_OFFSET; - S390_lowcore.async_stack = async_stack + STACK_INIT_OFFSET; - S390_lowcore.mcck_stack = mcck_stack + STACK_INIT_OFFSET; - __ctl_store(cr0, 0, 0); - __ctl_clear_bit(0, 28); /* disable lowcore protection */ - S390_lowcore.mcesad = mcesad; - __ctl_load(cr0, 0, 0); - if (abs_lowcore_map(0, lc, false)) - panic("Couldn't remap absolute lowcore"); - lowcore_ptr[0] = lc; - local_mcck_enable(); - local_irq_restore(flags); - - memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE); - memblock_free_late(__pa(lc_ipl->async_stack - STACK_INIT_OFFSET), THREAD_SIZE); - memblock_free_late(__pa(lc_ipl->nodat_stack - STACK_INIT_OFFSET), THREAD_SIZE); - memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl)); - return 0; -} diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 0787010139f7..94f440e38303 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -6,9 +6,12 @@ */ #include <linux/stacktrace.h> +#include <linux/uaccess.h> +#include <linux/compat.h> #include <asm/stacktrace.h> #include <asm/unwind.h> #include <asm/kprobes.h> +#include <asm/ptrace.h> void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) @@ -58,3 +61,43 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, return -EINVAL; return 0; } + +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) +{ + struct stack_frame_user __user *sf; + unsigned long ip, sp; + bool first = true; + + if (is_compat_task()) + return; + if (!consume_entry(cookie, instruction_pointer(regs))) + return; + sf = (void __user *)user_stack_pointer(regs); + pagefault_disable(); + while (1) { + if (__get_user(sp, &sf->back_chain)) + break; + if (__get_user(ip, &sf->gprs[8])) + break; + if (ip & 0x1) { + /* + * If the instruction address is invalid, and this + * is the first stack frame, assume r14 has not + * been written to the stack yet. Otherwise exit. + */ + if (first && !(regs->gprs[14] & 0x1)) + ip = regs->gprs[14]; + else + break; + } + if (!consume_entry(cookie, ip)) + break; + /* Sanity check: ABI requires SP to be aligned 8 bytes. */ + if (!sp || sp & 0x7) + break; + sf = (void __user *)sp; + first = false; + } + pagefault_enable(); +} diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d34d3548c046..14abad953c02 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -173,10 +173,10 @@ void init_cpu_timer(void) clockevents_register_device(cd); /* Enable clock comparator timer interrupt. */ - __ctl_set_bit(0,11); + local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SUBMASK_BIT); /* Always allow the timing alert external interrupt. */ - __ctl_set_bit(0, 4); + local_ctl_set_bit(0, CR0_ETR_SUBMASK_BIT); } static void clock_comparator_interrupt(struct ext_code ext_code, diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 23e868b79a6c..caec7db6f966 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -61,16 +61,6 @@ quiet_cmd_vdso32as = VDSO32A $@ quiet_cmd_vdso32cc = VDSO32C $@ cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $< -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso32.so: $(obj)/vdso32.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso32.so - # Generate VDSO offsets using helper script gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index fc1c6ff8178f..e3c9085f8fa7 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -70,16 +70,6 @@ quiet_cmd_vdso64as = VDSO64A $@ quiet_cmd_vdso64cc = VDSO64C $@ cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $< -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso64.so: $(obj)/vdso64.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso64.so - # Generate VDSO offsets using helper script gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index ff8349d17b33..5bfcc50c1a68 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -11,7 +11,7 @@ #include <linux/err.h> #include <linux/pgtable.h> #include <linux/bitfield.h> - +#include <asm/fault.h> #include <asm/gmap.h> #include "kvm-s390.h" #include "gaccess.h" @@ -466,23 +466,6 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar, return 0; } -struct trans_exc_code_bits { - unsigned long addr : 52; /* Translation-exception Address */ - unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ - unsigned long : 2; - unsigned long b56 : 1; - unsigned long : 3; - unsigned long b60 : 1; - unsigned long b61 : 1; - unsigned long as : 2; /* ASCE Identifier */ -}; - -enum { - FSI_UNKNOWN = 0, /* Unknown whether fetch or store */ - FSI_STORE = 1, /* Exception was due to store operation */ - FSI_FETCH = 2 /* Exception was due to fetch operation */ -}; - enum prot_type { PROT_TYPE_LA = 0, PROT_TYPE_KEYC = 1, @@ -497,11 +480,11 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, enum gacc_mode mode, enum prot_type prot, bool terminate) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; - struct trans_exc_code_bits *tec; + union teid *teid; memset(pgm, 0, sizeof(*pgm)); pgm->code = code; - tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + teid = (union teid *)&pgm->trans_exc_code; switch (code) { case PGM_PROTECTION: @@ -511,25 +494,25 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, WARN_ON_ONCE(1); break; case PROT_TYPE_IEP: - tec->b61 = 1; + teid->b61 = 1; fallthrough; case PROT_TYPE_LA: - tec->b56 = 1; + teid->b56 = 1; break; case PROT_TYPE_KEYC: - tec->b60 = 1; + teid->b60 = 1; break; case PROT_TYPE_ALC: - tec->b60 = 1; + teid->b60 = 1; fallthrough; case PROT_TYPE_DAT: - tec->b61 = 1; + teid->b61 = 1; break; } if (terminate) { - tec->b56 = 0; - tec->b60 = 0; - tec->b61 = 0; + teid->b56 = 0; + teid->b60 = 0; + teid->b61 = 0; } fallthrough; case PGM_ASCE_TYPE: @@ -543,9 +526,9 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, * exc_access_id has to be set to 0 for some instructions. Both * cases have to be handled by the caller. */ - tec->addr = gva >> PAGE_SHIFT; - tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; - tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; + teid->addr = gva >> PAGE_SHIFT; + teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH; + teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as; fallthrough; case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 11676b81e6bf..7aa0e668488f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4927,7 +4927,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu) } if (MACHINE_HAS_GS) { preempt_disable(); - __ctl_set_bit(2, 4); + local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); if (current->thread.gs_cb) { vcpu->arch.host_gscb = current->thread.gs_cb; save_gs_cb(vcpu->arch.host_gscb); @@ -5004,13 +5004,13 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu) kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; if (MACHINE_HAS_GS) { preempt_disable(); - __ctl_set_bit(2, 4); + local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); if (vcpu->arch.gs_enabled) save_gs_cb(current->thread.gs_cb); current->thread.gs_cb = vcpu->arch.host_gscb; restore_gs_cb(vcpu->arch.host_gscb); if (!vcpu->arch.host_gscb) - __ctl_clear_bit(2, 4); + local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT); vcpu->arch.host_gscb = NULL; preempt_enable(); } diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index dc4cfa8795c0..621a17fd1a1b 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -57,7 +57,7 @@ static int handle_gs(struct kvm_vcpu *vcpu) if (test_kvm_facility(vcpu->kvm, 133)) { VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)"); preempt_disable(); - __ctl_set_bit(2, 4); + local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb; restore_gs_cb(current->thread.gs_cb); preempt_enable(); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index e4a13d7cab6e..61d8dcd95bbc 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -12,21 +12,22 @@ #include <linux/export.h> #include <linux/mm.h> #include <asm/asm-extable.h> +#include <asm/ctlreg.h> #ifdef CONFIG_DEBUG_ENTRY void debug_user_asce(int exit) { - unsigned long cr1, cr7; + struct ctlreg cr1, cr7; - __ctl_store(cr1, 1, 1); - __ctl_store(cr7, 7, 7); - if (cr1 == S390_lowcore.kernel_asce && cr7 == S390_lowcore.user_asce) + local_ctl_store(1, &cr1); + local_ctl_store(7, &cr7); + if (cr1.val == S390_lowcore.kernel_asce.val && cr7.val == S390_lowcore.user_asce.val) return; panic("incorrect ASCE on kernel %s\n" "cr1: %016lx cr7: %016lx\n" - "kernel: %016llx user: %016llx\n", - exit ? "exit" : "entry", cr1, cr7, - S390_lowcore.kernel_asce, S390_lowcore.user_asce); + "kernel: %016lx user: %016lx\n", + exit ? "exit" : "entry", cr1.val, cr7.val, + S390_lowcore.kernel_asce.val, S390_lowcore.user_asce.val); } #endif /*CONFIG_DEBUG_ENTRY */ diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index b51666967aa1..d37a8f607b71 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -287,7 +287,7 @@ static int pt_dump_init(void) * kernel ASCE. We need this to keep the page table walker functions * from accessing non-existent entries. */ - max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; + max_addr = (S390_lowcore.kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31; diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index fe87291df95d..0a0738a473af 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -61,6 +61,22 @@ static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, return true; } +static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data); + unsigned int reg_data = FIELD_GET(EX_DATA_REG_ERR, ex->data); + unsigned long data, addr, offset; + + addr = regs->gprs[reg_addr]; + offset = addr & (sizeof(unsigned long) - 1); + addr &= ~(sizeof(unsigned long) - 1); + data = *(unsigned long *)addr; + data <<= BITS_PER_BYTE * offset; + regs->gprs[reg_data] = data; + regs->psw.addr = extable_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -81,6 +97,8 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_ua_load_reg(ex, false, regs); case EX_TYPE_UA_LOAD_REGPAIR: return ex_handler_ua_load_reg(ex, true, regs); + case EX_TYPE_ZEROPAD: + return ex_handler_zeropad(ex, regs); } panic("invalid exception table entry"); } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index b678295931c3..249aefcf7c4e 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -3,17 +3,19 @@ * S390 version * Copyright IBM Corp. 1999 * Author(s): Hartmut Penner (hp@de.ibm.com) - * Ulrich Weigand (uweigand@de.ibm.com) + * Ulrich Weigand (uweigand@de.ibm.com) * * Derived from "arch/i386/mm/fault.c" * Copyright (C) 1995 Linus Torvalds */ #include <linux/kernel_stat.h> +#include <linux/mmu_context.h> #include <linux/perf_event.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/sched/debug.h> +#include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> @@ -34,38 +36,27 @@ #include <linux/kfence.h> #include <asm/asm-extable.h> #include <asm/asm-offsets.h> +#include <asm/ptrace.h> +#include <asm/fault.h> #include <asm/diag.h> #include <asm/gmap.h> #include <asm/irq.h> -#include <asm/mmu_context.h> #include <asm/facility.h> #include <asm/uv.h> #include "../kernel/entry.h" -#define __FAIL_ADDR_MASK -4096L - -/* - * Allocate private vm_fault_reason from top. Please make sure it won't - * collide with vm_fault_reason. - */ -#define VM_FAULT_BADCONTEXT ((__force vm_fault_t)0x80000000) -#define VM_FAULT_BADMAP ((__force vm_fault_t)0x40000000) -#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x20000000) -#define VM_FAULT_SIGNAL ((__force vm_fault_t)0x10000000) -#define VM_FAULT_PFAULT ((__force vm_fault_t)0x8000000) - enum fault_type { KERNEL_FAULT, USER_FAULT, GMAP_FAULT, }; -static unsigned long store_indication __read_mostly; +static DEFINE_STATIC_KEY_FALSE(have_store_indication); static int __init fault_init(void) { if (test_facility(75)) - store_indication = 0xc00; + static_branch_enable(&have_store_indication); return 0; } early_initcall(fault_init); @@ -75,11 +66,9 @@ early_initcall(fault_init); */ static enum fault_type get_fault_type(struct pt_regs *regs) { - unsigned long trans_exc_code; + union teid teid = { .val = regs->int_parm_long }; - trans_exc_code = regs->int_parm_long & 3; - if (likely(trans_exc_code == 0)) { - /* primary space exception */ + if (likely(teid.as == PSW_BITS_AS_PRIMARY)) { if (user_mode(regs)) return USER_FAULT; if (!IS_ENABLED(CONFIG_PGSTE)) @@ -88,83 +77,77 @@ static enum fault_type get_fault_type(struct pt_regs *regs) return GMAP_FAULT; return KERNEL_FAULT; } - if (trans_exc_code == 2) + if (teid.as == PSW_BITS_AS_SECONDARY) return USER_FAULT; - if (trans_exc_code == 1) { - /* access register mode, not used in the kernel */ + /* Access register mode, not used in the kernel */ + if (teid.as == PSW_BITS_AS_ACCREG) return USER_FAULT; - } - /* home space exception -> access via kernel ASCE */ + /* Home space -> access via kernel ASCE */ return KERNEL_FAULT; } static unsigned long get_fault_address(struct pt_regs *regs) { - unsigned long trans_exc_code = regs->int_parm_long; + union teid teid = { .val = regs->int_parm_long }; - return trans_exc_code & __FAIL_ADDR_MASK; + return teid.addr * PAGE_SIZE; } -static bool fault_is_write(struct pt_regs *regs) +static __always_inline bool fault_is_write(struct pt_regs *regs) { - unsigned long trans_exc_code = regs->int_parm_long; + union teid teid = { .val = regs->int_parm_long }; - return (trans_exc_code & store_indication) == 0x400; -} - -static int bad_address(void *p) -{ - unsigned long dummy; - - return get_kernel_nofault(dummy, (unsigned long *)p); + if (static_branch_likely(&have_store_indication)) + return teid.fsi == TEID_FSI_STORE; + return false; } static void dump_pagetable(unsigned long asce, unsigned long address) { - unsigned long *table = __va(asce & _ASCE_ORIGIN); + unsigned long entry, *table = __va(asce & _ASCE_ORIGIN); pr_alert("AS:%016lx ", asce); switch (asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: table += (address & _REGION1_INDEX) >> _REGION1_SHIFT; - if (bad_address(table)) + if (get_kernel_nofault(entry, table)) goto bad; - pr_cont("R1:%016lx ", *table); - if (*table & _REGION_ENTRY_INVALID) + pr_cont("R1:%016lx ", entry); + if (entry & _REGION_ENTRY_INVALID) goto out; - table = __va(*table & _REGION_ENTRY_ORIGIN); + table = __va(entry & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_REGION2: table += (address & _REGION2_INDEX) >> _REGION2_SHIFT; - if (bad_address(table)) + if (get_kernel_nofault(entry, table)) goto bad; - pr_cont("R2:%016lx ", *table); - if (*table & _REGION_ENTRY_INVALID) + pr_cont("R2:%016lx ", entry); + if (entry & _REGION_ENTRY_INVALID) goto out; - table = __va(*table & _REGION_ENTRY_ORIGIN); + table = __va(entry & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_REGION3: table += (address & _REGION3_INDEX) >> _REGION3_SHIFT; - if (bad_address(table)) + if (get_kernel_nofault(entry, table)) goto bad; - pr_cont("R3:%016lx ", *table); - if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE)) + pr_cont("R3:%016lx ", entry); + if (entry & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE)) goto out; - table = __va(*table & _REGION_ENTRY_ORIGIN); + table = __va(entry & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_SEGMENT: table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; - if (bad_address(table)) + if (get_kernel_nofault(entry, table)) goto bad; - pr_cont("S:%016lx ", *table); - if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE)) + pr_cont("S:%016lx ", entry); + if (entry & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE)) goto out; - table = __va(*table & _SEGMENT_ENTRY_ORIGIN); + table = __va(entry & _SEGMENT_ENTRY_ORIGIN); } table += (address & _PAGE_INDEX) >> _PAGE_SHIFT; - if (bad_address(table)) + if (get_kernel_nofault(entry, table)) goto bad; - pr_cont("P:%016lx ", *table); + pr_cont("P:%016lx ", entry); out: pr_cont("\n"); return; @@ -174,173 +157,113 @@ bad: static void dump_fault_info(struct pt_regs *regs) { + union teid teid = { .val = regs->int_parm_long }; unsigned long asce; pr_alert("Failing address: %016lx TEID: %016lx\n", - regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); + get_fault_address(regs), teid.val); pr_alert("Fault in "); - switch (regs->int_parm_long & 3) { - case 3: + switch (teid.as) { + case PSW_BITS_AS_HOME: pr_cont("home space "); break; - case 2: + case PSW_BITS_AS_SECONDARY: pr_cont("secondary space "); break; - case 1: + case PSW_BITS_AS_ACCREG: pr_cont("access register "); break; - case 0: + case PSW_BITS_AS_PRIMARY: pr_cont("primary space "); break; } pr_cont("mode while using "); switch (get_fault_type(regs)) { case USER_FAULT: - asce = S390_lowcore.user_asce; + asce = S390_lowcore.user_asce.val; pr_cont("user "); break; case GMAP_FAULT: - asce = ((struct gmap *) S390_lowcore.gmap)->asce; + asce = ((struct gmap *)S390_lowcore.gmap)->asce; pr_cont("gmap "); break; case KERNEL_FAULT: - asce = S390_lowcore.kernel_asce; + asce = S390_lowcore.kernel_asce.val; pr_cont("kernel "); break; default: unreachable(); } pr_cont("ASCE.\n"); - dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); + dump_pagetable(asce, get_fault_address(regs)); } int show_unhandled_signals = 1; void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) { + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); + if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; if (!unhandled_signal(current, signr)) return; - if (!printk_ratelimit()) + if (!__ratelimit(&rs)) return; - printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ", - regs->int_code & 0xffff, regs->int_code >> 17); + pr_alert("User process fault: interruption code %04x ilc:%d ", + regs->int_code & 0xffff, regs->int_code >> 17); print_vma_addr(KERN_CONT "in ", regs->psw.addr); - printk(KERN_CONT "\n"); + pr_cont("\n"); if (is_mm_fault) dump_fault_info(regs); show_regs(regs); } -/* - * Send SIGSEGV to task. This is an external routine - * to keep the stack usage of do_page_fault small. - */ -static noinline void do_sigsegv(struct pt_regs *regs, int si_code) +static void do_sigsegv(struct pt_regs *regs, int si_code) { report_user_fault(regs, SIGSEGV, 1); - force_sig_fault(SIGSEGV, si_code, - (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); + force_sig_fault(SIGSEGV, si_code, (void __user *)get_fault_address(regs)); } -static noinline void do_no_context(struct pt_regs *regs, vm_fault_t fault) +static void handle_fault_error_nolock(struct pt_regs *regs, int si_code) { enum fault_type fault_type; unsigned long address; bool is_write; + if (user_mode(regs)) { + if (WARN_ON_ONCE(!si_code)) + si_code = SEGV_MAPERR; + return do_sigsegv(regs, si_code); + } if (fixup_exception(regs)) return; fault_type = get_fault_type(regs); - if ((fault_type == KERNEL_FAULT) && (fault == VM_FAULT_BADCONTEXT)) { + if (fault_type == KERNEL_FAULT) { address = get_fault_address(regs); is_write = fault_is_write(regs); if (kfence_handle_page_fault(address, is_write, regs)) return; } - /* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ if (fault_type == KERNEL_FAULT) - printk(KERN_ALERT "Unable to handle kernel pointer dereference" - " in virtual kernel address space\n"); + pr_alert("Unable to handle kernel pointer dereference in virtual kernel address space\n"); else - printk(KERN_ALERT "Unable to handle kernel paging request" - " in virtual user address space\n"); + pr_alert("Unable to handle kernel paging request in virtual user address space\n"); dump_fault_info(regs); die(regs, "Oops"); } -static noinline void do_low_address(struct pt_regs *regs) +static void handle_fault_error(struct pt_regs *regs, int si_code) { - /* Low-address protection hit in kernel mode means - NULL pointer write access in kernel mode. */ - if (regs->psw.mask & PSW_MASK_PSTATE) { - /* Low-address protection hit in user mode 'cannot happen'. */ - die (regs, "Low-address protection"); - } + struct mm_struct *mm = current->mm; - do_no_context(regs, VM_FAULT_BADACCESS); -} - -static noinline void do_sigbus(struct pt_regs *regs) -{ - /* - * Send a sigbus, regardless of whether we were in kernel - * or user mode. - */ - force_sig_fault(SIGBUS, BUS_ADRERR, - (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); + mmap_read_unlock(mm); + handle_fault_error_nolock(regs, si_code); } -static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault) +static void do_sigbus(struct pt_regs *regs) { - int si_code; - - switch (fault) { - case VM_FAULT_BADACCESS: - case VM_FAULT_BADMAP: - /* Bad memory access. Check if it is kernel or user space. */ - if (user_mode(regs)) { - /* User mode accesses just cause a SIGSEGV */ - si_code = (fault == VM_FAULT_BADMAP) ? - SEGV_MAPERR : SEGV_ACCERR; - do_sigsegv(regs, si_code); - break; - } - fallthrough; - case VM_FAULT_BADCONTEXT: - case VM_FAULT_PFAULT: - do_no_context(regs, fault); - break; - case VM_FAULT_SIGNAL: - if (!user_mode(regs)) - do_no_context(regs, fault); - break; - default: /* fault & VM_FAULT_ERROR */ - if (fault & VM_FAULT_OOM) { - if (!user_mode(regs)) - do_no_context(regs, fault); - else - pagefault_out_of_memory(); - } else if (fault & VM_FAULT_SIGSEGV) { - /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) - do_no_context(regs, fault); - else - do_sigsegv(regs, SEGV_MAPERR); - } else if (fault & VM_FAULT_SIGBUS) { - /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) - do_no_context(regs, fault); - else - do_sigbus(regs); - } else - BUG(); - break; - } + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)get_fault_address(regs)); } /* @@ -349,20 +272,20 @@ static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault) * routines. * * interruption code (int_code): - * 04 Protection -> Write-Protection (suppression) - * 10 Segment translation -> Not present (nullification) - * 11 Page translation -> Not present (nullification) - * 3b Region third trans. -> Not present (nullification) + * 04 Protection -> Write-Protection (suppression) + * 10 Segment translation -> Not present (nullification) + * 11 Page translation -> Not present (nullification) + * 3b Region third trans. -> Not present (nullification) */ -static inline vm_fault_t do_exception(struct pt_regs *regs, int access) +static void do_exception(struct pt_regs *regs, int access) { - struct gmap *gmap; + struct vm_area_struct *vma; struct task_struct *tsk; + unsigned long address; struct mm_struct *mm; - struct vm_area_struct *vma; enum fault_type type; - unsigned long address; unsigned int flags; + struct gmap *gmap; vm_fault_t fault; bool is_write; @@ -372,31 +295,21 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) * been nullified. Don't signal single step via SIGTRAP. */ clear_thread_flag(TIF_PER_TRAP); - if (kprobe_page_fault(regs, 14)) - return 0; - + return; mm = tsk->mm; address = get_fault_address(regs); is_write = fault_is_write(regs); - - /* - * Verify that the fault happened in user space, that - * we are not in an interrupt and that there is a - * user context. - */ - fault = VM_FAULT_BADCONTEXT; type = get_fault_type(regs); switch (type) { case KERNEL_FAULT: - goto out; + return handle_fault_error_nolock(regs, 0); case USER_FAULT: case GMAP_FAULT: if (faulthandler_disabled() || !mm) - goto out; + return handle_fault_error_nolock(regs, 0); break; } - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_DEFAULT; if (user_mode(regs)) @@ -419,125 +332,117 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) vma_end_read(vma); if (!(fault & VM_FAULT_RETRY)) { count_vm_vma_lock_event(VMA_LOCK_SUCCESS); - if (likely(!(fault & VM_FAULT_ERROR))) - fault = 0; - goto out; + if (unlikely(fault & VM_FAULT_ERROR)) + goto error; + return; } count_vm_vma_lock_event(VMA_LOCK_RETRY); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { - fault = VM_FAULT_SIGNAL; - goto out; + if (!user_mode(regs)) + handle_fault_error_nolock(regs, 0); + return; } lock_mmap: mmap_read_lock(mm); - gmap = NULL; if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) { - gmap = (struct gmap *) S390_lowcore.gmap; + gmap = (struct gmap *)S390_lowcore.gmap; current->thread.gmap_addr = address; current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE); current->thread.gmap_int_code = regs->int_code & 0xffff; address = __gmap_translate(gmap, address); - if (address == -EFAULT) { - fault = VM_FAULT_BADMAP; - goto out_up; - } + if (address == -EFAULT) + return handle_fault_error(regs, SEGV_MAPERR); if (gmap->pfault_enabled) flags |= FAULT_FLAG_RETRY_NOWAIT; } - retry: - fault = VM_FAULT_BADMAP; vma = find_vma(mm, address); if (!vma) - goto out_up; - + return handle_fault_error(regs, SEGV_MAPERR); if (unlikely(vma->vm_start > address)) { if (!(vma->vm_flags & VM_GROWSDOWN)) - goto out_up; + return handle_fault_error(regs, SEGV_MAPERR); vma = expand_stack(mm, address); if (!vma) - goto out; + return handle_fault_error_nolock(regs, SEGV_MAPERR); } - - /* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ - fault = VM_FAULT_BADACCESS; if (unlikely(!(vma->vm_flags & access))) - goto out_up; - - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ + return handle_fault_error(regs, SEGV_ACCERR); fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) { - fault = VM_FAULT_SIGNAL; if (flags & FAULT_FLAG_RETRY_NOWAIT) - goto out_up; - goto out; + mmap_read_unlock(mm); + if (!user_mode(regs)) + handle_fault_error_nolock(regs, 0); + return; } - /* The fault is fully completed (including releasing mmap lock) */ if (fault & VM_FAULT_COMPLETED) { if (gmap) { mmap_read_lock(mm); - goto out_gmap; + goto gmap; } - fault = 0; - goto out; + return; + } + if (unlikely(fault & VM_FAULT_ERROR)) { + mmap_read_unlock(mm); + goto error; } - - if (unlikely(fault & VM_FAULT_ERROR)) - goto out_up; - if (fault & VM_FAULT_RETRY) { - if (IS_ENABLED(CONFIG_PGSTE) && gmap && - (flags & FAULT_FLAG_RETRY_NOWAIT)) { + if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { /* - * FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has - * not been released + * FAULT_FLAG_RETRY_NOWAIT has been set, + * mmap_lock has not been released */ current->thread.gmap_pfault = 1; - fault = VM_FAULT_PFAULT; - goto out_up; + return handle_fault_error(regs, 0); } flags &= ~FAULT_FLAG_RETRY_NOWAIT; flags |= FAULT_FLAG_TRIED; mmap_read_lock(mm); goto retry; } -out_gmap: +gmap: if (IS_ENABLED(CONFIG_PGSTE) && gmap) { address = __gmap_link(gmap, current->thread.gmap_addr, address); - if (address == -EFAULT) { - fault = VM_FAULT_BADMAP; - goto out_up; - } + if (address == -EFAULT) + return handle_fault_error(regs, SEGV_MAPERR); if (address == -ENOMEM) { fault = VM_FAULT_OOM; - goto out_up; + mmap_read_unlock(mm); + goto error; } } - fault = 0; -out_up: mmap_read_unlock(mm); -out: - return fault; + return; +error: + if (fault & VM_FAULT_OOM) { + if (!user_mode(regs)) + handle_fault_error_nolock(regs, 0); + else + pagefault_out_of_memory(); + } else if (fault & VM_FAULT_SIGSEGV) { + if (!user_mode(regs)) + handle_fault_error_nolock(regs, 0); + else + do_sigsegv(regs, SEGV_MAPERR); + } else if (fault & VM_FAULT_SIGBUS) { + if (!user_mode(regs)) + handle_fault_error_nolock(regs, 0); + else + do_sigbus(regs); + } else { + BUG(); + } } void do_protection_exception(struct pt_regs *regs) { - unsigned long trans_exc_code; - int access; - vm_fault_t fault; + union teid teid = { .val = regs->int_parm_long }; - trans_exc_code = regs->int_parm_long; /* * Protection exceptions are suppressing, decrement psw address. * The exception to this rule are aborted transactions, for these @@ -550,33 +455,28 @@ void do_protection_exception(struct pt_regs *regs) * as a special case because the translation exception code * field is not guaranteed to contain valid data in this case. */ - if (unlikely(!(trans_exc_code & 4))) { - do_low_address(regs); - return; + if (unlikely(!teid.b61)) { + if (user_mode(regs)) { + /* Low-address protection in user mode: cannot happen */ + die(regs, "Low-address protection"); + } + /* + * Low-address protection in kernel mode means + * NULL pointer write access in kernel mode. + */ + return handle_fault_error_nolock(regs, 0); } - if (unlikely(MACHINE_HAS_NX && (trans_exc_code & 0x80))) { - regs->int_parm_long = (trans_exc_code & ~PAGE_MASK) | - (regs->psw.addr & PAGE_MASK); - access = VM_EXEC; - fault = VM_FAULT_BADACCESS; - } else { - access = VM_WRITE; - fault = do_exception(regs, access); + if (unlikely(MACHINE_HAS_NX && teid.b56)) { + regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK); + return handle_fault_error_nolock(regs, SEGV_ACCERR); } - if (unlikely(fault)) - do_fault_error(regs, fault); + do_exception(regs, VM_WRITE); } NOKPROBE_SYMBOL(do_protection_exception); void do_dat_exception(struct pt_regs *regs) { - int access; - vm_fault_t fault; - - access = VM_ACCESS_FLAGS; - fault = do_exception(regs, access); - if (unlikely(fault)) - do_fault_error(regs, fault); + do_exception(regs, VM_ACCESS_FLAGS); } NOKPROBE_SYMBOL(do_dat_exception); @@ -584,7 +484,8 @@ NOKPROBE_SYMBOL(do_dat_exception); void do_secure_storage_access(struct pt_regs *regs) { - unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK; + union teid teid = { .val = regs->int_parm_long }; + unsigned long addr = get_fault_address(regs); struct vm_area_struct *vma; struct mm_struct *mm; struct page *page; @@ -592,14 +493,12 @@ void do_secure_storage_access(struct pt_regs *regs) int rc; /* - * bit 61 tells us if the address is valid, if it's not we - * have a major problem and should stop the kernel or send a - * SIGSEGV to the process. Unfortunately bit 61 is not - * reliable without the misc UV feature so we need to check - * for that as well. + * Bit 61 indicates if the address is valid, if it is not the + * kernel should be stopped or SIGSEGV should be sent to the + * process. Bit 61 is not reliable without the misc UV feature, + * therefore this needs to be checked too. */ - if (uv_has_feature(BIT_UV_FEAT_MISC) && - !test_bit_inv(61, ®s->int_parm_long)) { + if (uv_has_feature(BIT_UV_FEAT_MISC) && !teid.b61) { /* * When this happens, userspace did something that it * was not supposed to do, e.g. branching into secure @@ -609,14 +508,12 @@ void do_secure_storage_access(struct pt_regs *regs) send_sig(SIGSEGV, current, 0); return; } - /* - * The kernel should never run into this case and we - * have no way out of this situation. + * The kernel should never run into this case and + * there is no way out of this situation. */ panic("Unexpected PGM 0x3d with TEID bit 61=0"); } - switch (get_fault_type(regs)) { case GMAP_FAULT: mm = current->mm; @@ -624,20 +521,15 @@ void do_secure_storage_access(struct pt_regs *regs) mmap_read_lock(mm); addr = __gmap_translate(gmap, addr); mmap_read_unlock(mm); - if (IS_ERR_VALUE(addr)) { - do_fault_error(regs, VM_FAULT_BADMAP); - break; - } + if (IS_ERR_VALUE(addr)) + return handle_fault_error_nolock(regs, SEGV_MAPERR); fallthrough; case USER_FAULT: mm = current->mm; mmap_read_lock(mm); vma = find_vma(mm, addr); - if (!vma) { - mmap_read_unlock(mm); - do_fault_error(regs, VM_FAULT_BADMAP); - break; - } + if (!vma) + return handle_fault_error(regs, SEGV_MAPERR); page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET); if (IS_ERR_OR_NULL(page)) { mmap_read_unlock(mm); @@ -658,23 +550,18 @@ void do_secure_storage_access(struct pt_regs *regs) BUG(); break; default: - do_fault_error(regs, VM_FAULT_BADMAP); - WARN_ON_ONCE(1); + unreachable(); } } NOKPROBE_SYMBOL(do_secure_storage_access); void do_non_secure_storage_access(struct pt_regs *regs) { - unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK; struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; + unsigned long gaddr = get_fault_address(regs); - if (get_fault_type(regs) != GMAP_FAULT) { - do_fault_error(regs, VM_FAULT_BADMAP); - WARN_ON_ONCE(1); - return; - } - + if (WARN_ON_ONCE(get_fault_type(regs) != GMAP_FAULT)) + return handle_fault_error_nolock(regs, SEGV_MAPERR); if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL) send_sig(SIGSEGV, current, 0); } @@ -682,8 +569,8 @@ NOKPROBE_SYMBOL(do_non_secure_storage_access); void do_secure_storage_violation(struct pt_regs *regs) { - unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK; struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; + unsigned long gaddr = get_fault_address(regs); /* * If the VM has been rebooted, its address space might still contain @@ -699,9 +586,8 @@ void do_secure_storage_violation(struct pt_regs *regs) * This exception is only triggered when a guest 2 is running * and can therefore never occur in kernel context. */ - printk_ratelimited(KERN_WARNING - "Secure storage violation in task: %s, pid %d\n", - current->comm, current->pid); + pr_warn_ratelimited("Secure storage violation in task: %s, pid %d\n", + current->comm, current->pid); send_sig(SIGSEGV, current, 0); } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 906a7bfc2a78..6f96b5a71c63 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -18,13 +18,25 @@ #include <linux/ksm.h> #include <linux/mman.h> #include <linux/pgtable.h> - +#include <asm/page-states.h> #include <asm/pgalloc.h> #include <asm/gmap.h> +#include <asm/page.h> #include <asm/tlb.h> #define GMAP_SHADOW_FAKE_TABLE 1ULL +static struct page *gmap_alloc_crst(void) +{ + struct page *page; + + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + if (!page) + return NULL; + __arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER); + return page; +} + /** * gmap_alloc - allocate and initialize a guest address space * @limit: maximum address of the gmap address space @@ -67,7 +79,7 @@ static struct gmap *gmap_alloc(unsigned long limit) spin_lock_init(&gmap->guest_table_lock); spin_lock_init(&gmap->shadow_lock); refcount_set(&gmap->ref_count, 1); - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) goto out_free; page->index = 0; @@ -308,7 +320,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long *new; /* since we dont free the gmap table until gmap_free we can unlock */ - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) return -ENOMEM; new = page_to_virt(page); @@ -1759,7 +1771,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) return -ENOMEM; page->index = r2t & _REGION_ENTRY_ORIGIN; @@ -1843,7 +1855,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) return -ENOMEM; page->index = r3t & _REGION_ENTRY_ORIGIN; @@ -1927,7 +1939,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); /* Allocate a shadow segment table */ - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) return -ENOMEM; page->index = sgt & _REGION_ENTRY_ORIGIN; @@ -2855,7 +2867,7 @@ int s390_replace_asce(struct gmap *gmap) if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT) return -EINVAL; - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) return -ENOMEM; page->index = 0; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8b94d2212d33..43e612bc2bcd 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -35,6 +35,7 @@ #include <asm/processor.h> #include <linux/uaccess.h> #include <asm/pgalloc.h> +#include <asm/ctlreg.h> #include <asm/kfence.h> #include <asm/ptdump.h> #include <asm/dma.h> @@ -42,7 +43,6 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/sections.h> -#include <asm/ctl_reg.h> #include <asm/sclp.h> #include <asm/set_memory.h> #include <asm/kasan.h> @@ -54,7 +54,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir"); -unsigned long __bootdata_preserved(s390_invalid_asce); +struct ctlreg __bootdata_preserved(s390_invalid_asce); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); @@ -164,14 +164,10 @@ void __init mem_init(void) pv_init(); kfence_split_mapping(); - /* Setup guest page hinting */ - cmma_init(); /* this will put all low memory onto the freelists */ memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. */ - - cmma_init_nodat(); } void free_initmem(void) diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index c805b3e2592b..632c3a55feed 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -15,10 +15,10 @@ #include <linux/uio.h> #include <linux/io.h> #include <asm/asm-extable.h> -#include <asm/ctl_reg.h> #include <asm/abs_lowcore.h> #include <asm/stacktrace.h> #include <asm/maccess.h> +#include <asm/ctlreg.h> unsigned long __bootdata_preserved(__memcpy_real_area); pte_t *__bootdata_preserved(memcpy_real_ptep); diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 1e2ea706aa22..01f9b39e65f5 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -7,199 +7,18 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/types.h> #include <linux/mm.h> -#include <linux/memblock.h> -#include <linux/gfp.h> -#include <linux/init.h> -#include <asm/asm-extable.h> -#include <asm/facility.h> #include <asm/page-states.h> +#include <asm/sections.h> +#include <asm/page.h> -static int cmma_flag = 1; - -static int __init cmma(char *str) -{ - bool enabled; - - if (!kstrtobool(str, &enabled)) - cmma_flag = enabled; - return 1; -} -__setup("cmma=", cmma); - -static inline int cmma_test_essa(void) -{ - unsigned long tmp = 0; - int rc = -EOPNOTSUPP; - - /* test ESSA_GET_STATE */ - asm volatile( - " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" - "0: la %[rc],0\n" - "1:\n" - EX_TABLE(0b,1b) - : [rc] "+&d" (rc), [tmp] "+&d" (tmp) - : [cmd] "i" (ESSA_GET_STATE)); - return rc; -} - -void __init cmma_init(void) -{ - if (!cmma_flag) - return; - if (cmma_test_essa()) { - cmma_flag = 0; - return; - } - if (test_facility(147)) - cmma_flag = 2; -} - -static inline void set_page_unused(struct page *page, int order) -{ - int i, rc; - - for (i = 0; i < (1 << order); i++) - asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" - : "=&d" (rc) - : "a" (page_to_phys(page + i)), - "i" (ESSA_SET_UNUSED)); -} - -static inline void set_page_stable_dat(struct page *page, int order) -{ - int i, rc; - - for (i = 0; i < (1 << order); i++) - asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" - : "=&d" (rc) - : "a" (page_to_phys(page + i)), - "i" (ESSA_SET_STABLE)); -} - -static inline void set_page_stable_nodat(struct page *page, int order) -{ - int i, rc; - - for (i = 0; i < (1 << order); i++) - asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" - : "=&d" (rc) - : "a" (page_to_phys(page + i)), - "i" (ESSA_SET_STABLE_NODAT)); -} - -static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end) -{ - unsigned long next; - struct page *page; - pmd_t *pmd; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); - if (pmd_none(*pmd) || pmd_large(*pmd)) - continue; - page = phys_to_page(pmd_val(*pmd)); - set_bit(PG_arch_1, &page->flags); - } while (pmd++, addr = next, addr != end); -} - -static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end) -{ - unsigned long next; - struct page *page; - pud_t *pud; - int i; - - pud = pud_offset(p4d, addr); - do { - next = pud_addr_end(addr, end); - if (pud_none(*pud) || pud_large(*pud)) - continue; - if (!pud_folded(*pud)) { - page = phys_to_page(pud_val(*pud)); - for (i = 0; i < 3; i++) - set_bit(PG_arch_1, &page[i].flags); - } - mark_kernel_pmd(pud, addr, next); - } while (pud++, addr = next, addr != end); -} - -static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end) -{ - unsigned long next; - struct page *page; - p4d_t *p4d; - int i; - - p4d = p4d_offset(pgd, addr); - do { - next = p4d_addr_end(addr, end); - if (p4d_none(*p4d)) - continue; - if (!p4d_folded(*p4d)) { - page = phys_to_page(p4d_val(*p4d)); - for (i = 0; i < 3; i++) - set_bit(PG_arch_1, &page[i].flags); - } - mark_kernel_pud(p4d, addr, next); - } while (p4d++, addr = next, addr != end); -} - -static void mark_kernel_pgd(void) -{ - unsigned long addr, next; - struct page *page; - pgd_t *pgd; - int i; - - addr = 0; - pgd = pgd_offset_k(addr); - do { - next = pgd_addr_end(addr, MODULES_END); - if (pgd_none(*pgd)) - continue; - if (!pgd_folded(*pgd)) { - page = phys_to_page(pgd_val(*pgd)); - for (i = 0; i < 3; i++) - set_bit(PG_arch_1, &page[i].flags); - } - mark_kernel_p4d(pgd, addr, next); - } while (pgd++, addr = next, addr != MODULES_END); -} - -void __init cmma_init_nodat(void) -{ - struct page *page; - unsigned long start, end, ix; - int i; - - if (cmma_flag < 2) - return; - /* Mark pages used in kernel page tables */ - mark_kernel_pgd(); - - /* Set all kernel pages not used for page tables to stable/no-dat */ - for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { - page = pfn_to_page(start); - for (ix = start; ix < end; ix++, page++) { - if (__test_and_clear_bit(PG_arch_1, &page->flags)) - continue; /* skip page table pages */ - if (!list_empty(&page->lru)) - continue; /* skip free pages */ - set_page_stable_nodat(page, 0); - } - } -} +int __bootdata_preserved(cmma_flag); void arch_free_page(struct page *page, int order) { if (!cmma_flag) return; - set_page_unused(page, order); + __set_page_unused(page_to_virt(page), 1UL << order); } void arch_alloc_page(struct page *page, int order) @@ -207,14 +26,7 @@ void arch_alloc_page(struct page *page, int order) if (!cmma_flag) return; if (cmma_flag < 2) - set_page_stable_dat(page, order); + __set_page_stable_dat(page_to_virt(page), 1UL << order); else - set_page_stable_nodat(page, order); -} - -void arch_set_page_dat(struct page *page, int order) -{ - if (!cmma_flag) - return; - set_page_stable_dat(page, order); + __set_page_stable_nodat(page_to_virt(page), 1UL << order); } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index b87e96c64b61..631e3a4ee2de 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -75,7 +75,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, break; } table = (unsigned long *)((unsigned long)old & mask); - crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce); + crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce.val); } else if (MACHINE_HAS_IDTE) { cspg(old, *old, new); } else { diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 75e1039f2ec5..008e487c94a6 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/mm.h> #include <asm/mmu_context.h> +#include <asm/page-states.h> #include <asm/pgalloc.h> #include <asm/gmap.h> #include <asm/tlb.h> @@ -43,11 +44,13 @@ __initcall(page_table_register_sysctl); unsigned long *crst_table_alloc(struct mm_struct *mm) { struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); + unsigned long *table; if (!ptdesc) return NULL; - arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER); - return (unsigned long *) ptdesc_to_virt(ptdesc); + table = ptdesc_to_virt(ptdesc); + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); + return table; } void crst_table_free(struct mm_struct *mm, unsigned long *table) @@ -61,8 +64,8 @@ static void __crst_table_upgrade(void *arg) /* change all active ASCEs to avoid the creation of new TLBs */ if (current->active_mm == mm) { - S390_lowcore.user_asce = mm->context.asce; - __ctl_load(S390_lowcore.user_asce, 7, 7); + S390_lowcore.user_asce.val = mm->context.asce; + local_ctl_load(7, &S390_lowcore.user_asce); } __tlb_flush_local(); } @@ -130,11 +133,6 @@ err_p4d: return -ENOMEM; } -static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) -{ - return atomic_fetch_xor(bits, v) ^ bits; -} - #ifdef CONFIG_PGSTE struct page *page_table_alloc_pgste(struct mm_struct *mm) @@ -145,6 +143,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm) ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (ptdesc) { table = (u64 *)ptdesc_to_virt(ptdesc); + __arch_set_page_dat(table, 1); memset64(table, _PAGE_INVALID, PTRS_PER_PTE); memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } @@ -158,125 +157,11 @@ void page_table_free_pgste(struct page *page) #endif /* CONFIG_PGSTE */ -/* - * A 2KB-pgtable is either upper or lower half of a normal page. - * The second half of the page may be unused or used as another - * 2KB-pgtable. - * - * Whenever possible the parent page for a new 2KB-pgtable is picked - * from the list of partially allocated pages mm_context_t::pgtable_list. - * In case the list is empty a new parent page is allocated and added to - * the list. - * - * When a parent page gets fully allocated it contains 2KB-pgtables in both - * upper and lower halves and is removed from mm_context_t::pgtable_list. - * - * When 2KB-pgtable is freed from to fully allocated parent page that - * page turns partially allocated and added to mm_context_t::pgtable_list. - * - * If 2KB-pgtable is freed from the partially allocated parent page that - * page turns unused and gets removed from mm_context_t::pgtable_list. - * Furthermore, the unused parent page is released. - * - * As follows from the above, no unallocated or fully allocated parent - * pages are contained in mm_context_t::pgtable_list. - * - * The upper byte (bits 24-31) of the parent page _refcount is used - * for tracking contained 2KB-pgtables and has the following format: - * - * PP AA - * 01234567 upper byte (bits 24-31) of struct page::_refcount - * || || - * || |+--- upper 2KB-pgtable is allocated - * || +---- lower 2KB-pgtable is allocated - * |+------- upper 2KB-pgtable is pending for removal - * +-------- lower 2KB-pgtable is pending for removal - * - * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why - * using _refcount is possible). - * - * When 2KB-pgtable is allocated the corresponding AA bit is set to 1. - * The parent page is either: - * - added to mm_context_t::pgtable_list in case the second half of the - * parent page is still unallocated; - * - removed from mm_context_t::pgtable_list in case both hales of the - * parent page are allocated; - * These operations are protected with mm_context_t::lock. - * - * When 2KB-pgtable is deallocated the corresponding AA bit is set to 0 - * and the corresponding PP bit is set to 1 in a single atomic operation. - * Thus, PP and AA bits corresponding to the same 2KB-pgtable are mutually - * exclusive and may never be both set to 1! - * The parent page is either: - * - added to mm_context_t::pgtable_list in case the second half of the - * parent page is still allocated; - * - removed from mm_context_t::pgtable_list in case the second half of - * the parent page is unallocated; - * These operations are protected with mm_context_t::lock. - * - * It is important to understand that mm_context_t::lock only protects - * mm_context_t::pgtable_list and AA bits, but not the parent page itself - * and PP bits. - * - * Releasing the parent page happens whenever the PP bit turns from 1 to 0, - * while both AA bits and the second PP bit are already unset. Then the - * parent page does not contain any 2KB-pgtable fragment anymore, and it has - * also been removed from mm_context_t::pgtable_list. It is safe to release - * the page therefore. - * - * PGSTE memory spaces use full 4KB-pgtables and do not need most of the - * logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable - * while the PP bits are never used, nor such a page is added to or removed - * from mm_context_t::pgtable_list. - * - * pte_free_defer() overrides those rules: it takes the page off pgtable_list, - * and prevents both 2K fragments from being reused. pte_free_defer() has to - * guarantee that its pgtable cannot be reused before the RCU grace period - * has elapsed (which page_table_free_rcu() does not actually guarantee). - * But for simplicity, because page->rcu_head overlays page->lru, and because - * the RCU callback might not be called before the mm_context_t has been freed, - * pte_free_defer() in this implementation prevents both fragments from being - * reused, and delays making the call to RCU until both fragments are freed. - */ unsigned long *page_table_alloc(struct mm_struct *mm) { - unsigned long *table; struct ptdesc *ptdesc; - unsigned int mask, bit; - - /* Try to get a fragment of a 4K page as a 2K page table */ - if (!mm_alloc_pgste(mm)) { - table = NULL; - spin_lock_bh(&mm->context.lock); - if (!list_empty(&mm->context.pgtable_list)) { - ptdesc = list_first_entry(&mm->context.pgtable_list, - struct ptdesc, pt_list); - mask = atomic_read(&ptdesc->_refcount) >> 24; - /* - * The pending removal bits must also be checked. - * Failure to do so might lead to an impossible - * value of (i.e 0x13 or 0x23) written to _refcount. - * Such values violate the assumption that pending and - * allocation bits are mutually exclusive, and the rest - * of the code unrails as result. That could lead to - * a whole bunch of races and corruptions. - */ - mask = (mask | (mask >> 4)) & 0x03U; - if (mask != 0x03U) { - table = (unsigned long *) ptdesc_to_virt(ptdesc); - bit = mask & 1; /* =1 -> second 2K */ - if (bit) - table += PTRS_PER_PTE; - atomic_xor_bits(&ptdesc->_refcount, - 0x01U << (bit + 24)); - list_del_init(&ptdesc->pt_list); - } - } - spin_unlock_bh(&mm->context.lock); - if (table) - return table; - } - /* Allocate a fresh page */ + unsigned long *table; + ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; @@ -284,177 +169,57 @@ unsigned long *page_table_alloc(struct mm_struct *mm) pagetable_free(ptdesc); return NULL; } - arch_set_page_dat(ptdesc_page(ptdesc), 0); - /* Initialize page table */ - table = (unsigned long *) ptdesc_to_virt(ptdesc); - if (mm_alloc_pgste(mm)) { - /* Return 4K page table with PGSTEs */ - INIT_LIST_HEAD(&ptdesc->pt_list); - atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); - memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); - memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); - } else { - /* Return the first 2K fragment of the page */ - atomic_xor_bits(&ptdesc->_refcount, 0x01U << 24); - memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE); - spin_lock_bh(&mm->context.lock); - list_add(&ptdesc->pt_list, &mm->context.pgtable_list); - spin_unlock_bh(&mm->context.lock); - } + table = ptdesc_to_virt(ptdesc); + __arch_set_page_dat(table, 1); + /* pt_list is used by gmap only */ + INIT_LIST_HEAD(&ptdesc->pt_list); + memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); + memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); return table; } -static void page_table_release_check(struct page *page, void *table, - unsigned int half, unsigned int mask) +static void pagetable_pte_dtor_free(struct ptdesc *ptdesc) { - char msg[128]; - - if (!IS_ENABLED(CONFIG_DEBUG_VM)) - return; - if (!mask && list_empty(&page->lru)) - return; - snprintf(msg, sizeof(msg), - "Invalid pgtable %p release half 0x%02x mask 0x%02x", - table, half, mask); - dump_page(page, msg); -} - -static void pte_free_now(struct rcu_head *head) -{ - struct ptdesc *ptdesc; - - ptdesc = container_of(head, struct ptdesc, pt_rcu_head); pagetable_pte_dtor(ptdesc); pagetable_free(ptdesc); } void page_table_free(struct mm_struct *mm, unsigned long *table) { - unsigned int mask, bit, half; struct ptdesc *ptdesc = virt_to_ptdesc(table); - if (!mm_alloc_pgste(mm)) { - /* Free 2K page table fragment of a 4K page */ - bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.lock); - /* - * Mark the page for delayed release. The actual release - * will happen outside of the critical section from this - * function or from __tlb_remove_table() - */ - mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24)); - mask >>= 24; - if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) { - /* - * Other half is allocated, and neither half has had - * its free deferred: add page to head of list, to make - * this freed half available for immediate reuse. - */ - list_add(&ptdesc->pt_list, &mm->context.pgtable_list); - } else { - /* If page is on list, now remove it. */ - list_del_init(&ptdesc->pt_list); - } - spin_unlock_bh(&mm->context.lock); - mask = atomic_xor_bits(&ptdesc->_refcount, 0x10U << (bit + 24)); - mask >>= 24; - if (mask != 0x00U) - return; - half = 0x01U << bit; - } else { - half = 0x03U; - mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); - mask >>= 24; - } - - page_table_release_check(ptdesc_page(ptdesc), table, half, mask); - if (folio_test_clear_active(ptdesc_folio(ptdesc))) - call_rcu(&ptdesc->pt_rcu_head, pte_free_now); - else - pte_free_now(&ptdesc->pt_rcu_head); + pagetable_pte_dtor_free(ptdesc); } -void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, - unsigned long vmaddr) +void __tlb_remove_table(void *table) { - struct mm_struct *mm; - unsigned int bit, mask; struct ptdesc *ptdesc = virt_to_ptdesc(table); + struct page *page = ptdesc_page(ptdesc); - mm = tlb->mm; - if (mm_alloc_pgste(mm)) { - gmap_unlink(mm, table, vmaddr); - table = (unsigned long *) ((unsigned long)table | 0x03U); - tlb_remove_ptdesc(tlb, table); + if (compound_order(page) == CRST_ALLOC_ORDER) { + /* pmd, pud, or p4d */ + pagetable_free(ptdesc); return; } - bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.lock); - /* - * Mark the page for delayed release. The actual release will happen - * outside of the critical section from __tlb_remove_table() or from - * page_table_free() - */ - mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24)); - mask >>= 24; - if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) { - /* - * Other half is allocated, and neither half has had - * its free deferred: add page to end of list, to make - * this freed half available for reuse once its pending - * bit has been cleared by __tlb_remove_table(). - */ - list_add_tail(&ptdesc->pt_list, &mm->context.pgtable_list); - } else { - /* If page is on list, now remove it. */ - list_del_init(&ptdesc->pt_list); - } - spin_unlock_bh(&mm->context.lock); - table = (unsigned long *) ((unsigned long) table | (0x01U << bit)); - tlb_remove_table(tlb, table); + pagetable_pte_dtor_free(ptdesc); } -void __tlb_remove_table(void *_table) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void pte_free_now(struct rcu_head *head) { - unsigned int mask = (unsigned long) _table & 0x03U, half = mask; - void *table = (void *)((unsigned long) _table ^ mask); - struct ptdesc *ptdesc = virt_to_ptdesc(table); - - switch (half) { - case 0x00U: /* pmd, pud, or p4d */ - pagetable_free(ptdesc); - return; - case 0x01U: /* lower 2K of a 4K page table */ - case 0x02U: /* higher 2K of a 4K page table */ - mask = atomic_xor_bits(&ptdesc->_refcount, mask << (4 + 24)); - mask >>= 24; - if (mask != 0x00U) - return; - break; - case 0x03U: /* 4K page table with pgstes */ - mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); - mask >>= 24; - break; - } + struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head); - page_table_release_check(ptdesc_page(ptdesc), table, half, mask); - if (folio_test_clear_active(ptdesc_folio(ptdesc))) - call_rcu(&ptdesc->pt_rcu_head, pte_free_now); - else - pte_free_now(&ptdesc->pt_rcu_head); + pagetable_pte_dtor_free(ptdesc); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) { - struct page *page; + struct ptdesc *ptdesc = virt_to_ptdesc(pgtable); - page = virt_to_page(pgtable); - SetPageActive(page); - page_table_free(mm, (unsigned long *)pgtable); + call_rcu(&ptdesc->pt_rcu_head, pte_free_now); /* - * page_table_free() does not do the pgste gmap_unlink() which - * page_table_free_rcu() does: warn us if pgste ever reaches here. + * THPs are not allowed for KVM guests. Warn if pgste ever reaches here. + * Turn to the generic pte_free_defer() version once gmap is removed. */ WARN_ON_ONCE(mm_has_pgste(mm)); } @@ -487,11 +252,10 @@ static unsigned long *base_crst_alloc(unsigned long val) unsigned long *table; struct ptdesc *ptdesc; - ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, CRST_ALLOC_ORDER); + ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); if (!ptdesc) return NULL; table = ptdesc_address(ptdesc); - crst_table_init(table, val); return table; } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 6957d2ed97bf..186a020857cf 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -12,8 +12,10 @@ #include <linux/hugetlb.h> #include <linux/slab.h> #include <linux/sort.h> +#include <asm/page-states.h> #include <asm/cacheflush.h> #include <asm/nospec-branch.h> +#include <asm/ctlreg.h> #include <asm/pgalloc.h> #include <asm/setup.h> #include <asm/tlbflush.h> @@ -45,8 +47,10 @@ void *vmem_crst_alloc(unsigned long val) unsigned long *table; table = vmem_alloc_pages(CRST_ALLOC_ORDER); - if (table) - crst_table_init(table, val); + if (!table) + return NULL; + crst_table_init(table, val); + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; } @@ -62,6 +66,7 @@ pte_t __ref *vmem_pte_alloc(void) if (!pte) return NULL; memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); + __arch_set_page_dat(pte, 1); return pte; } @@ -498,6 +503,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return ret; } +#ifdef CONFIG_MEMORY_HOTPLUG + void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) { @@ -506,6 +513,8 @@ void vmemmap_free(unsigned long start, unsigned long end, mutex_unlock(&vmem_mutex); } +#endif + void vmem_remove_mapping(unsigned long start, unsigned long size) { mutex_lock(&vmem_mutex); @@ -659,7 +668,7 @@ void __init vmem_map_init(void) __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size)); } if (MACHINE_HAS_NX) - ctl_set_bit(0, 20); + system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 5ae31ca9dd44..0547a10406e7 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -3,7 +3,7 @@ # Makefile for the s390 PCI subsystem. # -obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ +obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ pci_bus.o pci_kvm_hook.o obj-$(CONFIG_PCI_IOV) += pci_iov.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index d34d5813d006..676ac74026a8 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -124,7 +124,11 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; - fib.pal = limit; + /* Work around off by one in ISM virt device */ + if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base) + fib.pal = limit + (1 << 12); + else + fib.pal = limit; fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; fib.gd = zdev->gisa; cc = zpci_mod_fc(req, &fib, status); @@ -153,6 +157,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) int zpci_fmb_enable_device(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE); + struct zpci_iommu_ctrs *ctrs; struct zpci_fib fib = {0}; u8 cc, status; @@ -165,9 +170,15 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev) WARN_ON((u64) zdev->fmb & 0xf); /* reset software counters */ - atomic64_set(&zdev->allocated_pages, 0); - atomic64_set(&zdev->mapped_pages, 0); - atomic64_set(&zdev->unmapped_pages, 0); + ctrs = zpci_get_iommu_ctrs(zdev); + if (ctrs) { + atomic64_set(&ctrs->mapped_pages, 0); + atomic64_set(&ctrs->unmapped_pages, 0); + atomic64_set(&ctrs->global_rpcits, 0); + atomic64_set(&ctrs->sync_map_rpcits, 0); + atomic64_set(&ctrs->sync_rpcits, 0); + } + fib.fmb_addr = virt_to_phys(zdev->fmb); fib.gd = zdev->gisa; @@ -582,7 +593,6 @@ int pcibios_device_add(struct pci_dev *pdev) pdev->no_vf_scan = 1; pdev->dev.groups = zpci_attr_groups; - pdev->dev.dma_ops = &s390_pci_dma_ops; zpci_map_resources(pdev); for (i = 0; i < PCI_STD_NUM_BARS; i++) { @@ -756,8 +766,6 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) if (zdev->dma_table) rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, virt_to_phys(zdev->dma_table), &status); - else - rc = zpci_dma_init_device(zdev); if (rc) { zpci_disable_device(zdev); return rc; @@ -865,11 +873,6 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); - if (zdev->dma_table) { - rc = zpci_dma_exit_device(zdev); - if (rc) - return rc; - } if (zdev_enabled(zdev)) { rc = zpci_disable_device(zdev); if (rc) @@ -918,8 +921,6 @@ void zpci_release_device(struct kref *kref) if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); - if (zdev->dma_table) - zpci_dma_exit_device(zdev); if (zdev_enabled(zdev)) zpci_disable_device(zdev); @@ -1094,7 +1095,7 @@ static int __init pci_base_init(void) if (MACHINE_HAS_PCI_MIO) { static_branch_enable(&have_mio); - ctl_set_bit(2, 5); + system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT); } rc = zpci_debug_init(); @@ -1109,10 +1110,6 @@ static int __init pci_base_init(void) if (rc) goto out_irq; - rc = zpci_dma_init(); - if (rc) - goto out_dma; - rc = clp_scan_pci_devices(); if (rc) goto out_find; @@ -1122,8 +1119,6 @@ static int __init pci_base_init(void) return 0; out_find: - zpci_dma_exit(); -out_dma: zpci_irq_exit(); out_irq: zpci_mem_exit(); diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 32245b970a0c..daa5d7450c7d 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -47,11 +47,6 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev) rc = zpci_enable_device(zdev); if (rc) return rc; - rc = zpci_dma_init_device(zdev); - if (rc) { - zpci_disable_device(zdev); - return rc; - } } if (!zdev->has_resources) { diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index ca6bd98eec13..6dde2263c79d 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -53,9 +53,11 @@ static char *pci_fmt3_names[] = { }; static char *pci_sw_names[] = { - "Allocated pages", "Mapped pages", "Unmapped pages", + "Global RPCITs", + "Sync Map RPCITs", + "Sync RPCITs", }; static void pci_fmb_show(struct seq_file *m, char *name[], int length, @@ -69,10 +71,14 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length, static void pci_sw_counter_show(struct seq_file *m) { - struct zpci_dev *zdev = m->private; - atomic64_t *counter = &zdev->allocated_pages; + struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private); + atomic64_t *counter; int i; + if (!ctrs) + return; + + counter = &ctrs->mapped_pages; for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++) seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i], atomic64_read(counter)); diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c deleted file mode 100644 index 99209085c75b..000000000000 --- a/arch/s390/pci/pci_dma.c +++ /dev/null @@ -1,746 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright IBM Corp. 2012 - * - * Author(s): - * Jan Glauber <jang@linux.vnet.ibm.com> - */ - -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/export.h> -#include <linux/iommu-helper.h> -#include <linux/dma-map-ops.h> -#include <linux/vmalloc.h> -#include <linux/pci.h> -#include <asm/pci_dma.h> - -static struct kmem_cache *dma_region_table_cache; -static struct kmem_cache *dma_page_table_cache; -static int s390_iommu_strict; -static u64 s390_iommu_aperture; -static u32 s390_iommu_aperture_factor = 1; - -static int zpci_refresh_global(struct zpci_dev *zdev) -{ - return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, - zdev->iommu_pages * PAGE_SIZE); -} - -unsigned long *dma_alloc_cpu_table(gfp_t gfp) -{ - unsigned long *table, *entry; - - table = kmem_cache_alloc(dma_region_table_cache, gfp); - if (!table) - return NULL; - - for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) - *entry = ZPCI_TABLE_INVALID; - return table; -} - -static void dma_free_cpu_table(void *table) -{ - kmem_cache_free(dma_region_table_cache, table); -} - -static unsigned long *dma_alloc_page_table(gfp_t gfp) -{ - unsigned long *table, *entry; - - table = kmem_cache_alloc(dma_page_table_cache, gfp); - if (!table) - return NULL; - - for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) - *entry = ZPCI_PTE_INVALID; - return table; -} - -static void dma_free_page_table(void *table) -{ - kmem_cache_free(dma_page_table_cache, table); -} - -static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) -{ - unsigned long old_rte, rte; - unsigned long *sto; - - rte = READ_ONCE(*rtep); - if (reg_entry_isvalid(rte)) { - sto = get_rt_sto(rte); - } else { - sto = dma_alloc_cpu_table(gfp); - if (!sto) - return NULL; - - set_rt_sto(&rte, virt_to_phys(sto)); - validate_rt_entry(&rte); - entry_clr_protected(&rte); - - old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); - if (old_rte != ZPCI_TABLE_INVALID) { - /* Somone else was faster, use theirs */ - dma_free_cpu_table(sto); - sto = get_rt_sto(old_rte); - } - } - return sto; -} - -static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) -{ - unsigned long old_ste, ste; - unsigned long *pto; - - ste = READ_ONCE(*step); - if (reg_entry_isvalid(ste)) { - pto = get_st_pto(ste); - } else { - pto = dma_alloc_page_table(gfp); - if (!pto) - return NULL; - set_st_pto(&ste, virt_to_phys(pto)); - validate_st_entry(&ste); - entry_clr_protected(&ste); - - old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); - if (old_ste != ZPCI_TABLE_INVALID) { - /* Somone else was faster, use theirs */ - dma_free_page_table(pto); - pto = get_st_pto(old_ste); - } - } - return pto; -} - -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, - gfp_t gfp) -{ - unsigned long *sto, *pto; - unsigned int rtx, sx, px; - - rtx = calc_rtx(dma_addr); - sto = dma_get_seg_table_origin(&rto[rtx], gfp); - if (!sto) - return NULL; - - sx = calc_sx(dma_addr); - pto = dma_get_page_table_origin(&sto[sx], gfp); - if (!pto) - return NULL; - - px = calc_px(dma_addr); - return &pto[px]; -} - -void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) -{ - unsigned long pte; - - pte = READ_ONCE(*ptep); - if (flags & ZPCI_PTE_INVALID) { - invalidate_pt_entry(&pte); - } else { - set_pt_pfaa(&pte, page_addr); - validate_pt_entry(&pte); - } - - if (flags & ZPCI_TABLE_PROTECTED) - entry_set_protected(&pte); - else - entry_clr_protected(&pte); - - xchg(ptep, pte); -} - -static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, - dma_addr_t dma_addr, size_t size, int flags) -{ - unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - phys_addr_t page_addr = (pa & PAGE_MASK); - unsigned long *entry; - int i, rc = 0; - - if (!nr_pages) - return -EINVAL; - - if (!zdev->dma_table) - return -EINVAL; - - for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, - GFP_ATOMIC); - if (!entry) { - rc = -ENOMEM; - goto undo_cpu_trans; - } - dma_update_cpu_trans(entry, page_addr, flags); - page_addr += PAGE_SIZE; - dma_addr += PAGE_SIZE; - } - -undo_cpu_trans: - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { - flags = ZPCI_PTE_INVALID; - while (i-- > 0) { - page_addr -= PAGE_SIZE; - dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, - GFP_ATOMIC); - if (!entry) - break; - dma_update_cpu_trans(entry, page_addr, flags); - } - } - return rc; -} - -static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, - size_t size, int flags) -{ - unsigned long irqflags; - int ret; - - /* - * With zdev->tlb_refresh == 0, rpcit is not required to establish new - * translations when previously invalid translation-table entries are - * validated. With lazy unmap, rpcit is skipped for previously valid - * entries, but a global rpcit is then required before any address can - * be re-used, i.e. after each iommu bitmap wrap-around. - */ - if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) { - if (!zdev->tlb_refresh) - return 0; - } else { - if (!s390_iommu_strict) - return 0; - } - - ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, - PAGE_ALIGN(size)); - if (ret == -ENOMEM && !s390_iommu_strict) { - /* enable the hypervisor to free some resources */ - if (zpci_refresh_global(zdev)) - goto out; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); - bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, - zdev->lazy_bitmap, zdev->iommu_pages); - bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); - ret = 0; - } -out: - return ret; -} - -static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, - dma_addr_t dma_addr, size_t size, int flags) -{ - int rc; - - rc = __dma_update_trans(zdev, pa, dma_addr, size, flags); - if (rc) - return rc; - - rc = __dma_purge_tlb(zdev, dma_addr, size, flags); - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) - __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID); - - return rc; -} - -void dma_free_seg_table(unsigned long entry) -{ - unsigned long *sto = get_rt_sto(entry); - int sx; - - for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) - if (reg_entry_isvalid(sto[sx])) - dma_free_page_table(get_st_pto(sto[sx])); - - dma_free_cpu_table(sto); -} - -void dma_cleanup_tables(unsigned long *table) -{ - int rtx; - - if (!table) - return; - - for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) - if (reg_entry_isvalid(table[rtx])) - dma_free_seg_table(table[rtx]); - - dma_free_cpu_table(table); -} - -static unsigned long __dma_alloc_iommu(struct device *dev, - unsigned long start, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - - return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, - start, size, zdev->start_dma >> PAGE_SHIFT, - dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT), - 0); -} - -static dma_addr_t dma_alloc_address(struct device *dev, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long offset, flags; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - offset = __dma_alloc_iommu(dev, zdev->next_bit, size); - if (offset == -1) { - if (!s390_iommu_strict) { - /* global flush before DMA addresses are reused */ - if (zpci_refresh_global(zdev)) - goto out_error; - - bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, - zdev->lazy_bitmap, zdev->iommu_pages); - bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); - } - /* wrap-around */ - offset = __dma_alloc_iommu(dev, 0, size); - if (offset == -1) - goto out_error; - } - zdev->next_bit = offset + size; - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - - return zdev->start_dma + offset * PAGE_SIZE; - -out_error: - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - return DMA_MAPPING_ERROR; -} - -static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long flags, offset; - - offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - if (!zdev->iommu_bitmap) - goto out; - - if (s390_iommu_strict) - bitmap_clear(zdev->iommu_bitmap, offset, size); - else - bitmap_set(zdev->lazy_bitmap, offset, size); - -out: - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); -} - -static inline void zpci_err_dma(unsigned long rc, unsigned long addr) -{ - struct { - unsigned long rc; - unsigned long addr; - } __packed data = {rc, addr}; - - zpci_err_hex(&data, sizeof(data)); -} - -static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long pa = page_to_phys(page) + offset; - int flags = ZPCI_PTE_VALID; - unsigned long nr_pages; - dma_addr_t dma_addr; - int ret; - - /* This rounds up number of pages based on size and offset */ - nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); - dma_addr = dma_alloc_address(dev, nr_pages); - if (dma_addr == DMA_MAPPING_ERROR) { - ret = -ENOSPC; - goto out_err; - } - - /* Use rounded up size */ - size = nr_pages * PAGE_SIZE; - - if (direction == DMA_NONE || direction == DMA_TO_DEVICE) - flags |= ZPCI_TABLE_PROTECTED; - - ret = dma_update_trans(zdev, pa, dma_addr, size, flags); - if (ret) - goto out_free; - - atomic64_add(nr_pages, &zdev->mapped_pages); - return dma_addr + (offset & ~PAGE_MASK); - -out_free: - dma_free_address(dev, dma_addr, nr_pages); -out_err: - zpci_err("map error:\n"); - zpci_err_dma(ret, pa); - return DMA_MAPPING_ERROR; -} - -static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - int npages, ret; - - npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); - dma_addr = dma_addr & PAGE_MASK; - ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, - ZPCI_PTE_INVALID); - if (ret) { - zpci_err("unmap error:\n"); - zpci_err_dma(ret, dma_addr); - return; - } - - atomic64_add(npages, &zdev->unmapped_pages); - dma_free_address(dev, dma_addr, npages); -} - -static void *s390_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - struct page *page; - phys_addr_t pa; - dma_addr_t map; - - size = PAGE_ALIGN(size); - page = alloc_pages(flag | __GFP_ZERO, get_order(size)); - if (!page) - return NULL; - - pa = page_to_phys(page); - map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0); - if (dma_mapping_error(dev, map)) { - __free_pages(page, get_order(size)); - return NULL; - } - - atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); - if (dma_handle) - *dma_handle = map; - return phys_to_virt(pa); -} - -static void s390_dma_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - - size = PAGE_ALIGN(size); - atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); - s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0); - free_pages((unsigned long)vaddr, get_order(size)); -} - -/* Map a segment into a contiguous dma address area */ -static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, - size_t size, dma_addr_t *handle, - enum dma_data_direction dir) -{ - unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - dma_addr_t dma_addr_base, dma_addr; - int flags = ZPCI_PTE_VALID; - struct scatterlist *s; - phys_addr_t pa = 0; - int ret; - - dma_addr_base = dma_alloc_address(dev, nr_pages); - if (dma_addr_base == DMA_MAPPING_ERROR) - return -ENOMEM; - - dma_addr = dma_addr_base; - if (dir == DMA_NONE || dir == DMA_TO_DEVICE) - flags |= ZPCI_TABLE_PROTECTED; - - for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { - pa = page_to_phys(sg_page(s)); - ret = __dma_update_trans(zdev, pa, dma_addr, - s->offset + s->length, flags); - if (ret) - goto unmap; - - dma_addr += s->offset + s->length; - } - ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); - if (ret) - goto unmap; - - *handle = dma_addr_base; - atomic64_add(nr_pages, &zdev->mapped_pages); - - return ret; - -unmap: - dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, - ZPCI_PTE_INVALID); - dma_free_address(dev, dma_addr_base, nr_pages); - zpci_err("map error:\n"); - zpci_err_dma(ret, pa); - return ret; -} - -static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nr_elements, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s = sg, *start = sg, *dma = sg; - unsigned int max = dma_get_max_seg_size(dev); - unsigned int size = s->offset + s->length; - unsigned int offset = s->offset; - int count = 0, i, ret; - - for (i = 1; i < nr_elements; i++) { - s = sg_next(s); - - s->dma_length = 0; - - if (s->offset || (size & ~PAGE_MASK) || - size + s->length > max) { - ret = __s390_dma_map_sg(dev, start, size, - &dma->dma_address, dir); - if (ret) - goto unmap; - - dma->dma_address += offset; - dma->dma_length = size - offset; - - size = offset = s->offset; - start = s; - dma = sg_next(dma); - count++; - } - size += s->length; - } - ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir); - if (ret) - goto unmap; - - dma->dma_address += offset; - dma->dma_length = size - offset; - - return count + 1; -unmap: - for_each_sg(sg, s, count, i) - s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), - dir, attrs); - - return ret; -} - -static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nr_elements, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nr_elements, i) { - if (s->dma_length) - s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, - dir, attrs); - s->dma_address = 0; - s->dma_length = 0; - } -} - -static unsigned long *bitmap_vzalloc(size_t bits, gfp_t flags) -{ - size_t n = BITS_TO_LONGS(bits); - size_t bytes; - - if (unlikely(check_mul_overflow(n, sizeof(unsigned long), &bytes))) - return NULL; - - return vzalloc(bytes); -} - -int zpci_dma_init_device(struct zpci_dev *zdev) -{ - u8 status; - int rc; - - /* - * At this point, if the device is part of an IOMMU domain, this would - * be a strong hint towards a bug in the IOMMU API (common) code and/or - * simultaneous access via IOMMU and DMA API. So let's issue a warning. - */ - WARN_ON(zdev->s390_domain); - - spin_lock_init(&zdev->iommu_bitmap_lock); - - zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL); - if (!zdev->dma_table) { - rc = -ENOMEM; - goto out; - } - - /* - * Restrict the iommu bitmap size to the minimum of the following: - * - s390_iommu_aperture which defaults to high_memory - * - 3-level pagetable address limit minus start_dma offset - * - DMA address range allowed by the hardware (clp query pci fn) - * - * Also set zdev->end_dma to the actual end address of the usable - * range, instead of the theoretical maximum as reported by hardware. - * - * This limits the number of concurrently usable DMA mappings since - * for each DMA mapped memory address we need a DMA address including - * extra DMA addresses for multiple mappings of the same memory address. - */ - zdev->start_dma = PAGE_ALIGN(zdev->start_dma); - zdev->iommu_size = min3(s390_iommu_aperture, - ZPCI_TABLE_SIZE_RT - zdev->start_dma, - zdev->end_dma - zdev->start_dma + 1); - zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; - zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; - zdev->iommu_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); - if (!zdev->iommu_bitmap) { - rc = -ENOMEM; - goto free_dma_table; - } - if (!s390_iommu_strict) { - zdev->lazy_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); - if (!zdev->lazy_bitmap) { - rc = -ENOMEM; - goto free_bitmap; - } - - } - if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status)) { - rc = -EIO; - goto free_bitmap; - } - - return 0; -free_bitmap: - vfree(zdev->iommu_bitmap); - zdev->iommu_bitmap = NULL; - vfree(zdev->lazy_bitmap); - zdev->lazy_bitmap = NULL; -free_dma_table: - dma_free_cpu_table(zdev->dma_table); - zdev->dma_table = NULL; -out: - return rc; -} - -int zpci_dma_exit_device(struct zpci_dev *zdev) -{ - int cc = 0; - - /* - * At this point, if the device is part of an IOMMU domain, this would - * be a strong hint towards a bug in the IOMMU API (common) code and/or - * simultaneous access via IOMMU and DMA API. So let's issue a warning. - */ - WARN_ON(zdev->s390_domain); - if (zdev_enabled(zdev)) - cc = zpci_unregister_ioat(zdev, 0); - /* - * cc == 3 indicates the function is gone already. This can happen - * if the function was deconfigured/disabled suddenly and we have not - * received a new handle yet. - */ - if (cc && cc != 3) - return -EIO; - - dma_cleanup_tables(zdev->dma_table); - zdev->dma_table = NULL; - vfree(zdev->iommu_bitmap); - zdev->iommu_bitmap = NULL; - vfree(zdev->lazy_bitmap); - zdev->lazy_bitmap = NULL; - zdev->next_bit = 0; - return 0; -} - -static int __init dma_alloc_cpu_table_caches(void) -{ - dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", - ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, - 0, NULL); - if (!dma_region_table_cache) - return -ENOMEM; - - dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", - ZPCI_PT_SIZE, ZPCI_PT_ALIGN, - 0, NULL); - if (!dma_page_table_cache) { - kmem_cache_destroy(dma_region_table_cache); - return -ENOMEM; - } - return 0; -} - -int __init zpci_dma_init(void) -{ - s390_iommu_aperture = (u64)virt_to_phys(high_memory); - if (!s390_iommu_aperture_factor) - s390_iommu_aperture = ULONG_MAX; - else - s390_iommu_aperture *= s390_iommu_aperture_factor; - - return dma_alloc_cpu_table_caches(); -} - -void zpci_dma_exit(void) -{ - kmem_cache_destroy(dma_page_table_cache); - kmem_cache_destroy(dma_region_table_cache); -} - -const struct dma_map_ops s390_pci_dma_ops = { - .alloc = s390_dma_alloc, - .free = s390_dma_free, - .map_sg = s390_dma_map_sg, - .unmap_sg = s390_dma_unmap_sg, - .map_page = s390_dma_map_pages, - .unmap_page = s390_dma_unmap_pages, - .mmap = dma_common_mmap, - .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, - /* dma_supported is unconditionally true without a callback */ -}; -EXPORT_SYMBOL_GPL(s390_pci_dma_ops); - -static int __init s390_iommu_setup(char *str) -{ - if (!strcmp(str, "strict")) - s390_iommu_strict = 1; - return 1; -} - -__setup("s390_iommu=", s390_iommu_setup); - -static int __init s390_iommu_aperture_setup(char *str) -{ - if (kstrtou32(str, 10, &s390_iommu_aperture_factor)) - s390_iommu_aperture_factor = 1; - return 1; -} - -__setup("s390_iommu_aperture=", s390_iommu_aperture_setup); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b9324ca2eb94..4d9773ef9e0a 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -59,9 +59,16 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) } } -static bool is_passed_through(struct zpci_dev *zdev) +static bool is_passed_through(struct pci_dev *pdev) { - return zdev->s390_domain; + struct zpci_dev *zdev = to_zpci(pdev); + bool ret; + + mutex_lock(&zdev->kzdev_lock); + ret = !!zdev->kzdev; + mutex_unlock(&zdev->kzdev_lock); + + return ret; } static bool is_driver_supported(struct pci_driver *driver) @@ -176,7 +183,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) } pdev->error_state = pci_channel_io_frozen; - if (is_passed_through(to_zpci(pdev))) { + if (is_passed_through(pdev)) { pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", pci_name(pdev)); goto out_unlock; @@ -239,7 +246,7 @@ static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es) * we will inject the error event and let the guest recover the device * itself. */ - if (is_passed_through(to_zpci(pdev))) + if (is_passed_through(pdev)) goto out; driver = to_pci_driver(pdev->dev.driver); if (driver && driver->err_handler && driver->err_handler->error_detected) @@ -306,8 +313,6 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) /* Even though the device is already gone we still * need to free zPCI resources as part of the disable. */ - if (zdev->dma_table) - zpci_dma_exit_device(zdev); if (zdev_enabled(zdev)) zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index cae280e5c047..8a7abac51816 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -56,6 +56,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, struct pci_dev *pdev = to_pci_dev(dev); struct zpci_dev *zdev = to_zpci(pdev); int ret = 0; + u8 status; /* Can't use device_remove_self() here as that would lead us to lock * the pci_rescan_remove_lock while holding the device' kernfs lock. @@ -82,12 +83,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, pci_lock_rescan_remove(); if (pci_dev_is_added(pdev)) { pci_stop_and_remove_bus_device(pdev); - if (zdev->dma_table) { - ret = zpci_dma_exit_device(zdev); - if (ret) - goto out; - } - if (zdev_enabled(zdev)) { ret = zpci_disable_device(zdev); /* @@ -105,14 +100,16 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, ret = zpci_enable_device(zdev); if (ret) goto out; - ret = zpci_dma_init_device(zdev); - if (ret) { - zpci_disable_device(zdev); - goto out; + + if (zdev->dma_table) { + ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, + virt_to_phys(zdev->dma_table), &status); + if (ret) + zpci_disable_device(zdev); } - pci_rescan_bus(zdev->zbus->bus); } out: + pci_rescan_bus(zdev->zbus->bus); pci_unlock_rescan_remove(); if (kn) sysfs_unbreak_active_protection(kn); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index b3da2757faaf..3d80515298d2 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -7,7 +7,6 @@ * Copyright (C) 1999 Niibe Yutaka * Copyright (C) 2002 - 2010 Paul Mundt */ -#include <linux/screen_info.h> #include <linux/ioport.h> #include <linux/init.h> #include <linux/initrd.h> @@ -69,10 +68,6 @@ EXPORT_SYMBOL(cpu_data); struct sh_machine_vector sh_mv = { .mv_name = "generic", }; EXPORT_SYMBOL(sh_mv); -#ifdef CONFIG_VT -struct screen_info screen_info; -#endif - extern int root_mountflags; #define RAMDISK_IMAGE_START_MASK 0x07FF diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 7417345c6639..5f6035936131 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -76,9 +76,8 @@ install: archheaders: $(Q)$(MAKE) $(build)=arch/sparc/kernel/syscalls all -PHONY += vdso_install -vdso_install: - $(Q)$(MAKE) $(build)=arch/sparc/vdso $@ +vdso-install-$(CONFIG_SPARC64) += arch/sparc/vdso/vdso64.so.dbg +vdso-install-$(CONFIG_COMPAT) += arch/sparc/vdso/vdso32.so.dbg # This is the image used for packaging KBUILD_IMAGE := $(boot)/zImage diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 34ef7febf0d5..e3b72a7b46d3 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -17,7 +17,6 @@ #include <linux/initrd.h> #include <asm/smp.h> #include <linux/user.h> -#include <linux/screen_info.h> #include <linux/delay.h> #include <linux/fs.h> #include <linux/seq_file.h> @@ -51,18 +50,6 @@ #include "kernel.h" -struct screen_info screen_info = { - 0, 0, /* orig-x, orig-y */ - 0, /* unused */ - 0, /* orig-video-page */ - 0, /* orig-video-mode */ - 128, /* orig-video-cols */ - 0,0,0, /* ega_ax, ega_bx, ega_cx */ - 54, /* orig-video-lines */ - 0, /* orig-video-isVGA */ - 16 /* orig-video-points */ -}; - /* Typing sync at the prom prompt calls the function pointed to by * romvec->pv_synchook which I set to the following function. * This should sync all filesystems and return, for now it just diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 6546ca9d4d3f..6a4797dec34b 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -15,7 +15,6 @@ #include <linux/ptrace.h> #include <asm/smp.h> #include <linux/user.h> -#include <linux/screen_info.h> #include <linux/delay.h> #include <linux/fs.h> #include <linux/seq_file.h> @@ -68,18 +67,6 @@ DEFINE_SPINLOCK(ns87303_lock); EXPORT_SYMBOL(ns87303_lock); -struct screen_info screen_info = { - 0, 0, /* orig-x, orig-y */ - 0, /* unused */ - 0, /* orig-video-page */ - 0, /* orig-video-mode */ - 128, /* orig-video-cols */ - 0, 0, 0, /* unused, ega_bx, unused */ - 54, /* orig-video-lines */ - 0, /* orig-video-isVGA */ - 16 /* orig-video-points */ -}; - static void prom_console_write(struct console *con, const char *s, unsigned int n) { diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index 77d7b9032158..d08c3a0443f3 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -116,30 +116,3 @@ quiet_cmd_vdso = VDSO $@ VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 -Bsymbolic GCOV_PROFILE := n - -# -# Install the unstripped copies of vdso*.so. If our toolchain supports -# build-id, install .build-id links as well. -# -quiet_cmd_vdso_install = INSTALL $(@:install_%=%) -define cmd_vdso_install - cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \ - if readelf -n $< |grep -q 'Build ID'; then \ - buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \ - first=`echo $$buildid | cut -b-2`; \ - last=`echo $$buildid | cut -b3-`; \ - mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \ - ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \ - fi -endef - -vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%) - -$(MODLIB)/vdso: FORCE - @mkdir -p $(MODLIB)/vdso - -$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE - $(call cmd,vdso_install) - -PHONY += vdso_install $(vdso_img_insttargets) -vdso_install: $(vdso_img_insttargets) FORCE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a917f62eff2..3762f41bb092 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1313,16 +1313,41 @@ config MICROCODE def_bool y depends on CPU_SUP_AMD || CPU_SUP_INTEL +config MICROCODE_INITRD32 + def_bool y + depends on MICROCODE && X86_32 && BLK_DEV_INITRD + config MICROCODE_LATE_LOADING bool "Late microcode loading (DANGEROUS)" default n - depends on MICROCODE + depends on MICROCODE && SMP help Loading microcode late, when the system is up and executing instructions is a tricky business and should be avoided if possible. Just the sequence of synchronizing all cores and SMT threads is one fragile dance which does not guarantee that cores might not softlock after the loading. Therefore, - use this at your own risk. Late loading taints the kernel too. + use this at your own risk. Late loading taints the kernel unless the + microcode header indicates that it is safe for late loading via the + minimal revision check. This minimal revision check can be enforced on + the kernel command line with "microcode.minrev=Y". + +config MICROCODE_LATE_FORCE_MINREV + bool "Enforce late microcode loading minimal revision check" + default n + depends on MICROCODE_LATE_LOADING + help + To prevent that users load microcode late which modifies already + in use features, newer microcode patches have a minimum revision field + in the microcode header, which tells the kernel which minimum + revision must be active in the CPU to safely load that new microcode + late into the running system. If disabled the check will not + be enforced but the kernel will be tainted when the minimal + revision check fails. + + This minimal revision check can also be controlled via the + "microcode.minrev" parameter on the kernel command line. + + If unsure say Y. config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 4de6ddaf4b84..1a068de12a56 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -294,9 +294,10 @@ PHONY += install install: $(call cmd,install) -PHONY += vdso_install -vdso_install: - $(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@ +vdso-install-$(CONFIG_X86_64) += arch/x86/entry/vdso/vdso64.so.dbg +vdso-install-$(CONFIG_X86_X32_ABI) += arch/x86/entry/vdso/vdsox32.so.dbg +vdso-install-$(CONFIG_X86_32) += arch/x86/entry/vdso/vdso32.so.dbg +vdso-install-$(CONFIG_IA32_EMULATION) += arch/x86/entry/vdso/vdso32.so.dbg archprepare: checkbin checkbin: diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink index 936093d29160..fef2e977cc7d 100644 --- a/arch/x86/Makefile.postlink +++ b/arch/x86/Makefile.postlink @@ -34,9 +34,6 @@ ifeq ($(CONFIG_X86_NEED_RELOCS),y) $(call cmd,strip_relocs) endif -%.ko: FORCE - @true - clean: @rm -f $(OUT_RELOCS)/vmlinux.relocs diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index d11206ceff3b..1b5d17a9f70d 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -106,6 +106,27 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport) } EXPORT_SYMBOL_GPL(tdx_mcall_get_report0); +/** + * tdx_hcall_get_quote() - Wrapper to request TD Quote using GetQuote + * hypercall. + * @buf: Address of the directly mapped shared kernel buffer which + * contains TDREPORT. The same buffer will be used by VMM to + * store the generated TD Quote output. + * @size: size of the tdquote buffer (4KB-aligned). + * + * Refer to section titled "TDG.VP.VMCALL<GetQuote>" in the TDX GHCI + * v1.0 specification for more information on GetQuote hypercall. + * It is used in the TDX guest driver module to get the TD Quote. + * + * Return 0 on success or error code on failure. + */ +u64 tdx_hcall_get_quote(u8 *buf, size_t size) +{ + /* Since buf is a shared memory, set the shared (decrypted) bits */ + return _tdx_hypercall(TDVMCALL_GET_QUOTE, cc_mkdec(virt_to_phys(buf)), size, 0, 0); +} +EXPORT_SYMBOL_GPL(tdx_hcall_get_quote); + static void __noreturn tdx_panic(const char *msg) { struct tdx_module_args args = { diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 83c0afb7c741..b1b8dd1608f7 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -191,31 +191,4 @@ GCOV_PROFILE := n quiet_cmd_vdso_and_check = VDSO $@ cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) -# -# Install the unstripped copies of vdso*.so. If our toolchain supports -# build-id, install .build-id links as well. -# -quiet_cmd_vdso_install = INSTALL $(@:install_%=%) -define cmd_vdso_install - cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \ - if readelf -n $< |grep -q 'Build ID'; then \ - buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \ - first=`echo $$buildid | cut -b-2`; \ - last=`echo $$buildid | cut -b3-`; \ - mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \ - ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \ - fi -endef - -vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%) - -$(MODLIB)/vdso: FORCE - @mkdir -p $(MODLIB)/vdso - -$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso - $(call cmd,vdso_install) - -PHONY += vdso_install $(vdso_img_insttargets) -vdso_install: $(vdso_img_insttargets) - clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so* diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b0d192f613b7..d21f48f1c242 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -276,7 +276,8 @@ struct apic { u32 disable_esr : 1, dest_mode_logical : 1, - x2apic_set_max_apicid : 1; + x2apic_set_max_apicid : 1, + nmi_to_offline_cpu : 1; u32 (*calc_dest_apicid)(unsigned int cpu); @@ -531,6 +532,8 @@ extern u32 apic_flat_calc_apicid(unsigned int cpu); extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); extern u32 default_cpu_present_to_apicid(int mps_cpu); +void apic_send_nmi_to_offline_cpu(unsigned int cpu); + #else /* CONFIG_X86_LOCAL_APIC */ static inline u32 read_apic_id(void) { return 0; } diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 25050d953eee..fecc4fe1d68a 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -71,26 +71,12 @@ static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} extern __noendbr void cet_disable(void); -struct ucode_cpu_info; +struct cpu_signature; -int intel_cpu_collect_info(struct ucode_cpu_info *uci); - -static inline bool intel_cpu_signatures_match(unsigned int s1, unsigned int p1, - unsigned int s2, unsigned int p2) -{ - if (s1 != s2) - return false; - - /* Processor flags are either both 0 ... */ - if (!p1 && !p2) - return true; - - /* ... or they intersect. */ - return p1 & p2; -} +void intel_collect_cpu_info(struct cpu_signature *sig); extern u64 x86_read_arch_cap_msr(void); -int intel_find_matching_signature(void *mc, unsigned int csig, int cpf); +bool intel_find_matching_signature(void *mc, struct cpu_signature *sig); int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type); extern struct cpumask cpus_stop_mask; diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index bbbe9d744977..695e569159c1 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -23,6 +23,8 @@ static inline void load_ucode_ap(void) { } static inline void microcode_bsp_resume(void) { } #endif +extern unsigned long initrd_start_early; + #ifdef CONFIG_CPU_SUP_INTEL /* Intel specific microcode defines. Public for IFS */ struct microcode_header_intel { @@ -36,7 +38,8 @@ struct microcode_header_intel { unsigned int datasize; unsigned int totalsize; unsigned int metasize; - unsigned int reserved[2]; + unsigned int min_req_ver; + unsigned int reserved; }; struct microcode_intel { @@ -68,11 +71,19 @@ static inline u32 intel_get_microcode_revision(void) return rev; } +#endif /* !CONFIG_CPU_SUP_INTEL */ -void show_ucode_info_early(void); +bool microcode_nmi_handler(void); +void microcode_offline_nmi_handler(void); -#else /* CONFIG_CPU_SUP_INTEL */ -static inline void show_ucode_info_early(void) { } -#endif /* !CONFIG_CPU_SUP_INTEL */ +#ifdef CONFIG_MICROCODE_LATE_LOADING +DECLARE_STATIC_KEY_FALSE(microcode_nmi_handler_enable); +static __always_inline bool microcode_nmi_handler_enabled(void) +{ + return static_branch_unlikely(µcode_nmi_handler_enable); +} +#else +static __always_inline bool microcode_nmi_handler_enabled(void) { return false; } +#endif #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index f3495623ac99..bf483fcb4e57 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -126,6 +126,7 @@ void clear_bss(void); #ifdef __i386__ asmlinkage void __init __noreturn i386_start_kernel(void); +void __init mk_early_pgtbl_32(void); #else asmlinkage void __init __noreturn x86_64_start_kernel(char *real_mode); diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index f74695dea217..ccce7ebd8677 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -23,6 +23,7 @@ /* TDX hypercall Leaf IDs */ #define TDVMCALL_MAP_GPA 0x10001 +#define TDVMCALL_GET_QUOTE 0x10002 #define TDVMCALL_REPORT_FATAL_ERROR 0x10003 #define TDVMCALL_STATUS_RETRY 1 diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index adcbe3f1de30..f3d5305a60fc 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -56,6 +56,8 @@ bool tdx_early_handle_ve(struct pt_regs *regs); int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport); +u64 tdx_hcall_get_quote(u8 *buf, size_t size); + #else static inline void tdx_early_init(void) { }; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3269a0e23d3a..0000325ab98f 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -16,6 +16,7 @@ CFLAGS_REMOVE_kvmclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg +CFLAGS_REMOVE_head32.o = -pg CFLAGS_REMOVE_sev.o = -pg CFLAGS_REMOVE_rethook.o = -pg endif diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 37daa3fd6819..7139867d69cd 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -103,6 +103,7 @@ static struct apic apic_flat __ro_after_init = { .send_IPI_allbutself = default_send_IPI_allbutself, .send_IPI_all = default_send_IPI_all, .send_IPI_self = default_send_IPI_self, + .nmi_to_offline_cpu = true, .read = native_apic_mem_read, .write = native_apic_mem_write, @@ -173,6 +174,7 @@ static struct apic apic_physflat __ro_after_init = { .send_IPI_allbutself = default_send_IPI_allbutself, .send_IPI_all = default_send_IPI_all, .send_IPI_self = default_send_IPI_self, + .nmi_to_offline_cpu = true, .read = native_apic_mem_read, .write = native_apic_mem_write, diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 0078730a512e..5da693d633b7 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -97,6 +97,14 @@ sendmask: __apic_send_IPI_mask(mask, CALL_FUNCTION_VECTOR); } +void apic_send_nmi_to_offline_cpu(unsigned int cpu) +{ + if (WARN_ON_ONCE(!apic->nmi_to_offline_cpu)) + return; + if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, &cpus_booted_once_mask))) + return; + apic->send_IPI(cpu, NMI_VECTOR); +} #endif /* CONFIG_SMP */ static inline int __prepare_ICR2(unsigned int mask) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index affbff65e497..a8306089c91b 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -251,6 +251,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, + .nmi_to_offline_cpu = true, .read = native_apic_msr_read, .write = native_apic_msr_write, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 7c9fe28f742f..558a4a8824f4 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -166,6 +166,7 @@ static struct apic apic_x2apic_phys __ro_after_init = { .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, + .nmi_to_offline_cpu = true, .read = native_apic_msr_read, .write = native_apic_msr_write, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5d9591146244..b14fc8c1c953 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2164,8 +2164,6 @@ static inline void setup_getcpu(int cpu) } #ifdef CONFIG_X86_64 -static inline void ucode_cpu_init(int cpu) { } - static inline void tss_setup_ist(struct tss_struct *tss) { /* Set up the per-CPU TSS IST stacks */ @@ -2176,16 +2174,8 @@ static inline void tss_setup_ist(struct tss_struct *tss) /* Only mapped when SEV-ES is active */ tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC); } - #else /* CONFIG_X86_64 */ - -static inline void ucode_cpu_init(int cpu) -{ - show_ucode_info_early(); -} - static inline void tss_setup_ist(struct tss_struct *tss) { } - #endif /* !CONFIG_X86_64 */ static inline void tss_setup_io_bitmap(struct tss_struct *tss) @@ -2241,8 +2231,6 @@ void cpu_init(void) struct task_struct *cur = current; int cpu = raw_smp_processor_id(); - ucode_cpu_init(cpu); - #ifdef CONFIG_NUMA if (this_cpu_read(numa_node) == 0 && early_cpu_to_node(cpu) != NUMA_NO_NODE) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index bbd1dc38ea03..9373ec01c5ae 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -37,6 +37,16 @@ #include "internal.h" +struct ucode_patch { + struct list_head plist; + void *data; + unsigned int size; + u32 patch_id; + u16 equiv_cpu; +}; + +static LIST_HEAD(microcode_cache); + #define UCODE_MAGIC 0x00414d44 #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 #define UCODE_UCODE_TYPE 0x00000001 @@ -121,24 +131,20 @@ static u16 find_equiv_id(struct equiv_cpu_table *et, u32 sig) /* * Check whether there is a valid microcode container file at the beginning - * of @buf of size @buf_size. Set @early to use this function in the early path. + * of @buf of size @buf_size. */ -static bool verify_container(const u8 *buf, size_t buf_size, bool early) +static bool verify_container(const u8 *buf, size_t buf_size) { u32 cont_magic; if (buf_size <= CONTAINER_HDR_SZ) { - if (!early) - pr_debug("Truncated microcode container header.\n"); - + pr_debug("Truncated microcode container header.\n"); return false; } cont_magic = *(const u32 *)buf; if (cont_magic != UCODE_MAGIC) { - if (!early) - pr_debug("Invalid magic value (0x%08x).\n", cont_magic); - + pr_debug("Invalid magic value (0x%08x).\n", cont_magic); return false; } @@ -147,23 +153,20 @@ static bool verify_container(const u8 *buf, size_t buf_size, bool early) /* * Check whether there is a valid, non-truncated CPU equivalence table at the - * beginning of @buf of size @buf_size. Set @early to use this function in the - * early path. + * beginning of @buf of size @buf_size. */ -static bool verify_equivalence_table(const u8 *buf, size_t buf_size, bool early) +static bool verify_equivalence_table(const u8 *buf, size_t buf_size) { const u32 *hdr = (const u32 *)buf; u32 cont_type, equiv_tbl_len; - if (!verify_container(buf, buf_size, early)) + if (!verify_container(buf, buf_size)) return false; cont_type = hdr[1]; if (cont_type != UCODE_EQUIV_CPU_TABLE_TYPE) { - if (!early) - pr_debug("Wrong microcode container equivalence table type: %u.\n", - cont_type); - + pr_debug("Wrong microcode container equivalence table type: %u.\n", + cont_type); return false; } @@ -172,9 +175,7 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size, bool early) equiv_tbl_len = hdr[2]; if (equiv_tbl_len < sizeof(struct equiv_cpu_entry) || buf_size < equiv_tbl_len) { - if (!early) - pr_debug("Truncated equivalence table.\n"); - + pr_debug("Truncated equivalence table.\n"); return false; } @@ -183,22 +184,19 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size, bool early) /* * Check whether there is a valid, non-truncated microcode patch section at the - * beginning of @buf of size @buf_size. Set @early to use this function in the - * early path. + * beginning of @buf of size @buf_size. * * On success, @sh_psize returns the patch size according to the section header, * to the caller. */ static bool -__verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize, bool early) +__verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize) { u32 p_type, p_size; const u32 *hdr; if (buf_size < SECTION_HDR_SIZE) { - if (!early) - pr_debug("Truncated patch section.\n"); - + pr_debug("Truncated patch section.\n"); return false; } @@ -207,17 +205,13 @@ __verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize, bool early p_size = hdr[1]; if (p_type != UCODE_UCODE_TYPE) { - if (!early) - pr_debug("Invalid type field (0x%x) in container file section header.\n", - p_type); - + pr_debug("Invalid type field (0x%x) in container file section header.\n", + p_type); return false; } if (p_size < sizeof(struct microcode_header_amd)) { - if (!early) - pr_debug("Patch of size %u too short.\n", p_size); - + pr_debug("Patch of size %u too short.\n", p_size); return false; } @@ -269,7 +263,7 @@ static unsigned int __verify_patch_size(u8 family, u32 sh_psize, size_t buf_size * 0: success */ static int -verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size, bool early) +verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size) { struct microcode_header_amd *mc_hdr; unsigned int ret; @@ -277,7 +271,7 @@ verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size, bool ea u16 proc_id; u8 patch_fam; - if (!__verify_patch_section(buf, buf_size, &sh_psize, early)) + if (!__verify_patch_section(buf, buf_size, &sh_psize)) return -1; /* @@ -292,16 +286,13 @@ verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size, bool ea * size sh_psize, as the section claims. */ if (buf_size < sh_psize) { - if (!early) - pr_debug("Patch of size %u truncated.\n", sh_psize); - + pr_debug("Patch of size %u truncated.\n", sh_psize); return -1; } ret = __verify_patch_size(family, sh_psize, buf_size); if (!ret) { - if (!early) - pr_debug("Per-family patch size mismatch.\n"); + pr_debug("Per-family patch size mismatch.\n"); return -1; } @@ -309,8 +300,7 @@ verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size, bool ea mc_hdr = (struct microcode_header_amd *)(buf + SECTION_HDR_SIZE); if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { - if (!early) - pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", mc_hdr->patch_id); + pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", mc_hdr->patch_id); return -1; } @@ -337,7 +327,7 @@ static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc) u16 eq_id; u8 *buf; - if (!verify_equivalence_table(ucode, size, true)) + if (!verify_equivalence_table(ucode, size)) return 0; buf = ucode; @@ -364,7 +354,7 @@ static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc) u32 patch_size; int ret; - ret = verify_patch(x86_family(desc->cpuid_1_eax), buf, size, &patch_size, true); + ret = verify_patch(x86_family(desc->cpuid_1_eax), buf, size, &patch_size); if (ret < 0) { /* * Patch verification failed, skip to the next container, if @@ -456,14 +446,8 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size) { struct cont_desc desc = { 0 }; struct microcode_amd *mc; - u32 rev, dummy, *new_rev; bool ret = false; - -#ifdef CONFIG_X86_32 - new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); -#else - new_rev = &ucode_new_rev; -#endif + u32 rev, dummy; desc.cpuid_1_eax = cpuid_1_eax; @@ -484,8 +468,8 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size) return ret; if (!__apply_microcode_amd(mc)) { - *new_rev = mc->hdr.patch_id; - ret = true; + ucode_new_rev = mc->hdr.patch_id; + ret = true; } return ret; @@ -501,7 +485,7 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) if (family >= 0x15) snprintf(fw_name, sizeof(fw_name), - "amd-ucode/microcode_amd_fam%.2xh.bin", family); + "amd-ucode/microcode_amd_fam%02hhxh.bin", family); if (firmware_request_builtin(&fw, fw_name)) { cp->size = fw.size; @@ -512,36 +496,23 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) return false; } -static void find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpio_data *ret) +static void __init find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpio_data *ret) { - struct ucode_cpu_info *uci; struct cpio_data cp; - const char *path; - bool use_pa; - - if (IS_ENABLED(CONFIG_X86_32)) { - uci = (struct ucode_cpu_info *)__pa_nodebug(ucode_cpu_info); - path = (const char *)__pa_nodebug(ucode_path); - use_pa = true; - } else { - uci = ucode_cpu_info; - path = ucode_path; - use_pa = false; - } if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax))) - cp = find_microcode_in_initrd(path, use_pa); - - /* Needed in load_microcode_amd() */ - uci->cpu_sig.sig = cpuid_1_eax; + cp = find_microcode_in_initrd(ucode_path); *ret = cp; } -static void apply_ucode_from_containers(unsigned int cpuid_1_eax) +void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) { struct cpio_data cp = { }; + /* Needed in load_microcode_amd() */ + ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax; + find_blobs_in_containers(cpuid_1_eax, &cp); if (!(cp.data && cp.size)) return; @@ -549,20 +520,20 @@ static void apply_ucode_from_containers(unsigned int cpuid_1_eax) early_apply_microcode(cpuid_1_eax, cp.data, cp.size); } -void load_ucode_amd_early(unsigned int cpuid_1_eax) -{ - return apply_ucode_from_containers(cpuid_1_eax); -} - static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); -int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) +static int __init save_microcode_in_initrd(void) { + unsigned int cpuid_1_eax = native_cpuid_eax(1); + struct cpuinfo_x86 *c = &boot_cpu_data; struct cont_desc desc = { 0 }; enum ucode_state ret; struct cpio_data cp; - cp = find_microcode_in_initrd(ucode_path, false); + if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) + return 0; + + find_blobs_in_containers(cpuid_1_eax, &cp); if (!(cp.data && cp.size)) return -EINVAL; @@ -578,6 +549,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) return 0; } +early_initcall(save_microcode_in_initrd); /* * a small, trivial cache of per-family ucode patches @@ -631,7 +603,6 @@ static struct ucode_patch *find_patch(unsigned int cpu) struct ucode_cpu_info *uci = ucode_cpu_info + cpu; u16 equiv_id; - equiv_id = find_equiv_id(&equiv_table, uci->cpu_sig.sig); if (!equiv_id) return NULL; @@ -733,12 +704,20 @@ out: return ret; } +void load_ucode_amd_ap(unsigned int cpuid_1_eax) +{ + unsigned int cpu = smp_processor_id(); + + ucode_cpu_info[cpu].cpu_sig.sig = cpuid_1_eax; + apply_microcode_amd(cpu); +} + static size_t install_equiv_cpu_table(const u8 *buf, size_t buf_size) { u32 equiv_tbl_len; const u32 *hdr; - if (!verify_equivalence_table(buf, buf_size, false)) + if (!verify_equivalence_table(buf, buf_size)) return 0; hdr = (const u32 *)buf; @@ -784,7 +763,7 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover, u16 proc_id; int ret; - ret = verify_patch(family, fw, leftover, patch_size, false); + ret = verify_patch(family, fw, leftover, patch_size); if (ret) return ret; @@ -909,6 +888,9 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device) enum ucode_state ret = UCODE_NFOUND; const struct firmware *fw; + if (force_minrev) + return UCODE_NFOUND; + if (c->x86 >= 0x15) snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); @@ -918,7 +900,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device) } ret = UCODE_ERROR; - if (!verify_container(fw->data, fw->size, false)) + if (!verify_container(fw->data, fw->size)) goto fw_release; ret = load_microcode_amd(c->x86, fw->data, fw->size); @@ -938,10 +920,11 @@ static void microcode_fini_cpu_amd(int cpu) } static struct microcode_ops microcode_amd_ops = { - .request_microcode_fw = request_microcode_amd, - .collect_cpu_info = collect_cpu_info_amd, - .apply_microcode = apply_microcode_amd, - .microcode_fini_cpu = microcode_fini_cpu_amd, + .request_microcode_fw = request_microcode_amd, + .collect_cpu_info = collect_cpu_info_amd, + .apply_microcode = apply_microcode_amd, + .microcode_fini_cpu = microcode_fini_cpu_amd, + .nmi_safe = true, }; struct microcode_ops * __init init_amd_microcode(void) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 6cc7a2c181da..666d25bbc5ad 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -23,6 +23,7 @@ #include <linux/miscdevice.h> #include <linux/capability.h> #include <linux/firmware.h> +#include <linux/cpumask.h> #include <linux/kernel.h> #include <linux/delay.h> #include <linux/mutex.h> @@ -31,6 +32,7 @@ #include <linux/fs.h> #include <linux/mm.h> +#include <asm/apic.h> #include <asm/cpu_device_id.h> #include <asm/perf_event.h> #include <asm/processor.h> @@ -42,11 +44,10 @@ #define DRIVER_VERSION "2.2" static struct microcode_ops *microcode_ops; -static bool dis_ucode_ldr = true; +bool dis_ucode_ldr = true; -bool initrd_gone; - -LIST_HEAD(microcode_cache); +bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV); +module_param(force_minrev, bool, S_IRUSR | S_IWUSR); /* * Synchronization. @@ -90,10 +91,7 @@ static bool amd_check_current_patch_level(void) native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); - if (IS_ENABLED(CONFIG_X86_32)) - levels = (u32 *)__pa_nodebug(&final_levels); - else - levels = final_levels; + levels = final_levels; for (i = 0; levels[i]; i++) { if (lvl == levels[i]) @@ -105,17 +103,8 @@ static bool amd_check_current_patch_level(void) static bool __init check_loader_disabled_bsp(void) { static const char *__dis_opt_str = "dis_ucode_ldr"; - -#ifdef CONFIG_X86_32 - const char *cmdline = (const char *)__pa_nodebug(boot_command_line); - const char *option = (const char *)__pa_nodebug(__dis_opt_str); - bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr); - -#else /* CONFIG_X86_64 */ const char *cmdline = boot_command_line; const char *option = __dis_opt_str; - bool *res = &dis_ucode_ldr; -#endif /* * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not @@ -123,17 +112,17 @@ static bool __init check_loader_disabled_bsp(void) * that's good enough as they don't land on the BSP path anyway. */ if (native_cpuid_ecx(1) & BIT(31)) - return *res; + return true; if (x86_cpuid_vendor() == X86_VENDOR_AMD) { if (amd_check_current_patch_level()) - return *res; + return true; } if (cmdline_find_option_bool(cmdline, option) <= 0) - *res = false; + dis_ucode_ldr = false; - return *res; + return dis_ucode_ldr; } void __init load_ucode_bsp(void) @@ -168,23 +157,14 @@ void __init load_ucode_bsp(void) if (intel) load_ucode_intel_bsp(); else - load_ucode_amd_early(cpuid_1_eax); -} - -static bool check_loader_disabled_ap(void) -{ -#ifdef CONFIG_X86_32 - return *((bool *)__pa_nodebug(&dis_ucode_ldr)); -#else - return dis_ucode_ldr; -#endif + load_ucode_amd_bsp(cpuid_1_eax); } void load_ucode_ap(void) { unsigned int cpuid_1_eax; - if (check_loader_disabled_ap()) + if (dis_ucode_ldr) return; cpuid_1_eax = native_cpuid_eax(1); @@ -196,97 +176,44 @@ void load_ucode_ap(void) break; case X86_VENDOR_AMD: if (x86_family(cpuid_1_eax) >= 0x10) - load_ucode_amd_early(cpuid_1_eax); - break; - default: - break; - } -} - -static int __init save_microcode_in_initrd(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - int ret = -EINVAL; - - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - if (c->x86 >= 6) - ret = save_microcode_in_initrd_intel(); - break; - case X86_VENDOR_AMD: - if (c->x86 >= 0x10) - ret = save_microcode_in_initrd_amd(cpuid_eax(1)); + load_ucode_amd_ap(cpuid_1_eax); break; default: break; } - - initrd_gone = true; - - return ret; } -struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa) +struct cpio_data __init find_microcode_in_initrd(const char *path) { #ifdef CONFIG_BLK_DEV_INITRD unsigned long start = 0; size_t size; #ifdef CONFIG_X86_32 - struct boot_params *params; - - if (use_pa) - params = (struct boot_params *)__pa_nodebug(&boot_params); - else - params = &boot_params; - - size = params->hdr.ramdisk_size; - - /* - * Set start only if we have an initrd image. We cannot use initrd_start - * because it is not set that early yet. - */ + size = boot_params.hdr.ramdisk_size; + /* Early load on BSP has a temporary mapping. */ if (size) - start = params->hdr.ramdisk_image; + start = initrd_start_early; -# else /* CONFIG_X86_64 */ +#else /* CONFIG_X86_64 */ size = (unsigned long)boot_params.ext_ramdisk_size << 32; size |= boot_params.hdr.ramdisk_size; if (size) { start = (unsigned long)boot_params.ext_ramdisk_image << 32; start |= boot_params.hdr.ramdisk_image; - start += PAGE_OFFSET; } -# endif +#endif /* * Fixup the start address: after reserve_initrd() runs, initrd_start * has the virtual address of the beginning of the initrd. It also * possibly relocates the ramdisk. In either case, initrd_start contains * the updated address so use that instead. - * - * initrd_gone is for the hotplug case where we've thrown out initrd - * already. */ - if (!use_pa) { - if (initrd_gone) - return (struct cpio_data){ NULL, 0, "" }; - if (initrd_start) - start = initrd_start; - } else { - /* - * The picture with physical addresses is a bit different: we - * need to get the *physical* address to which the ramdisk was - * relocated, i.e., relocated_ramdisk (not initrd_start) and - * since we're running from physical addresses, we need to access - * relocated_ramdisk through its *physical* address too. - */ - u64 *rr = (u64 *)__pa_nodebug(&relocated_ramdisk); - if (*rr) - start = *rr; - } + if (initrd_start) + start = initrd_start; return find_cpio_data(path, (void *)start, size, NULL); #else /* !CONFIG_BLK_DEV_INITRD */ @@ -330,117 +257,298 @@ static struct platform_device *microcode_pdev; * requirement can be relaxed in the future. Right now, this is conservative * and good. */ -#define SPINUNIT 100 /* 100 nsec */ +enum sibling_ctrl { + /* Spinwait with timeout */ + SCTRL_WAIT, + /* Invoke the microcode_apply() callback */ + SCTRL_APPLY, + /* Proceed without invoking the microcode_apply() callback */ + SCTRL_DONE, +}; + +struct microcode_ctrl { + enum sibling_ctrl ctrl; + enum ucode_state result; + unsigned int ctrl_cpu; + bool nmi_enabled; +}; -static int check_online_cpus(void) +DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable); +static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl); +static atomic_t late_cpus_in, offline_in_nmi; +static unsigned int loops_per_usec; +static cpumask_t cpu_offline_mask; + +static noinstr bool wait_for_cpus(atomic_t *cnt) { - unsigned int cpu; + unsigned int timeout, loops; - /* - * Make sure all CPUs are online. It's fine for SMT to be disabled if - * all the primary threads are still online. - */ - for_each_present_cpu(cpu) { - if (topology_is_primary_thread(cpu) && !cpu_online(cpu)) { - pr_err("Not all CPUs online, aborting microcode update.\n"); - return -EINVAL; + WARN_ON_ONCE(raw_atomic_dec_return(cnt) < 0); + + for (timeout = 0; timeout < USEC_PER_SEC; timeout++) { + if (!raw_atomic_read(cnt)) + return true; + + for (loops = 0; loops < loops_per_usec; loops++) + cpu_relax(); + + /* If invoked directly, tickle the NMI watchdog */ + if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) { + instrumentation_begin(); + touch_nmi_watchdog(); + instrumentation_end(); } } - - return 0; + /* Prevent the late comers from making progress and let them time out */ + raw_atomic_inc(cnt); + return false; } -static atomic_t late_cpus_in; -static atomic_t late_cpus_out; - -static int __wait_for_cpus(atomic_t *t, long long timeout) +static noinstr bool wait_for_ctrl(void) { - int all_cpus = num_online_cpus(); - - atomic_inc(t); + unsigned int timeout, loops; - while (atomic_read(t) < all_cpus) { - if (timeout < SPINUNIT) { - pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", - all_cpus - atomic_read(t)); - return 1; - } + for (timeout = 0; timeout < USEC_PER_SEC; timeout++) { + if (raw_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT) + return true; - ndelay(SPINUNIT); - timeout -= SPINUNIT; + for (loops = 0; loops < loops_per_usec; loops++) + cpu_relax(); - touch_nmi_watchdog(); + /* If invoked directly, tickle the NMI watchdog */ + if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) { + instrumentation_begin(); + touch_nmi_watchdog(); + instrumentation_end(); + } } - return 0; + return false; } /* - * Returns: - * < 0 - on error - * 0 - success (no update done or microcode was updated) + * Protected against instrumentation up to the point where the primary + * thread completed the update. See microcode_nmi_handler() for details. */ -static int __reload_late(void *info) +static noinstr bool load_secondary_wait(unsigned int ctrl_cpu) { - int cpu = smp_processor_id(); - enum ucode_state err; - int ret = 0; + /* Initial rendezvous to ensure that all CPUs have arrived */ + if (!wait_for_cpus(&late_cpus_in)) { + raw_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT); + return false; + } /* - * Wait for all CPUs to arrive. A load will not be attempted unless all - * CPUs show up. - * */ - if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) - return -1; + * Wait for primary threads to complete. If one of them hangs due + * to the update, there is no way out. This is non-recoverable + * because the CPU might hold locks or resources and confuse the + * scheduler, watchdogs etc. There is no way to safely evacuate the + * machine. + */ + if (wait_for_ctrl()) + return true; + instrumentation_begin(); + panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu); + instrumentation_end(); +} + +/* + * Protected against instrumentation up to the point where the primary + * thread completed the update. See microcode_nmi_handler() for details. + */ +static noinstr void load_secondary(unsigned int cpu) +{ + unsigned int ctrl_cpu = raw_cpu_read(ucode_ctrl.ctrl_cpu); + enum ucode_state ret; + + if (!load_secondary_wait(ctrl_cpu)) { + instrumentation_begin(); + pr_err_once("load: %d CPUs timed out\n", + atomic_read(&late_cpus_in) - 1); + instrumentation_end(); + return; + } + + /* Primary thread completed. Allow to invoke instrumentable code */ + instrumentation_begin(); /* - * On an SMT system, it suffices to load the microcode on one sibling of - * the core because the microcode engine is shared between the threads. - * Synchronization still needs to take place so that no concurrent - * loading attempts happen on multiple threads of an SMT core. See - * below. + * If the primary succeeded then invoke the apply() callback, + * otherwise copy the state from the primary thread. */ - if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu) - err = microcode_ops->apply_microcode(cpu); + if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY) + ret = microcode_ops->apply_microcode(cpu); else - goto wait_for_siblings; + ret = per_cpu(ucode_ctrl.result, ctrl_cpu); - if (err >= UCODE_NFOUND) { - if (err == UCODE_ERROR) { - pr_warn("Error reloading microcode on CPU %d\n", cpu); - ret = -1; - } + this_cpu_write(ucode_ctrl.result, ret); + this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE); + instrumentation_end(); +} + +static void __load_primary(unsigned int cpu) +{ + struct cpumask *secondaries = topology_sibling_cpumask(cpu); + enum sibling_ctrl ctrl; + enum ucode_state ret; + unsigned int sibling; + + /* Initial rendezvous to ensure that all CPUs have arrived */ + if (!wait_for_cpus(&late_cpus_in)) { + this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT); + pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1); + return; } -wait_for_siblings: - if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC)) - panic("Timeout during microcode update!\n"); + ret = microcode_ops->apply_microcode(cpu); + this_cpu_write(ucode_ctrl.result, ret); + this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE); /* - * At least one thread has completed update on each core. - * For others, simply call the update to make sure the - * per-cpu cpuinfo can be updated with right microcode - * revision. + * If the update was successful, let the siblings run the apply() + * callback. If not, tell them it's done. This also covers the + * case where the CPU has uniform loading at package or system + * scope implemented but does not advertise it. */ - if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu) - err = microcode_ops->apply_microcode(cpu); + if (ret == UCODE_UPDATED || ret == UCODE_OK) + ctrl = SCTRL_APPLY; + else + ctrl = SCTRL_DONE; + + for_each_cpu(sibling, secondaries) { + if (sibling != cpu) + per_cpu(ucode_ctrl.ctrl, sibling) = ctrl; + } +} + +static bool kick_offline_cpus(unsigned int nr_offl) +{ + unsigned int cpu, timeout; + + for_each_cpu(cpu, &cpu_offline_mask) { + /* Enable the rendezvous handler and send NMI */ + per_cpu(ucode_ctrl.nmi_enabled, cpu) = true; + apic_send_nmi_to_offline_cpu(cpu); + } + + /* Wait for them to arrive */ + for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) { + if (atomic_read(&offline_in_nmi) == nr_offl) + return true; + udelay(1); + } + /* Let the others time out */ + return false; +} + +static void release_offline_cpus(void) +{ + unsigned int cpu; + + for_each_cpu(cpu, &cpu_offline_mask) + per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE; +} + +static void load_primary(unsigned int cpu) +{ + unsigned int nr_offl = cpumask_weight(&cpu_offline_mask); + bool proceed = true; + + /* Kick soft-offlined SMT siblings if required */ + if (!cpu && nr_offl) + proceed = kick_offline_cpus(nr_offl); + + /* If the soft-offlined CPUs did not respond, abort */ + if (proceed) + __load_primary(cpu); - return ret; + /* Unconditionally release soft-offlined SMT siblings if required */ + if (!cpu && nr_offl) + release_offline_cpus(); } /* - * Reload microcode late on all CPUs. Wait for a sec until they - * all gather together. + * Minimal stub rendezvous handler for soft-offlined CPUs which participate + * in the NMI rendezvous to protect against a concurrent NMI on affected + * CPUs. */ -static int microcode_reload_late(void) +void noinstr microcode_offline_nmi_handler(void) { - int old = boot_cpu_data.microcode, ret; + if (!raw_cpu_read(ucode_ctrl.nmi_enabled)) + return; + raw_cpu_write(ucode_ctrl.nmi_enabled, false); + raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE); + raw_atomic_inc(&offline_in_nmi); + wait_for_ctrl(); +} + +static noinstr bool microcode_update_handler(void) +{ + unsigned int cpu = raw_smp_processor_id(); + + if (raw_cpu_read(ucode_ctrl.ctrl_cpu) == cpu) { + instrumentation_begin(); + load_primary(cpu); + instrumentation_end(); + } else { + load_secondary(cpu); + } + + instrumentation_begin(); + touch_nmi_watchdog(); + instrumentation_end(); + + return true; +} + +/* + * Protection against instrumentation is required for CPUs which are not + * safe against an NMI which is delivered to the secondary SMT sibling + * while the primary thread updates the microcode. Instrumentation can end + * up in #INT3, #DB and #PF. The IRET from those exceptions reenables NMI + * which is the opposite of what the NMI rendezvous is trying to achieve. + * + * The primary thread is safe versus instrumentation as the actual + * microcode update handles this correctly. It's only the sibling code + * path which must be NMI safe until the primary thread completed the + * update. + */ +bool noinstr microcode_nmi_handler(void) +{ + if (!raw_cpu_read(ucode_ctrl.nmi_enabled)) + return false; + + raw_cpu_write(ucode_ctrl.nmi_enabled, false); + return microcode_update_handler(); +} + +static int load_cpus_stopped(void *unused) +{ + if (microcode_ops->use_nmi) { + /* Enable the NMI handler and raise NMI */ + this_cpu_write(ucode_ctrl.nmi_enabled, true); + apic->send_IPI(smp_processor_id(), NMI_VECTOR); + } else { + /* Just invoke the handler directly */ + microcode_update_handler(); + } + return 0; +} + +static int load_late_stop_cpus(bool is_safe) +{ + unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0; + unsigned int nr_offl, offline = 0; + int old_rev = boot_cpu_data.microcode; struct cpuinfo_x86 prev_info; - pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n"); - pr_err("You should switch to early loading, if possible.\n"); + if (!is_safe) { + pr_err("Late microcode loading without minimal revision check.\n"); + pr_err("You should switch to early loading, if possible.\n"); + } - atomic_set(&late_cpus_in, 0); - atomic_set(&late_cpus_out, 0); + atomic_set(&late_cpus_in, num_online_cpus()); + atomic_set(&offline_in_nmi, 0); + loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000); /* * Take a snapshot before the microcode update in order to compare and @@ -448,52 +556,162 @@ static int microcode_reload_late(void) */ store_cpu_caps(&prev_info); - ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); - if (!ret) { - pr_info("Reload succeeded, microcode revision: 0x%x -> 0x%x\n", - old, boot_cpu_data.microcode); - microcode_check(&prev_info); - } else { - pr_info("Reload failed, current microcode revision: 0x%x\n", - boot_cpu_data.microcode); + if (microcode_ops->use_nmi) + static_branch_enable_cpuslocked(µcode_nmi_handler_enable); + + stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask); + + if (microcode_ops->use_nmi) + static_branch_disable_cpuslocked(µcode_nmi_handler_enable); + + /* Analyze the results */ + for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) { + switch (per_cpu(ucode_ctrl.result, cpu)) { + case UCODE_UPDATED: updated++; break; + case UCODE_TIMEOUT: timedout++; break; + case UCODE_OK: siblings++; break; + case UCODE_OFFLINE: offline++; break; + default: failed++; break; + } + } + + if (microcode_ops->finalize_late_load) + microcode_ops->finalize_late_load(!updated); + + if (!updated) { + /* Nothing changed. */ + if (!failed && !timedout) + return 0; + + nr_offl = cpumask_weight(&cpu_offline_mask); + if (offline < nr_offl) { + pr_warn("%u offline siblings did not respond.\n", + nr_offl - atomic_read(&offline_in_nmi)); + return -EIO; + } + pr_err("update failed: %u CPUs failed %u CPUs timed out\n", + failed, timedout); + return -EIO; + } + + if (!is_safe || failed || timedout) + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + + pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings); + if (failed || timedout) { + pr_err("load incomplete. %u CPUs timed out or failed\n", + num_online_cpus() - (updated + siblings)); + } + pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode); + microcode_check(&prev_info); + + return updated + siblings == num_online_cpus() ? 0 : -EIO; +} + +/* + * This function does two things: + * + * 1) Ensure that all required CPUs which are present and have been booted + * once are online. + * + * To pass this check, all primary threads must be online. + * + * If the microcode load is not safe against NMI then all SMT threads + * must be online as well because they still react to NMIs when they are + * soft-offlined and parked in one of the play_dead() variants. So if a + * NMI hits while the primary thread updates the microcode the resulting + * behaviour is undefined. The default play_dead() implementation on + * modern CPUs uses MWAIT, which is also not guaranteed to be safe + * against a microcode update which affects MWAIT. + * + * As soft-offlined CPUs still react on NMIs, the SMT sibling + * restriction can be lifted when the vendor driver signals to use NMI + * for rendezvous and the APIC provides a mechanism to send an NMI to a + * soft-offlined CPU. The soft-offlined CPUs are then able to + * participate in the rendezvous in a trivial stub handler. + * + * 2) Initialize the per CPU control structure and create a cpumask + * which contains "offline"; secondary threads, so they can be handled + * correctly by a control CPU. + */ +static bool setup_cpus(void) +{ + struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, }; + bool allow_smt_offline; + unsigned int cpu; + + allow_smt_offline = microcode_ops->nmi_safe || + (microcode_ops->use_nmi && apic->nmi_to_offline_cpu); + + cpumask_clear(&cpu_offline_mask); + + for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) { + /* + * Offline CPUs sit in one of the play_dead() functions + * with interrupts disabled, but they still react on NMIs + * and execute arbitrary code. Also MWAIT being updated + * while the offline CPU sits there is not necessarily safe + * on all CPU variants. + * + * Mark them in the offline_cpus mask which will be handled + * by CPU0 later in the update process. + * + * Ensure that the primary thread is online so that it is + * guaranteed that all cores are updated. + */ + if (!cpu_online(cpu)) { + if (topology_is_primary_thread(cpu) || !allow_smt_offline) { + pr_err("CPU %u not online, loading aborted\n", cpu); + return false; + } + cpumask_set_cpu(cpu, &cpu_offline_mask); + per_cpu(ucode_ctrl, cpu) = ctrl; + continue; + } + + /* + * Initialize the per CPU state. This is core scope for now, + * but prepared to take package or system scope into account. + */ + ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu)); + per_cpu(ucode_ctrl, cpu) = ctrl; } + return true; +} - return ret; +static int load_late_locked(void) +{ + if (!setup_cpus()) + return -EBUSY; + + switch (microcode_ops->request_microcode_fw(0, µcode_pdev->dev)) { + case UCODE_NEW: + return load_late_stop_cpus(false); + case UCODE_NEW_SAFE: + return load_late_stop_cpus(true); + case UCODE_NFOUND: + return -ENOENT; + default: + return -EBADFD; + } } static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { - enum ucode_state tmp_ret = UCODE_OK; - int bsp = boot_cpu_data.cpu_index; unsigned long val; - ssize_t ret = 0; + ssize_t ret; ret = kstrtoul(buf, 0, &val); if (ret || val != 1) return -EINVAL; cpus_read_lock(); - - ret = check_online_cpus(); - if (ret) - goto put; - - tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev); - if (tmp_ret != UCODE_NEW) - goto put; - - ret = microcode_reload_late(); -put: + ret = load_late_locked(); cpus_read_unlock(); - if (ret == 0) - ret = size; - - add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); - - return ret; + return ret ? : size; } static DEVICE_ATTR_WO(reload); @@ -535,17 +753,6 @@ static void microcode_fini_cpu(int cpu) microcode_ops->microcode_fini_cpu(cpu); } -static enum ucode_state microcode_init_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - memset(uci, 0, sizeof(*uci)); - - microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig); - - return microcode_ops->apply_microcode(cpu); -} - /** * microcode_bsp_resume - Update boot CPU microcode during resume. */ @@ -564,19 +771,18 @@ static struct syscore_ops mc_syscore_ops = { .resume = microcode_bsp_resume, }; -static int mc_cpu_starting(unsigned int cpu) -{ - enum ucode_state err = microcode_ops->apply_microcode(cpu); - - pr_debug("%s: CPU%d, err: %d\n", __func__, cpu, err); - - return err == UCODE_ERROR; -} - static int mc_cpu_online(unsigned int cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct device *dev = get_cpu_device(cpu); + memset(uci, 0, sizeof(*uci)); + + microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig); + cpu_data(cpu).microcode = uci->cpu_sig.rev; + if (!cpu) + boot_cpu_data.microcode = uci->cpu_sig.rev; + if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); return 0; @@ -584,33 +790,13 @@ static int mc_cpu_online(unsigned int cpu) static int mc_cpu_down_prep(unsigned int cpu) { - struct device *dev; - - dev = get_cpu_device(cpu); + struct device *dev = get_cpu_device(cpu); microcode_fini_cpu(cpu); - - /* Suspend is in progress, only remove the interface */ sysfs_remove_group(&dev->kobj, &mc_attr_group); - pr_debug("%s: CPU%d\n", __func__, cpu); - return 0; } -static void setup_online_cpu(struct work_struct *work) -{ - int cpu = smp_processor_id(); - enum ucode_state err; - - err = microcode_init_cpu(cpu); - if (err == UCODE_ERROR) { - pr_err("Error applying microcode on CPU%d\n", cpu); - return; - } - - mc_cpu_online(cpu); -} - static struct attribute *cpu_root_microcode_attrs[] = { #ifdef CONFIG_MICROCODE_LATE_LOADING &dev_attr_reload.attr, @@ -656,14 +842,9 @@ static int __init microcode_init(void) } } - /* Do per-CPU setup */ - schedule_on_each_cpu(setup_online_cpu); - register_syscore_ops(&mc_syscore_ops); - cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting", - mc_cpu_starting, NULL); - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", - mc_cpu_online, mc_cpu_down_prep); + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", + mc_cpu_online, mc_cpu_down_prep); pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); @@ -674,5 +855,4 @@ static int __init microcode_init(void) return error; } -fs_initcall(save_microcode_in_initrd); late_initcall(microcode_init); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 94dd6af9c963..6024feb98d29 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -14,7 +14,6 @@ #include <linux/earlycpio.h> #include <linux/firmware.h> #include <linux/uaccess.h> -#include <linux/vmalloc.h> #include <linux/initrd.h> #include <linux/kernel.h> #include <linux/slab.h> @@ -32,11 +31,14 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; +#define UCODE_BSP_LOADED ((struct microcode_intel *)0x1UL) + /* Current microcode patch used in early patching on the APs. */ -static struct microcode_intel *intel_ucode_patch; +static struct microcode_intel *ucode_patch_va __read_mostly; +static struct microcode_intel *ucode_patch_late __read_mostly; /* last level cache size per core */ -static int llc_size_per_core; +static unsigned int llc_size_per_core __ro_after_init; /* microcode format is extended from prescott processors */ struct extended_signature { @@ -66,60 +68,52 @@ static inline unsigned int exttable_size(struct extended_sigtable *et) return et->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE; } -int intel_cpu_collect_info(struct ucode_cpu_info *uci) +void intel_collect_cpu_info(struct cpu_signature *sig) { - unsigned int val[2]; - unsigned int family, model; - struct cpu_signature csig = { 0 }; - unsigned int eax, ebx, ecx, edx; - - memset(uci, 0, sizeof(*uci)); - - eax = 0x00000001; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - csig.sig = eax; + sig->sig = cpuid_eax(1); + sig->pf = 0; + sig->rev = intel_get_microcode_revision(); - family = x86_family(eax); - model = x86_model(eax); + if (x86_model(sig->sig) >= 5 || x86_family(sig->sig) > 6) { + unsigned int val[2]; - if (model >= 5 || family > 6) { /* get processor flags from MSR 0x17 */ native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - csig.pf = 1 << ((val[1] >> 18) & 7); + sig->pf = 1 << ((val[1] >> 18) & 7); } +} +EXPORT_SYMBOL_GPL(intel_collect_cpu_info); - csig.rev = intel_get_microcode_revision(); - - uci->cpu_sig = csig; +static inline bool cpu_signatures_match(struct cpu_signature *s1, unsigned int sig2, + unsigned int pf2) +{ + if (s1->sig != sig2) + return false; - return 0; + /* Processor flags are either both 0 or they intersect. */ + return ((!s1->pf && !pf2) || (s1->pf & pf2)); } -EXPORT_SYMBOL_GPL(intel_cpu_collect_info); -/* - * Returns 1 if update has been found, 0 otherwise. - */ -int intel_find_matching_signature(void *mc, unsigned int csig, int cpf) +bool intel_find_matching_signature(void *mc, struct cpu_signature *sig) { struct microcode_header_intel *mc_hdr = mc; - struct extended_sigtable *ext_hdr; struct extended_signature *ext_sig; + struct extended_sigtable *ext_hdr; int i; - if (intel_cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf)) - return 1; + if (cpu_signatures_match(sig, mc_hdr->sig, mc_hdr->pf)) + return true; /* Look for ext. headers: */ if (get_totalsize(mc_hdr) <= intel_microcode_get_datasize(mc_hdr) + MC_HEADER_SIZE) - return 0; + return false; ext_hdr = mc + intel_microcode_get_datasize(mc_hdr) + MC_HEADER_SIZE; ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE; for (i = 0; i < ext_hdr->count; i++) { - if (intel_cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf)) - return 1; + if (cpu_signatures_match(sig, ext_sig->sig, ext_sig->pf)) + return true; ext_sig++; } return 0; @@ -240,264 +234,91 @@ int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type) } EXPORT_SYMBOL_GPL(intel_microcode_sanity_check); -/* - * Returns 1 if update has been found, 0 otherwise. - */ -static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev) +static void update_ucode_pointer(struct microcode_intel *mc) { - struct microcode_header_intel *mc_hdr = mc; - - if (mc_hdr->rev <= new_rev) - return 0; - - return intel_find_matching_signature(mc, csig, cpf); -} - -static struct ucode_patch *memdup_patch(void *data, unsigned int size) -{ - struct ucode_patch *p; - - p = kzalloc(sizeof(struct ucode_patch), GFP_KERNEL); - if (!p) - return NULL; - - p->data = kmemdup(data, size, GFP_KERNEL); - if (!p->data) { - kfree(p); - return NULL; - } - - return p; -} - -static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size) -{ - struct microcode_header_intel *mc_hdr, *mc_saved_hdr; - struct ucode_patch *iter, *tmp, *p = NULL; - bool prev_found = false; - unsigned int sig, pf; - - mc_hdr = (struct microcode_header_intel *)data; - - list_for_each_entry_safe(iter, tmp, µcode_cache, plist) { - mc_saved_hdr = (struct microcode_header_intel *)iter->data; - sig = mc_saved_hdr->sig; - pf = mc_saved_hdr->pf; - - if (intel_find_matching_signature(data, sig, pf)) { - prev_found = true; - - if (mc_hdr->rev <= mc_saved_hdr->rev) - continue; - - p = memdup_patch(data, size); - if (!p) - pr_err("Error allocating buffer %p\n", data); - else { - list_replace(&iter->plist, &p->plist); - kfree(iter->data); - kfree(iter); - } - } - } + kvfree(ucode_patch_va); /* - * There weren't any previous patches found in the list cache; save the - * newly found. + * Save the virtual address for early loading and for eventual free + * on late loading. */ - if (!prev_found) { - p = memdup_patch(data, size); - if (!p) - pr_err("Error allocating buffer for %p\n", data); - else - list_add_tail(&p->plist, µcode_cache); - } - - if (!p) - return; + ucode_patch_va = mc; +} - if (!intel_find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf)) - return; +static void save_microcode_patch(struct microcode_intel *patch) +{ + unsigned int size = get_totalsize(&patch->hdr); + struct microcode_intel *mc; - /* - * Save for early loading. On 32-bit, that needs to be a physical - * address as the APs are running from physical addresses, before - * paging has been enabled. - */ - if (IS_ENABLED(CONFIG_X86_32)) - intel_ucode_patch = (struct microcode_intel *)__pa_nodebug(p->data); + mc = kvmemdup(patch, size, GFP_KERNEL); + if (mc) + update_ucode_pointer(mc); else - intel_ucode_patch = p->data; + pr_err("Unable to allocate microcode memory size: %u\n", size); } -/* - * Get microcode matching with BSP's model. Only CPUs with the same model as - * BSP can stay in the platform. - */ -static struct microcode_intel * -scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save) +/* Scan blob for microcode matching the boot CPUs family, model, stepping */ +static __init struct microcode_intel *scan_microcode(void *data, size_t size, + struct ucode_cpu_info *uci, + bool save) { struct microcode_header_intel *mc_header; struct microcode_intel *patch = NULL; + u32 cur_rev = uci->cpu_sig.rev; unsigned int mc_size; - while (size) { - if (size < sizeof(struct microcode_header_intel)) - break; - + for (; size >= sizeof(struct microcode_header_intel); size -= mc_size, data += mc_size) { mc_header = (struct microcode_header_intel *)data; mc_size = get_totalsize(mc_header); - if (!mc_size || - mc_size > size || + if (!mc_size || mc_size > size || intel_microcode_sanity_check(data, false, MC_HEADER_TYPE_MICROCODE) < 0) break; - size -= mc_size; - - if (!intel_find_matching_signature(data, uci->cpu_sig.sig, - uci->cpu_sig.pf)) { - data += mc_size; + if (!intel_find_matching_signature(data, &uci->cpu_sig)) continue; - } + /* + * For saving the early microcode, find the matching revision which + * was loaded on the BSP. + * + * On the BSP during early boot, find a newer revision than + * actually loaded in the CPU. + */ if (save) { - save_microcode_patch(uci, data, mc_size); - goto next; - } - - - if (!patch) { - if (!has_newer_microcode(data, - uci->cpu_sig.sig, - uci->cpu_sig.pf, - uci->cpu_sig.rev)) - goto next; - - } else { - struct microcode_header_intel *phdr = &patch->hdr; - - if (!has_newer_microcode(data, - phdr->sig, - phdr->pf, - phdr->rev)) - goto next; + if (cur_rev != mc_header->rev) + continue; + } else if (cur_rev >= mc_header->rev) { + continue; } - /* We have a newer patch, save it. */ patch = data; - -next: - data += mc_size; - } - - if (size) - return NULL; - - return patch; -} - -static bool load_builtin_intel_microcode(struct cpio_data *cp) -{ - unsigned int eax = 1, ebx, ecx = 0, edx; - struct firmware fw; - char name[30]; - - if (IS_ENABLED(CONFIG_X86_32)) - return false; - - native_cpuid(&eax, &ebx, &ecx, &edx); - - sprintf(name, "intel-ucode/%02x-%02x-%02x", - x86_family(eax), x86_model(eax), x86_stepping(eax)); - - if (firmware_request_builtin(&fw, name)) { - cp->size = fw.size; - cp->data = (void *)fw.data; - return true; - } - - return false; -} - -static void print_ucode_info(int old_rev, int new_rev, unsigned int date) -{ - pr_info_once("updated early: 0x%x -> 0x%x, date = %04x-%02x-%02x\n", - old_rev, - new_rev, - date & 0xffff, - date >> 24, - (date >> 16) & 0xff); -} - -#ifdef CONFIG_X86_32 - -static int delay_ucode_info; -static int current_mc_date; -static int early_old_rev; - -/* - * Print early updated ucode info after printk works. This is delayed info dump. - */ -void show_ucode_info_early(void) -{ - struct ucode_cpu_info uci; - - if (delay_ucode_info) { - intel_cpu_collect_info(&uci); - print_ucode_info(early_old_rev, uci.cpu_sig.rev, current_mc_date); - delay_ucode_info = 0; + cur_rev = mc_header->rev; } -} - -/* - * At this point, we can not call printk() yet. Delay printing microcode info in - * show_ucode_info_early() until printk() works. - */ -static void print_ucode(int old_rev, int new_rev, int date) -{ - int *delay_ucode_info_p; - int *current_mc_date_p; - int *early_old_rev_p; - - delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); - current_mc_date_p = (int *)__pa_nodebug(¤t_mc_date); - early_old_rev_p = (int *)__pa_nodebug(&early_old_rev); - - *delay_ucode_info_p = 1; - *current_mc_date_p = date; - *early_old_rev_p = old_rev; -} -#else -static inline void print_ucode(int old_rev, int new_rev, int date) -{ - print_ucode_info(old_rev, new_rev, date); + return size ? NULL : patch; } -#endif -static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) +static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci, + struct microcode_intel *mc, + u32 *cur_rev) { - struct microcode_intel *mc; - u32 rev, old_rev; + u32 rev; - mc = uci->mc; if (!mc) - return 0; + return UCODE_NFOUND; /* * Save us the MSR write below - which is a particular expensive * operation - when the other hyperthread has updated the microcode * already. */ - rev = intel_get_microcode_revision(); - if (rev >= mc->hdr.rev) { - uci->cpu_sig.rev = rev; + *cur_rev = intel_get_microcode_revision(); + if (*cur_rev >= mc->hdr.rev) { + uci->cpu_sig.rev = *cur_rev; return UCODE_OK; } - old_rev = rev; - /* * Writeback and invalidate caches before updating microcode to avoid * internal issues depending on what the microcode is updating. @@ -509,247 +330,182 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) rev = intel_get_microcode_revision(); if (rev != mc->hdr.rev) - return -1; + return UCODE_ERROR; uci->cpu_sig.rev = rev; + return UCODE_UPDATED; +} - if (early) - print_ucode(old_rev, uci->cpu_sig.rev, mc->hdr.date); - else - print_ucode_info(old_rev, uci->cpu_sig.rev, mc->hdr.date); +static enum ucode_state apply_microcode_early(struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc = uci->mc; + enum ucode_state ret; + u32 cur_rev, date; - return 0; + ret = __apply_microcode(uci, mc, &cur_rev); + if (ret == UCODE_UPDATED) { + date = mc->hdr.date; + pr_info_once("updated early: 0x%x -> 0x%x, date = %04x-%02x-%02x\n", + cur_rev, mc->hdr.rev, date & 0xffff, date >> 24, (date >> 16) & 0xff); + } + return ret; } -int __init save_microcode_in_initrd_intel(void) +static __init bool load_builtin_intel_microcode(struct cpio_data *cp) { - struct ucode_cpu_info uci; - struct cpio_data cp; - - /* - * initrd is going away, clear patch ptr. We will scan the microcode one - * last time before jettisoning and save a patch, if found. Then we will - * update that pointer too, with a stable patch address to use when - * resuming the cores. - */ - intel_ucode_patch = NULL; + unsigned int eax = 1, ebx, ecx = 0, edx; + struct firmware fw; + char name[30]; - if (!load_builtin_intel_microcode(&cp)) - cp = find_microcode_in_initrd(ucode_path, false); + if (IS_ENABLED(CONFIG_X86_32)) + return false; - if (!(cp.data && cp.size)) - return 0; + native_cpuid(&eax, &ebx, &ecx, &edx); - intel_cpu_collect_info(&uci); + sprintf(name, "intel-ucode/%02x-%02x-%02x", + x86_family(eax), x86_model(eax), x86_stepping(eax)); - scan_microcode(cp.data, cp.size, &uci, true); - return 0; + if (firmware_request_builtin(&fw, name)) { + cp->size = fw.size; + cp->data = (void *)fw.data; + return true; + } + return false; } -/* - * @res_patch, output: a pointer to the patch we found. - */ -static struct microcode_intel *__load_ucode_intel(struct ucode_cpu_info *uci) +static __init struct microcode_intel *get_microcode_blob(struct ucode_cpu_info *uci, bool save) { - static const char *path; struct cpio_data cp; - bool use_pa; - - if (IS_ENABLED(CONFIG_X86_32)) { - path = (const char *)__pa_nodebug(ucode_path); - use_pa = true; - } else { - path = ucode_path; - use_pa = false; - } - /* try built-in microcode first */ if (!load_builtin_intel_microcode(&cp)) - cp = find_microcode_in_initrd(path, use_pa); + cp = find_microcode_in_initrd(ucode_path); if (!(cp.data && cp.size)) return NULL; - intel_cpu_collect_info(uci); + intel_collect_cpu_info(&uci->cpu_sig); - return scan_microcode(cp.data, cp.size, uci, false); + return scan_microcode(cp.data, cp.size, uci, save); } -void __init load_ucode_intel_bsp(void) +/* + * Invoked from an early init call to save the microcode blob which was + * selected during early boot when mm was not usable. The microcode must be + * saved because initrd is going away. It's an early init call so the APs + * just can use the pointer and do not have to scan initrd/builtin firmware + * again. + */ +static int __init save_builtin_microcode(void) { - struct microcode_intel *patch; struct ucode_cpu_info uci; - patch = __load_ucode_intel(&uci); - if (!patch) - return; + if (xchg(&ucode_patch_va, NULL) != UCODE_BSP_LOADED) + return 0; - uci.mc = patch; + if (dis_ucode_ldr || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; - apply_microcode_early(&uci, true); + uci.mc = get_microcode_blob(&uci, true); + if (uci.mc) + save_microcode_patch(uci.mc); + return 0; } +early_initcall(save_builtin_microcode); -void load_ucode_intel_ap(void) +/* Load microcode on BSP from initrd or builtin blobs */ +void __init load_ucode_intel_bsp(void) { - struct microcode_intel *patch, **iup; struct ucode_cpu_info uci; - if (IS_ENABLED(CONFIG_X86_32)) - iup = (struct microcode_intel **) __pa_nodebug(&intel_ucode_patch); - else - iup = &intel_ucode_patch; - - if (!*iup) { - patch = __load_ucode_intel(&uci); - if (!patch) - return; - - *iup = patch; - } - - uci.mc = *iup; - - apply_microcode_early(&uci, true); + uci.mc = get_microcode_blob(&uci, false); + if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED) + ucode_patch_va = UCODE_BSP_LOADED; } -static struct microcode_intel *find_patch(struct ucode_cpu_info *uci) +void load_ucode_intel_ap(void) { - struct microcode_header_intel *phdr; - struct ucode_patch *iter, *tmp; - - list_for_each_entry_safe(iter, tmp, µcode_cache, plist) { - - phdr = (struct microcode_header_intel *)iter->data; - - if (phdr->rev <= uci->cpu_sig.rev) - continue; - - if (!intel_find_matching_signature(phdr, - uci->cpu_sig.sig, - uci->cpu_sig.pf)) - continue; + struct ucode_cpu_info uci; - return iter->data; - } - return NULL; + uci.mc = ucode_patch_va; + if (uci.mc) + apply_microcode_early(&uci); } +/* Reload microcode on resume */ void reload_ucode_intel(void) { - struct microcode_intel *p; - struct ucode_cpu_info uci; - - intel_cpu_collect_info(&uci); - - p = find_patch(&uci); - if (!p) - return; - - uci.mc = p; + struct ucode_cpu_info uci = { .mc = ucode_patch_va, }; - apply_microcode_early(&uci, false); + if (uci.mc) + apply_microcode_early(&uci); } static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) { - struct cpuinfo_x86 *c = &cpu_data(cpu_num); - unsigned int val[2]; - - memset(csig, 0, sizeof(*csig)); - - csig->sig = cpuid_eax(0x00000001); - - if ((c->x86_model >= 5) || (c->x86 > 6)) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - csig->pf = 1 << ((val[1] >> 18) & 7); - } - - csig->rev = c->microcode; - + intel_collect_cpu_info(csig); return 0; } -static enum ucode_state apply_microcode_intel(int cpu) +static enum ucode_state apply_microcode_late(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct cpuinfo_x86 *c = &cpu_data(cpu); - bool bsp = c->cpu_index == boot_cpu_data.cpu_index; - struct microcode_intel *mc; + struct microcode_intel *mc = ucode_patch_late; enum ucode_state ret; - static int prev_rev; - u32 rev; + u32 cur_rev; - /* We should bind the task to the CPU */ - if (WARN_ON(raw_smp_processor_id() != cpu)) + if (WARN_ON_ONCE(smp_processor_id() != cpu)) return UCODE_ERROR; - /* Look for a newer patch in our cache: */ - mc = find_patch(uci); - if (!mc) { - mc = uci->mc; - if (!mc) - return UCODE_NFOUND; - } + ret = __apply_microcode(uci, mc, &cur_rev); + if (ret != UCODE_UPDATED && ret != UCODE_OK) + return ret; - /* - * Save us the MSR write below - which is a particular expensive - * operation - when the other hyperthread has updated the microcode - * already. - */ - rev = intel_get_microcode_revision(); - if (rev >= mc->hdr.rev) { - ret = UCODE_OK; - goto out; + if (!cpu && uci->cpu_sig.rev != cur_rev) { + pr_info("Updated to revision 0x%x, date = %04x-%02x-%02x\n", + uci->cpu_sig.rev, mc->hdr.date & 0xffff, mc->hdr.date >> 24, + (mc->hdr.date >> 16) & 0xff); } - /* - * Writeback and invalidate caches before updating microcode to avoid - * internal issues depending on what the microcode is updating. - */ - native_wbinvd(); + cpu_data(cpu).microcode = uci->cpu_sig.rev; + if (!cpu) + boot_cpu_data.microcode = uci->cpu_sig.rev; - /* write microcode via MSR 0x79 */ - wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); + return ret; +} - rev = intel_get_microcode_revision(); +static bool ucode_validate_minrev(struct microcode_header_intel *mc_header) +{ + int cur_rev = boot_cpu_data.microcode; - if (rev != mc->hdr.rev) { - pr_err("CPU%d update to revision 0x%x failed\n", - cpu, mc->hdr.rev); - return UCODE_ERROR; + /* + * When late-loading, ensure the header declares a minimum revision + * required to perform a late-load. The previously reserved field + * is 0 in older microcode blobs. + */ + if (!mc_header->min_req_ver) { + pr_info("Unsafe microcode update: Microcode header does not specify a required min version\n"); + return false; } - if (bsp && rev != prev_rev) { - pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n", - rev, - mc->hdr.date & 0xffff, - mc->hdr.date >> 24, - (mc->hdr.date >> 16) & 0xff); - prev_rev = rev; + /* + * Check whether the current revision is either greater or equal to + * to the minimum revision specified in the header. + */ + if (cur_rev < mc_header->min_req_ver) { + pr_info("Unsafe microcode update: Current revision 0x%x too old\n", cur_rev); + pr_info("Current should be at 0x%x or higher. Use early loading instead\n", mc_header->min_req_ver); + return false; } - - ret = UCODE_UPDATED; - -out: - uci->cpu_sig.rev = rev; - c->microcode = rev; - - /* Update boot_cpu_data's revision too, if we're on the BSP: */ - if (bsp) - boot_cpu_data.microcode = rev; - - return ret; + return true; } -static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter) +static enum ucode_state parse_microcode_blobs(int cpu, struct iov_iter *iter) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - unsigned int curr_mc_size = 0, new_mc_size = 0; - enum ucode_state ret = UCODE_OK; - int new_rev = uci->cpu_sig.rev; + bool is_safe, new_is_safe = false; + int cur_rev = uci->cpu_sig.rev; + unsigned int curr_mc_size = 0; u8 *new_mc = NULL, *mc = NULL; - unsigned int csig, cpf; while (iov_iter_count(iter)) { struct microcode_header_intel mc_header; @@ -758,68 +514,66 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter) if (!copy_from_iter_full(&mc_header, sizeof(mc_header), iter)) { pr_err("error! Truncated or inaccessible header in microcode data file\n"); - break; + goto fail; } mc_size = get_totalsize(&mc_header); if (mc_size < sizeof(mc_header)) { pr_err("error! Bad data in microcode data file (totalsize too small)\n"); - break; + goto fail; } data_size = mc_size - sizeof(mc_header); if (data_size > iov_iter_count(iter)) { pr_err("error! Bad data in microcode data file (truncated file?)\n"); - break; + goto fail; } /* For performance reasons, reuse mc area when possible */ if (!mc || mc_size > curr_mc_size) { - vfree(mc); - mc = vmalloc(mc_size); + kvfree(mc); + mc = kvmalloc(mc_size, GFP_KERNEL); if (!mc) - break; + goto fail; curr_mc_size = mc_size; } memcpy(mc, &mc_header, sizeof(mc_header)); data = mc + sizeof(mc_header); if (!copy_from_iter_full(data, data_size, iter) || - intel_microcode_sanity_check(mc, true, MC_HEADER_TYPE_MICROCODE) < 0) { - break; - } + intel_microcode_sanity_check(mc, true, MC_HEADER_TYPE_MICROCODE) < 0) + goto fail; - csig = uci->cpu_sig.sig; - cpf = uci->cpu_sig.pf; - if (has_newer_microcode(mc, csig, cpf, new_rev)) { - vfree(new_mc); - new_rev = mc_header.rev; - new_mc = mc; - new_mc_size = mc_size; - mc = NULL; /* trigger new vmalloc */ - ret = UCODE_NEW; - } - } + if (cur_rev >= mc_header.rev) + continue; - vfree(mc); + if (!intel_find_matching_signature(mc, &uci->cpu_sig)) + continue; - if (iov_iter_count(iter)) { - vfree(new_mc); - return UCODE_ERROR; + is_safe = ucode_validate_minrev(&mc_header); + if (force_minrev && !is_safe) + continue; + + kvfree(new_mc); + cur_rev = mc_header.rev; + new_mc = mc; + new_is_safe = is_safe; + mc = NULL; } + if (iov_iter_count(iter)) + goto fail; + + kvfree(mc); if (!new_mc) return UCODE_NFOUND; - vfree(uci->mc); - uci->mc = (struct microcode_intel *)new_mc; - - /* Save for CPU hotplug */ - save_microcode_patch(uci, new_mc, new_mc_size); + ucode_patch_late = (struct microcode_intel *)new_mc; + return new_is_safe ? UCODE_NEW_SAFE : UCODE_NEW; - pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", - cpu, new_rev, uci->cpu_sig.rev); - - return ret; +fail: + kvfree(mc); + kvfree(new_mc); + return UCODE_ERROR; } static bool is_blacklisted(unsigned int cpu) @@ -868,26 +622,36 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) kvec.iov_base = (void *)firmware->data; kvec.iov_len = firmware->size; iov_iter_kvec(&iter, ITER_SOURCE, &kvec, 1, firmware->size); - ret = generic_load_microcode(cpu, &iter); + ret = parse_microcode_blobs(cpu, &iter); release_firmware(firmware); return ret; } +static void finalize_late_load(int result) +{ + if (!result) + update_ucode_pointer(ucode_patch_late); + else + kvfree(ucode_patch_late); + ucode_patch_late = NULL; +} + static struct microcode_ops microcode_intel_ops = { - .request_microcode_fw = request_microcode_fw, - .collect_cpu_info = collect_cpu_info, - .apply_microcode = apply_microcode_intel, + .request_microcode_fw = request_microcode_fw, + .collect_cpu_info = collect_cpu_info, + .apply_microcode = apply_microcode_late, + .finalize_late_load = finalize_late_load, + .use_nmi = IS_ENABLED(CONFIG_X86_64), }; -static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) +static __init void calc_llc_size_per_core(struct cpuinfo_x86 *c) { u64 llc_size = c->x86_cache_size * 1024ULL; do_div(llc_size, c->x86_max_cores); - - return (int)llc_size; + llc_size_per_core = (unsigned int)llc_size; } struct microcode_ops * __init init_intel_microcode(void) @@ -900,7 +664,7 @@ struct microcode_ops * __init init_intel_microcode(void) return NULL; } - llc_size_per_core = calc_llc_size_per_core(c); + calc_llc_size_per_core(c); return µcode_intel_ops; } diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index bf883aa71233..f8047b12329a 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -8,43 +8,37 @@ #include <asm/cpu.h> #include <asm/microcode.h> -struct ucode_patch { - struct list_head plist; - void *data; /* Intel uses only this one */ - unsigned int size; - u32 patch_id; - u16 equiv_cpu; -}; - -extern struct list_head microcode_cache; - struct device; enum ucode_state { UCODE_OK = 0, UCODE_NEW, + UCODE_NEW_SAFE, UCODE_UPDATED, UCODE_NFOUND, UCODE_ERROR, + UCODE_TIMEOUT, + UCODE_OFFLINE, }; struct microcode_ops { enum ucode_state (*request_microcode_fw)(int cpu, struct device *dev); - void (*microcode_fini_cpu)(int cpu); /* - * The generic 'microcode_core' part guarantees that - * the callbacks below run on a target cpu when they - * are being called. + * The generic 'microcode_core' part guarantees that the callbacks + * below run on a target CPU when they are being called. * See also the "Synchronization" section in microcode_core.c. */ - enum ucode_state (*apply_microcode)(int cpu); - int (*collect_cpu_info)(int cpu, struct cpu_signature *csig); + enum ucode_state (*apply_microcode)(int cpu); + int (*collect_cpu_info)(int cpu, struct cpu_signature *csig); + void (*finalize_late_load)(int result); + unsigned int nmi_safe : 1, + use_nmi : 1; }; extern struct ucode_cpu_info ucode_cpu_info[]; -struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa); +struct cpio_data find_microcode_in_initrd(const char *path); #define MAX_UCODE_COUNT 128 @@ -94,12 +88,12 @@ static inline unsigned int x86_cpuid_family(void) return x86_family(eax); } -extern bool initrd_gone; +extern bool dis_ucode_ldr; +extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD void load_ucode_amd_bsp(unsigned int family); void load_ucode_amd_ap(unsigned int family); -void load_ucode_amd_early(unsigned int cpuid_1_eax); int save_microcode_in_initrd_amd(unsigned int family); void reload_ucode_amd(unsigned int cpu); struct microcode_ops *init_amd_microcode(void); @@ -107,7 +101,6 @@ void exit_amd_microcode(void); #else /* CONFIG_CPU_SUP_AMD */ static inline void load_ucode_amd_bsp(unsigned int family) { } static inline void load_ucode_amd_ap(unsigned int family) { } -static inline void load_ucode_amd_early(unsigned int family) { } static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } static inline void reload_ucode_amd(unsigned int cpu) { } static inline struct microcode_ops *init_amd_microcode(void) { return NULL; } @@ -117,13 +110,11 @@ static inline void exit_amd_microcode(void) { } #ifdef CONFIG_CPU_SUP_INTEL void load_ucode_intel_bsp(void); void load_ucode_intel_ap(void); -int save_microcode_in_initrd_intel(void); void reload_ucode_intel(void); struct microcode_ops *init_intel_microcode(void); #else /* CONFIG_CPU_SUP_INTEL */ static inline void load_ucode_intel_bsp(void) { } static inline void load_ucode_intel_ap(void) { } -static inline int save_microcode_in_initrd_intel(void) { return -EINVAL; } static inline void reload_ucode_intel(void) { } static inline struct microcode_ops *init_intel_microcode(void) { return NULL; } #endif /* !CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 246a609f889b..de001b2146ab 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -19,6 +19,7 @@ #include <asm/apic.h> #include <asm/io_apic.h> #include <asm/bios_ebda.h> +#include <asm/microcode.h> #include <asm/tlbflush.h> #include <asm/bootparam_utils.h> @@ -29,11 +30,33 @@ static void __init i386_default_early_setup(void) x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; } +#ifdef CONFIG_MICROCODE_INITRD32 +unsigned long __initdata initrd_start_early; +static pte_t __initdata *initrd_pl2p_start, *initrd_pl2p_end; + +static void zap_early_initrd_mapping(void) +{ + pte_t *pl2p = initrd_pl2p_start; + + for (; pl2p < initrd_pl2p_end; pl2p++) { + *pl2p = (pte_t){ .pte = 0 }; + + if (!IS_ENABLED(CONFIG_X86_PAE)) + *(pl2p + ((PAGE_OFFSET >> PGDIR_SHIFT))) = (pte_t) {.pte = 0}; + } +} +#else +static inline void zap_early_initrd_mapping(void) { } +#endif + asmlinkage __visible void __init __noreturn i386_start_kernel(void) { /* Make sure IDT is set up before any exception happens */ idt_setup_early_handler(); + load_ucode_bsp(); + zap_early_initrd_mapping(); + cr4_init_shadow(); sanitize_boot_params(&boot_params); @@ -69,52 +92,83 @@ asmlinkage __visible void __init __noreturn i386_start_kernel(void) * to the first kernel PMD. Note the upper half of each PMD or PTE are * always zero at this stage. */ -void __init mk_early_pgtbl_32(void); -void __init mk_early_pgtbl_32(void) -{ -#ifdef __pa -#undef __pa -#endif -#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) - pte_t pte, *ptep; - int i; - unsigned long *ptr; - /* Enough space to fit pagetables for the low memory linear map */ - const unsigned long limit = __pa(_end) + - (PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT); #ifdef CONFIG_X86_PAE - pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd); -#define SET_PL2(pl2, val) { (pl2).pmd = (val); } +typedef pmd_t pl2_t; +#define pl2_base initial_pg_pmd +#define SET_PL2(val) { .pmd = (val), } #else - pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table); -#define SET_PL2(pl2, val) { (pl2).pgd = (val); } +typedef pgd_t pl2_t; +#define pl2_base initial_page_table +#define SET_PL2(val) { .pgd = (val), } #endif - ptep = (pte_t *)__pa(__brk_base); - pte.pte = PTE_IDENT_ATTR; - +static __init __no_stack_protector pte_t init_map(pte_t pte, pte_t **ptep, pl2_t **pl2p, + const unsigned long limit) +{ while ((pte.pte & PTE_PFN_MASK) < limit) { + pl2_t pl2 = SET_PL2((unsigned long)*ptep | PDE_IDENT_ATTR); + int i; + + **pl2p = pl2; + if (!IS_ENABLED(CONFIG_X86_PAE)) { + /* Kernel PDE entry */ + *(*pl2p + ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2; + } - SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR); - *pl2p = pl2; -#ifndef CONFIG_X86_PAE - /* Kernel PDE entry */ - *(pl2p + ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2; -#endif for (i = 0; i < PTRS_PER_PTE; i++) { - *ptep = pte; + **ptep = pte; pte.pte += PAGE_SIZE; - ptep++; + (*ptep)++; } - - pl2p++; + (*pl2p)++; } + return pte; +} + +void __init __no_stack_protector mk_early_pgtbl_32(void) +{ + /* Enough space to fit pagetables for the low memory linear map */ + unsigned long limit = __pa_nodebug(_end) + (PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT); + pte_t pte, *ptep = (pte_t *)__pa_nodebug(__brk_base); + struct boot_params __maybe_unused *params; + pl2_t *pl2p = (pl2_t *)__pa_nodebug(pl2_base); + unsigned long *ptr; + + pte.pte = PTE_IDENT_ATTR; + pte = init_map(pte, &ptep, &pl2p, limit); - ptr = (unsigned long *)__pa(&max_pfn_mapped); + ptr = (unsigned long *)__pa_nodebug(&max_pfn_mapped); /* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */ *ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT; - ptr = (unsigned long *)__pa(&_brk_end); + ptr = (unsigned long *)__pa_nodebug(&_brk_end); *ptr = (unsigned long)ptep + PAGE_OFFSET; -} +#ifdef CONFIG_MICROCODE_INITRD32 + /* Running on a hypervisor? */ + if (native_cpuid_ecx(1) & BIT(31)) + return; + + params = (struct boot_params *)__pa_nodebug(&boot_params); + if (!params->hdr.ramdisk_size || !params->hdr.ramdisk_image) + return; + + /* Save the virtual start address */ + ptr = (unsigned long *)__pa_nodebug(&initrd_start_early); + *ptr = (pte.pte & PTE_PFN_MASK) + PAGE_OFFSET; + *ptr += ((unsigned long)params->hdr.ramdisk_image) & ~PAGE_MASK; + + /* Save PLP2 for cleanup */ + ptr = (unsigned long *)__pa_nodebug(&initrd_pl2p_start); + *ptr = (unsigned long)pl2p + PAGE_OFFSET; + + limit = (unsigned long)params->hdr.ramdisk_image; + pte.pte = PTE_IDENT_ATTR | PFN_ALIGN(limit); + limit = (unsigned long)params->hdr.ramdisk_image + params->hdr.ramdisk_size; + + init_map(pte, &ptep, &pl2p, limit); + + ptr = (unsigned long *)__pa_nodebug(&initrd_pl2p_end); + *ptr = (unsigned long)pl2p + PAGE_OFFSET; +#endif +} diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b6554212b7c7..487ac57e2c81 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -118,11 +118,6 @@ SYM_CODE_START(startup_32) movl %eax, pa(olpc_ofw_pgd) #endif -#ifdef CONFIG_MICROCODE - /* Early load ucode on BSP. */ - call load_ucode_bsp -#endif - /* Create early pagetables. */ call mk_early_pgtbl_32 @@ -157,11 +152,6 @@ SYM_FUNC_START(startup_32_smp) movl %eax,%ss leal -__PAGE_OFFSET(%ecx),%esp -#ifdef CONFIG_MICROCODE - /* Early load ucode on AP. */ - call load_ucode_ap -#endif - .Ldefault_entry: movl $(CR0_STATE & ~X86_CR0_PG),%eax movl %eax,%cr0 diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 4766b6bed443..17e955ab69fe 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -33,6 +33,7 @@ #include <asm/reboot.h> #include <asm/cache.h> #include <asm/nospec-branch.h> +#include <asm/microcode.h> #include <asm/sev.h> #define CREATE_TRACE_POINTS @@ -343,6 +344,9 @@ static noinstr void default_do_nmi(struct pt_regs *regs) instrumentation_begin(); + if (microcode_nmi_handler_enabled() && microcode_nmi_handler()) + goto out; + handled = nmi_handle(NMI_LOCAL, regs); __this_cpu_add(nmi_stats.normal, handled); if (handled) { @@ -498,8 +502,11 @@ DEFINE_IDTENTRY_RAW(exc_nmi) if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) raw_atomic_long_inc(&nsp->idt_calls); - if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) + if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) { + if (microcode_nmi_handler_enabled()) + microcode_offline_nmi_handler(); return; + } if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { this_cpu_write(nmi_state, NMI_LATCHED); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 163c35db3d04..1526747bedf2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1167,7 +1167,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) - conswitchp = &vga_con; + vgacon_register_screen(&screen_info); #endif #endif x86_init.oem.banner(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c4aca66f0902..2cc2aa120b4b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -272,12 +272,9 @@ static void notrace start_secondary(void *unused) cpu_init_exception_handling(); /* - * 32-bit systems load the microcode from the ASM startup code for - * historical reasons. - * - * On 64-bit systems load it before reaching the AP alive - * synchronization point below so it is not part of the full per - * CPU serialized bringup part when "parallel" bringup is enabled. + * Load the microcode before reaching the AP alive synchronization + * point below so it is not part of the full per CPU serialized + * bringup part when "parallel" bringup is enabled. * * That's even safe when hyperthreading is enabled in the CPU as * the core code starts the primary threads first and leaves the @@ -290,8 +287,7 @@ static void notrace start_secondary(void *unused) * CPUID, MSRs etc. must be strictly serialized to maintain * software state correctness. */ - if (IS_ENABLED(CONFIG_X86_64)) - load_ucode_ap(); + load_ucode_ap(); /* * Synchronization point with the hotplug core. Sets this CPUs diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile index 6825e146a62f..b86d634730b2 100644 --- a/arch/x86/um/vdso/Makefile +++ b/arch/x86/um/vdso/Makefile @@ -67,15 +67,3 @@ quiet_cmd_vdso = VDSO $@ VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv -z noexecstack GCOV_PROFILE := n - -# -# Install the unstripped copy of vdso*.so listed in $(vdso-install-y). -# -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ -$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -PHONY += vdso_install $(vdso-install-y) -vdso_install: $(vdso-install-y) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 52d6e4870a04..bdec4a773af0 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -19,7 +19,6 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/proc_fs.h> -#include <linux/screen_info.h> #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/reboot.h> @@ -49,17 +48,6 @@ #include <asm/timex.h> #include <asm/traps.h> -#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) -struct screen_info screen_info = { - .orig_x = 0, - .orig_y = 24, - .orig_video_cols = 80, - .orig_video_lines = 24, - .orig_video_isVGA = 1, - .orig_video_points = 16, -}; -#endif - #ifdef CONFIG_BLK_DEV_INITRD extern unsigned long initrd_start; extern unsigned long initrd_end; |