diff options
239 files changed, 3195 insertions, 2178 deletions
diff --git a/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml b/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml index 3248595dc93c..f04870d84542 100644 --- a/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml +++ b/Documentation/devicetree/bindings/dsp/fsl,dsp.yaml @@ -85,4 +85,5 @@ examples: <&pd IMX_SC_R_DSP_RAM>; mbox-names = "txdb0", "txdb1", "rxdb0", "rxdb1"; mboxes = <&lsio_mu13 2 0>, <&lsio_mu13 2 1>, <&lsio_mu13 3 0>, <&lsio_mu13 3 1>; + memory-region = <&dsp_reserved>; }; diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml index 676ec42e1438..567a33a83dce 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml @@ -43,13 +43,9 @@ properties: dvdd-supply: description: DVdd voltage supply - items: - - const: dvdd avdd-supply: description: AVdd voltage supply - items: - - const: avdd adi,rejection-60-Hz-enable: description: | @@ -99,6 +95,9 @@ required: examples: - | spi0 { + #address-cells = <1>; + #size-cells = <0>; + adc@0 { compatible = "adi,ad7192"; reg = <0>; diff --git a/Documentation/devicetree/bindings/media/rc.yaml b/Documentation/devicetree/bindings/media/rc.yaml index 3d5c154fd230..9054555e6608 100644 --- a/Documentation/devicetree/bindings/media/rc.yaml +++ b/Documentation/devicetree/bindings/media/rc.yaml @@ -73,7 +73,6 @@ properties: - rc-genius-tvgo-a11mce - rc-gotview7135 - rc-hauppauge - - rc-hauppauge - rc-hisi-poplar - rc-hisi-tv-demo - rc-imon-mce diff --git a/Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml index 8a56a8526cef..a97482179cf5 100644 --- a/Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml @@ -37,7 +37,7 @@ properties: - description: exclusive PHY reset line - description: shared reset line between the PCIe PHY and PCIe controller - resets-names: + reset-names: items: - const: phy - const: pcie diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 402636356fbe..a3c3349046c4 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -143,6 +143,20 @@ via their employer, they cannot enter individual non-disclosure agreements in their role as Linux kernel developers. They will, however, agree to adhere to this documented process and the Memorandum of Understanding. +The disclosing party should provide a list of contacts for all other +entities who have already been, or should be, informed about the issue. +This serves several purposes: + + - The list of disclosed entities allows communication accross the + industry, e.g. other OS vendors, HW vendors, etc. + + - The disclosed entities can be contacted to name experts who should + participate in the mitigation development. + + - If an expert which is required to handle an issue is employed by an + listed entity or member of an listed entity, then the response teams can + request the disclosure of that expert from that entity. This ensures + that the expert is also part of the entity's response team. Disclosure """""""""" @@ -158,10 +172,7 @@ Mitigation development """""""""""""""""""""" The initial response team sets up an encrypted mailing-list or repurposes -an existing one if appropriate. The disclosing party should provide a list -of contacts for all other parties who have already been, or should be, -informed about the issue. The response team contacts these parties so they -can name experts who should be subscribed to the mailing-list. +an existing one if appropriate. Using a mailing-list is close to the normal Linux development process and has been successfully used in developing mitigations for various hardware @@ -175,9 +186,24 @@ development branch against the mainline kernel and backport branches for stable kernel versions as necessary. The initial response team will identify further experts from the Linux -kernel developer community as needed and inform the disclosing party about -their participation. Bringing in experts can happen at any time of the -development process and often needs to be handled in a timely manner. +kernel developer community as needed. Bringing in experts can happen at any +time of the development process and needs to be handled in a timely manner. + +If an expert is employed by or member of an entity on the disclosure list +provided by the disclosing party, then participation will be requested from +the relevant entity. + +If not, then the disclosing party will be informed about the experts +participation. The experts are covered by the Memorandum of Understanding +and the disclosing party is requested to acknowledge the participation. In +case that the disclosing party has a compelling reason to object, then this +objection has to be raised within five work days and resolved with the +incident team immediately. If the disclosing party does not react within +five work days this is taken as silent acknowledgement. + +After acknowledgement or resolution of an objection the expert is disclosed +by the incident team and brought into the development process. + Coordinated release """"""""""""""""""" @@ -216,7 +242,7 @@ an involved disclosed party. The current ambassadors list: ARM AMD IBM - Intel + Intel Tony Luck <tony.luck@intel.com> Qualcomm Trilok Soni <tsoni@codeaurora.org> Microsoft Sasha Levin <sashal@kernel.org> diff --git a/MAINTAINERS b/MAINTAINERS index 296de2b51c83..55199ef7fa74 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6112,7 +6112,10 @@ M: Gao Xiang <gaoxiang25@huawei.com> M: Chao Yu <yuchao0@huawei.com> L: linux-erofs@lists.ozlabs.org S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git +F: Documentation/filesystems/erofs.txt F: fs/erofs/ +F: include/trace/events/erofs.h ERRSEQ ERROR TRACKING INFRASTRUCTURE M: Jeff Layton <jlayton@kernel.org> @@ -9075,6 +9078,7 @@ F: security/keys/ KGDB / KDB /debug_core M: Jason Wessel <jason.wessel@windriver.com> M: Daniel Thompson <daniel.thompson@linaro.org> +R: Douglas Anderson <dianders@chromium.org> W: http://kgdb.wiki.kernel.org/ L: kgdb-bugreport@lists.sourceforge.net T: git git://git.kernel.org/pub/scm/linux/kernel/git/jwessel/kgdb.git @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 -PATCHLEVEL = 3 +PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Bobtail Squid # *DOCUMENTATION* diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts index ebfe28c2f544..a1fd3e63e86e 100644 --- a/arch/arm/boot/dts/am3517-evm.dts +++ b/arch/arm/boot/dts/am3517-evm.dts @@ -124,10 +124,11 @@ }; lcd0: display@0 { - compatible = "panel-dpi"; + /* This isn't the exact LCD, but the timings meet spec */ + /* To make it work, set CONFIG_OMAP2_DSS_MIN_FCK_PER_PCK=4 */ + compatible = "newhaven,nhd-4.3-480272ef-atxl"; label = "15"; - status = "okay"; - pinctrl-names = "default"; + backlight = <&bl>; enable-gpios = <&gpio6 16 GPIO_ACTIVE_HIGH>; /* gpio176, lcd INI */ vcc-supply = <&vdd_io_reg>; @@ -136,22 +137,6 @@ remote-endpoint = <&dpi_out>; }; }; - - panel-timing { - clock-frequency = <9000000>; - hactive = <480>; - vactive = <272>; - hfront-porch = <3>; - hback-porch = <2>; - hsync-len = <42>; - vback-porch = <3>; - vfront-porch = <4>; - vsync-len = <11>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <1>; - }; }; bl: backlight { diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index 5563ee54c960..b56524cc7fe2 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -228,6 +228,20 @@ >; }; + i2c2_pins: pinmux_i2c2_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT | MUX_MODE0) /* i2c2_scl */ + OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT | MUX_MODE0) /* i2c2_sda */ + >; + }; + + i2c3_pins: pinmux_i2c3_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT | MUX_MODE0) /* i2c3_scl */ + OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT | MUX_MODE0) /* i2c3_sda */ + >; + }; + tsc2004_pins: pinmux_tsc2004_pins { pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE4) /* mcbsp4_dr.gpio_153 */ @@ -249,18 +263,6 @@ OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */ >; }; - i2c2_pins: pinmux_i2c2_pins { - pinctrl-single,pins = < - OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT | MUX_MODE0) /* i2c2_scl */ - OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT | MUX_MODE0) /* i2c2_sda */ - >; - }; - i2c3_pins: pinmux_i2c3_pins { - pinctrl-single,pins = < - OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT | MUX_MODE0) /* i2c3_scl */ - OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT | MUX_MODE0) /* i2c3_sda */ - >; - }; }; &omap3_pmx_core2 { diff --git a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi index 642e809e757a..449cc7616da6 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi @@ -108,7 +108,6 @@ &dss { status = "ok"; vdds_dsi-supply = <&vpll2>; - vdda_video-supply = <&video_reg>; pinctrl-names = "default"; pinctrl-0 = <&dss_dpi_pins1>; port { @@ -124,44 +123,20 @@ display0 = &lcd0; }; - video_reg: video_reg { - pinctrl-names = "default"; - pinctrl-0 = <&panel_pwr_pins>; - compatible = "regulator-fixed"; - regulator-name = "fixed-supply"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - gpio = <&gpio5 27 GPIO_ACTIVE_HIGH>; /* gpio155, lcd INI */ - }; - lcd0: display { - compatible = "panel-dpi"; + /* This isn't the exact LCD, but the timings meet spec */ + /* To make it work, set CONFIG_OMAP2_DSS_MIN_FCK_PER_PCK=4 */ + compatible = "newhaven,nhd-4.3-480272ef-atxl"; label = "15"; - status = "okay"; - /* default-on; */ pinctrl-names = "default"; - + pinctrl-0 = <&panel_pwr_pins>; + backlight = <&bl>; + enable-gpios = <&gpio5 27 GPIO_ACTIVE_HIGH>; port { lcd_in: endpoint { remote-endpoint = <&dpi_out>; }; }; - - panel-timing { - clock-frequency = <9000000>; - hactive = <480>; - vactive = <272>; - hfront-porch = <3>; - hback-porch = <2>; - hsync-len = <42>; - vback-porch = <3>; - vfront-porch = <4>; - vsync-len = <11>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <1>; - }; }; bl: backlight { diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index c7bf9c493646..64eb896907bf 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -363,6 +363,7 @@ CONFIG_DRM_OMAP_PANEL_TPO_TD028TTEC1=m CONFIG_DRM_OMAP_PANEL_TPO_TD043MTEA1=m CONFIG_DRM_OMAP_PANEL_NEC_NL8048HL11=m CONFIG_DRM_TILCDC=m +CONFIG_DRM_PANEL_SIMPLE=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_MODE_HELPERS=y diff --git a/arch/arm/include/asm/xen/xen-ops.h b/arch/arm/include/asm/xen/xen-ops.h deleted file mode 100644 index ec154e719b11..000000000000 --- a/arch/arm/include/asm/xen/xen-ops.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_XEN_OPS_H -#define _ASM_XEN_OPS_H - -void xen_efi_runtime_setup(void); - -#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm/mach-aspeed/Kconfig b/arch/arm/mach-aspeed/Kconfig index 56007b0b6120..e8d6e9957d65 100644 --- a/arch/arm/mach-aspeed/Kconfig +++ b/arch/arm/mach-aspeed/Kconfig @@ -26,7 +26,6 @@ config MACH_ASPEED_G4 config MACH_ASPEED_G5 bool "Aspeed SoC 5th Generation" depends on ARCH_MULTI_V6 - select CPU_V6 select PINCTRL_ASPEED_G5 select FTTMR010_TIMER help diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 6c6f8fce854e..d942a3357090 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -491,11 +491,11 @@ static int ti_sysc_clkdm_init(struct device *dev, struct clk *fck, struct clk *ick, struct ti_sysc_cookie *cookie) { - if (fck) + if (!IS_ERR(fck)) cookie->clkdm = ti_sysc_find_one_clockdomain(fck); if (cookie->clkdm) return 0; - if (ick) + if (!IS_ERR(ick)) cookie->clkdm = ti_sysc_find_one_clockdomain(ick); if (cookie->clkdm) return 0; diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile index 7ed28982c4c3..c32d04713ba0 100644 --- a/arch/arm/xen/Makefile +++ b/arch/arm/xen/Makefile @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o -obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/arm/xen/efi.c b/arch/arm/xen/efi.c deleted file mode 100644 index d687a73044bf..000000000000 --- a/arch/arm/xen/efi.c +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2015, Linaro Limited, Shannon Zhao - */ - -#include <linux/efi.h> -#include <xen/xen-ops.h> -#include <asm/xen/xen-ops.h> - -/* Set XEN EFI runtime services function pointers. Other fields of struct efi, - * e.g. efi.systab, will be set like normal EFI. - */ -void __init xen_efi_runtime_setup(void) -{ - efi.get_time = xen_efi_get_time; - efi.set_time = xen_efi_set_time; - efi.get_wakeup_time = xen_efi_get_wakeup_time; - efi.set_wakeup_time = xen_efi_set_wakeup_time; - efi.get_variable = xen_efi_get_variable; - efi.get_next_variable = xen_efi_get_next_variable; - efi.set_variable = xen_efi_set_variable; - efi.query_variable_info = xen_efi_query_variable_info; - efi.update_capsule = xen_efi_update_capsule; - efi.query_capsule_caps = xen_efi_query_capsule_caps; - efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; - efi.reset_system = xen_efi_reset_system; -} -EXPORT_SYMBOL_GPL(xen_efi_runtime_setup); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 1e57692552d9..dd6804a64f1a 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -15,7 +15,6 @@ #include <xen/xen-ops.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> -#include <asm/xen/xen-ops.h> #include <asm/system_misc.h> #include <asm/efi.h> #include <linux/interrupt.h> @@ -437,7 +436,7 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op); EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op); EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); -EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); +EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op_raw); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); EXPORT_SYMBOL_GPL(HYPERVISOR_dm_op); diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 2b2c208408bb..38fa917c8585 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -28,7 +28,10 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int order) for_each_memblock(memory, reg) { if (reg->base < (phys_addr_t)0xffffffff) { - flags |= __GFP_DMA; + if (IS_ENABLED(CONFIG_ZONE_DMA32)) + flags |= __GFP_DMA32; + else + flags |= __GFP_DMA; break; } } diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 86825aa20852..97f21cc66657 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -47,30 +47,6 @@ #define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1) #define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1) -/** - * hyp_alternate_select - Generates patchable code sequences that are - * used to switch between two implementations of a function, depending - * on the availability of a feature. - * - * @fname: a symbol name that will be defined as a function returning a - * function pointer whose type will match @orig and @alt - * @orig: A pointer to the default function, as returned by @fname when - * @cond doesn't hold - * @alt: A pointer to the alternate function, as returned by @fname - * when @cond holds - * @cond: a CPU feature (as described in asm/cpufeature.h) - */ -#define hyp_alternate_select(fname, orig, alt, cond) \ -typeof(orig) * __hyp_text fname(void) \ -{ \ - typeof(alt) *val = orig; \ - asm volatile(ALTERNATIVE("nop \n", \ - "mov %0, %1 \n", \ - cond) \ - : "+r" (val) : "r" (alt)); \ - return val; \ -} - int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/xen/xen-ops.h b/arch/arm64/include/asm/xen/xen-ops.h deleted file mode 100644 index e6e784051932..000000000000 --- a/arch/arm64/include/asm/xen/xen-ops.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_XEN_OPS_H -#define _ASM_XEN_OPS_H - -void xen_efi_runtime_setup(void); - -#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index bd978ad71936..3d3815020e36 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -229,20 +229,6 @@ static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) } } -static bool __hyp_text __true_value(void) -{ - return true; -} - -static bool __hyp_text __false_value(void) -{ - return false; -} - -static hyp_alternate_select(__check_arm_834220, - __false_value, __true_value, - ARM64_WORKAROUND_834220); - static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) { u64 par, tmp; @@ -298,7 +284,8 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) * resolve the IPA using the AT instruction. */ if (!(esr & ESR_ELx_S1PTW) && - (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + (cpus_have_const_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { if (!__translate_far_to_hpfar(far, &hpfar)) return false; } else { diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index c466060b76d6..eb0efc5557f3 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -67,10 +67,14 @@ static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, isb(); } -static hyp_alternate_select(__tlb_switch_to_guest, - __tlb_switch_to_guest_nvhe, - __tlb_switch_to_guest_vhe, - ARM64_HAS_VIRT_HOST_EXTN); +static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, + struct tlb_inv_context *cxt) +{ + if (has_vhe()) + __tlb_switch_to_guest_vhe(kvm, cxt); + else + __tlb_switch_to_guest_nvhe(kvm, cxt); +} static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, struct tlb_inv_context *cxt) @@ -98,10 +102,14 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, write_sysreg(0, vttbr_el2); } -static hyp_alternate_select(__tlb_switch_to_host, - __tlb_switch_to_host_nvhe, - __tlb_switch_to_host_vhe, - ARM64_HAS_VIRT_HOST_EXTN); +static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, + struct tlb_inv_context *cxt) +{ + if (has_vhe()) + __tlb_switch_to_host_vhe(kvm, cxt); + else + __tlb_switch_to_host_nvhe(kvm, cxt); +} void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { @@ -111,7 +119,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); /* * We could do so much better if we had the VA as well. @@ -154,7 +162,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) if (!has_vhe() && icache_is_vpipt()) __flush_icache_all(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) @@ -165,13 +173,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) @@ -180,13 +188,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest()(kvm, &cxt); + __tlb_switch_to_guest(kvm, &cxt); __tlbi(vmalle1); dsb(nsh); isb(); - __tlb_switch_to_host()(kvm, &cxt); + __tlb_switch_to_host(kvm, &cxt); } void __hyp_text __kvm_flush_vm_context(void) diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile index a4fc65f3928d..b66215e8658e 100644 --- a/arch/arm64/xen/Makefile +++ b/arch/arm64/xen/Makefile @@ -1,4 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o mm.o) obj-y := xen-arm.o hypercall.o -obj-$(CONFIG_XEN_EFI) += $(addprefix ../../arm/xen/, efi.o) diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c index 27ef5b2c43ab..cb2a0d94a144 100644 --- a/arch/csky/abiv1/alignment.c +++ b/arch/csky/abiv1/alignment.c @@ -5,8 +5,10 @@ #include <linux/uaccess.h> #include <linux/ptrace.h> -static int align_enable = 1; -static int align_count; +static int align_kern_enable = 1; +static int align_usr_enable = 1; +static int align_kern_count = 0; +static int align_usr_count = 0; static inline uint32_t get_ptreg(struct pt_regs *regs, uint32_t rx) { @@ -32,9 +34,6 @@ static int ldb_asm(uint32_t addr, uint32_t *valp) uint32_t val; int err; - if (!access_ok((void *)addr, 1)) - return 1; - asm volatile ( "movi %0, 0\n" "1:\n" @@ -67,9 +66,6 @@ static int stb_asm(uint32_t addr, uint32_t val) { int err; - if (!access_ok((void *)addr, 1)) - return 1; - asm volatile ( "movi %0, 0\n" "1:\n" @@ -203,8 +199,6 @@ static int stw_c(struct pt_regs *regs, uint32_t rz, uint32_t addr) if (stb_asm(addr, byte3)) return 1; - align_count++; - return 0; } @@ -226,7 +220,14 @@ void csky_alignment(struct pt_regs *regs) uint32_t addr = 0; if (!user_mode(regs)) + goto kernel_area; + + if (!align_usr_enable) { + pr_err("%s user disabled.\n", __func__); goto bad_area; + } + + align_usr_count++; ret = get_user(tmp, (uint16_t *)instruction_pointer(regs)); if (ret) { @@ -234,6 +235,19 @@ void csky_alignment(struct pt_regs *regs) goto bad_area; } + goto good_area; + +kernel_area: + if (!align_kern_enable) { + pr_err("%s kernel disabled.\n", __func__); + goto bad_area; + } + + align_kern_count++; + + tmp = *(uint16_t *)instruction_pointer(regs); + +good_area: opcode = (uint32_t)tmp; rx = opcode & 0xf; @@ -286,18 +300,32 @@ bad_area: force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr); } -static struct ctl_table alignment_tbl[4] = { +static struct ctl_table alignment_tbl[5] = { + { + .procname = "kernel_enable", + .data = &align_kern_enable, + .maxlen = sizeof(align_kern_enable), + .mode = 0666, + .proc_handler = &proc_dointvec + }, + { + .procname = "user_enable", + .data = &align_usr_enable, + .maxlen = sizeof(align_usr_enable), + .mode = 0666, + .proc_handler = &proc_dointvec + }, { - .procname = "enable", - .data = &align_enable, - .maxlen = sizeof(align_enable), + .procname = "kernel_count", + .data = &align_kern_count, + .maxlen = sizeof(align_kern_count), .mode = 0666, .proc_handler = &proc_dointvec }, { - .procname = "count", - .data = &align_count, - .maxlen = sizeof(align_count), + .procname = "user_count", + .data = &align_usr_count, + .maxlen = sizeof(align_usr_count), .mode = 0666, .proc_handler = &proc_dointvec }, diff --git a/arch/csky/abiv1/cacheflush.c b/arch/csky/abiv1/cacheflush.c index 10af8b6fe322..9f1fe80cc847 100644 --- a/arch/csky/abiv1/cacheflush.c +++ b/arch/csky/abiv1/cacheflush.c @@ -11,42 +11,66 @@ #include <asm/cacheflush.h> #include <asm/cachectl.h> +#define PG_dcache_clean PG_arch_1 + void flush_dcache_page(struct page *page) { - struct address_space *mapping = page_mapping(page); - unsigned long addr; + struct address_space *mapping; - if (mapping && !mapping_mapped(mapping)) { - set_bit(PG_arch_1, &(page)->flags); + if (page == ZERO_PAGE(0)) return; - } - /* - * We could delay the flush for the !page_mapping case too. But that - * case is for exec env/arg pages and those are %99 certainly going to - * get faulted into the tlb (and thus flushed) anyways. - */ - addr = (unsigned long) page_address(page); - dcache_wb_range(addr, addr + PAGE_SIZE); + mapping = page_mapping_file(page); + + if (mapping && !page_mapcount(page)) + clear_bit(PG_dcache_clean, &page->flags); + else { + dcache_wbinv_all(); + if (mapping) + icache_inv_all(); + set_bit(PG_dcache_clean, &page->flags); + } } +EXPORT_SYMBOL(flush_dcache_page); -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, - pte_t *pte) +void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) { - unsigned long addr; + unsigned long pfn = pte_pfn(*ptep); struct page *page; - unsigned long pfn; - pfn = pte_pfn(*pte); - if (unlikely(!pfn_valid(pfn))) + if (!pfn_valid(pfn)) return; page = pfn_to_page(pfn); - addr = (unsigned long) page_address(page); + if (page == ZERO_PAGE(0)) + return; + + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + dcache_wbinv_all(); - if (vma->vm_flags & VM_EXEC || - pages_do_alias(addr, address & PAGE_MASK)) - cache_wbinv_all(); + if (page_mapping_file(page)) { + if (vma->vm_flags & VM_EXEC) + icache_inv_all(); + } +} + +void flush_kernel_dcache_page(struct page *page) +{ + struct address_space *mapping; + + mapping = page_mapping_file(page); + + if (!mapping || mapping_mapped(mapping)) + dcache_wbinv_all(); +} +EXPORT_SYMBOL(flush_kernel_dcache_page); + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + dcache_wbinv_all(); - clear_bit(PG_arch_1, &(page)->flags); + if (vma->vm_flags & VM_EXEC) + icache_inv_all(); } diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h index 5f663aef9b1b..79ef9e8c1afd 100644 --- a/arch/csky/abiv1/inc/abi/cacheflush.h +++ b/arch/csky/abiv1/inc/abi/cacheflush.h @@ -4,46 +4,63 @@ #ifndef __ABI_CSKY_CACHEFLUSH_H #define __ABI_CSKY_CACHEFLUSH_H -#include <linux/compiler.h> +#include <linux/mm.h> #include <asm/string.h> #include <asm/cache.h> #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -#define flush_cache_mm(mm) cache_wbinv_all() +#define flush_cache_mm(mm) dcache_wbinv_all() #define flush_cache_page(vma, page, pfn) cache_wbinv_all() #define flush_cache_dup_mm(mm) cache_wbinv_all() +#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE +extern void flush_kernel_dcache_page(struct page *); + +#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) +#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) + +static inline void flush_kernel_vmap_range(void *addr, int size) +{ + dcache_wbinv_all(); +} +static inline void invalidate_kernel_vmap_range(void *addr, int size) +{ + dcache_wbinv_all(); +} + +#define ARCH_HAS_FLUSH_ANON_PAGE +static inline void flush_anon_page(struct vm_area_struct *vma, + struct page *page, unsigned long vmaddr) +{ + if (PageAnon(page)) + cache_wbinv_all(); +} + /* * if (current_mm != vma->mm) cache_wbinv_range(start, end) will be broken. * Use cache_wbinv_all() here and need to be improved in future. */ -#define flush_cache_range(vma, start, end) cache_wbinv_all() -#define flush_cache_vmap(start, end) cache_wbinv_range(start, end) -#define flush_cache_vunmap(start, end) cache_wbinv_range(start, end) +extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +#define flush_cache_vmap(start, end) cache_wbinv_all() +#define flush_cache_vunmap(start, end) cache_wbinv_all() -#define flush_icache_page(vma, page) cache_wbinv_all() +#define flush_icache_page(vma, page) do {} while (0); #define flush_icache_range(start, end) cache_wbinv_range(start, end) -#define flush_icache_user_range(vma, pg, adr, len) \ - cache_wbinv_range(adr, adr + len) +#define flush_icache_user_range(vma,page,addr,len) \ + flush_dcache_page(page) #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ do { \ - cache_wbinv_all(); \ memcpy(dst, src, len); \ - cache_wbinv_all(); \ } while (0) #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ do { \ - cache_wbinv_all(); \ memcpy(dst, src, len); \ cache_wbinv_all(); \ } while (0) -#define flush_dcache_mmap_lock(mapping) do {} while (0) -#define flush_dcache_mmap_unlock(mapping) do {} while (0) - #endif /* __ABI_CSKY_CACHEFLUSH_H */ diff --git a/arch/csky/abiv1/inc/abi/page.h b/arch/csky/abiv1/inc/abi/page.h index 6336e92a103a..c864519117c7 100644 --- a/arch/csky/abiv1/inc/abi/page.h +++ b/arch/csky/abiv1/inc/abi/page.h @@ -1,13 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. -extern unsigned long shm_align_mask; +#include <asm/shmparam.h> + extern void flush_dcache_page(struct page *page); static inline unsigned long pages_do_alias(unsigned long addr1, unsigned long addr2) { - return (addr1 ^ addr2) & shm_align_mask; + return (addr1 ^ addr2) & (SHMLBA-1); } static inline void clear_user_page(void *addr, unsigned long vaddr, diff --git a/arch/csky/abiv1/mmap.c b/arch/csky/abiv1/mmap.c index b462fd50b23a..6792aca49999 100644 --- a/arch/csky/abiv1/mmap.c +++ b/arch/csky/abiv1/mmap.c @@ -9,58 +9,63 @@ #include <linux/random.h> #include <linux/io.h> -unsigned long shm_align_mask = (0x4000 >> 1) - 1; /* Sane caches */ +#define COLOUR_ALIGN(addr,pgoff) \ + ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ + (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) -#define COLOUR_ALIGN(addr, pgoff) \ - ((((addr) + shm_align_mask) & ~shm_align_mask) + \ - (((pgoff) << PAGE_SHIFT) & shm_align_mask)) - -unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, +/* + * We need to ensure that shared mappings are correctly aligned to + * avoid aliasing issues with VIPT caches. We need to ensure that + * a specific page of an object is always mapped at a multiple of + * SHMLBA bytes. + * + * We unconditionally provide this function for all cases. + */ +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - struct vm_area_struct *vmm; - int do_color_align; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int do_align = 0; + struct vm_unmapped_area_info info; + + /* + * We only need to do colour alignment if either the I or D + * caches alias. + */ + do_align = filp || (flags & MAP_SHARED); + /* + * We enforce the MAP_FIXED case. + */ if (flags & MAP_FIXED) { - /* - * We do not accept a shared mapping if it would violate - * cache aliasing constraints. - */ - if ((flags & MAP_SHARED) && - ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) + if (flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) return -EINVAL; return addr; } if (len > TASK_SIZE) return -ENOMEM; - do_color_align = 0; - if (filp || (flags & MAP_SHARED)) - do_color_align = 1; + if (addr) { - if (do_color_align) + if (do_align) addr = COLOUR_ALIGN(addr, pgoff); else addr = PAGE_ALIGN(addr); - vmm = find_vma(current->mm, addr); + + vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vmm || addr + len <= vmm->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } - addr = TASK_UNMAPPED_BASE; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); - for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { - /* At this point: (!vmm || addr < vmm->vm_end). */ - if (TASK_SIZE - len < addr) - return -ENOMEM; - if (!vmm || addr + len <= vmm->vm_start) - return addr; - addr = vmm->vm_end; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - } + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); } diff --git a/arch/csky/include/asm/barrier.h b/arch/csky/include/asm/barrier.h index 476eb786f22d..a430e7fddf35 100644 --- a/arch/csky/include/asm/barrier.h +++ b/arch/csky/include/asm/barrier.h @@ -9,11 +9,12 @@ #define nop() asm volatile ("nop\n":::"memory") /* - * sync: completion barrier - * sync.s: completion barrier and shareable to other cores - * sync.i: completion barrier with flush cpu pipeline - * sync.is: completion barrier with flush cpu pipeline and shareable to - * other cores + * sync: completion barrier, all sync.xx instructions + * guarantee the last response recieved by bus transaction + * made by ld/st instructions before sync.s + * sync.s: inherit from sync, but also shareable to other cores + * sync.i: inherit from sync, but also flush cpu pipeline + * sync.is: the same with sync.i + sync.s * * bar.brwarw: ordering barrier for all load/store instructions before it * bar.brwarws: ordering barrier for all load/store instructions before it @@ -27,9 +28,7 @@ */ #ifdef CONFIG_CPU_HAS_CACHEV2 -#define mb() asm volatile ("bar.brwarw\n":::"memory") -#define rmb() asm volatile ("bar.brar\n":::"memory") -#define wmb() asm volatile ("bar.bwaw\n":::"memory") +#define mb() asm volatile ("sync.s\n":::"memory") #ifdef CONFIG_SMP #define __smp_mb() asm volatile ("bar.brwarws\n":::"memory") diff --git a/arch/csky/include/asm/cache.h b/arch/csky/include/asm/cache.h index d68373463676..1d5fc2f78fd7 100644 --- a/arch/csky/include/asm/cache.h +++ b/arch/csky/include/asm/cache.h @@ -24,6 +24,7 @@ void cache_wbinv_range(unsigned long start, unsigned long end); void cache_wbinv_all(void); void dma_wbinv_range(unsigned long start, unsigned long end); +void dma_inv_range(unsigned long start, unsigned long end); void dma_wb_range(unsigned long start, unsigned long end); #endif diff --git a/arch/csky/include/asm/io.h b/arch/csky/include/asm/io.h index c1dfa9c10e36..80d071e2567f 100644 --- a/arch/csky/include/asm/io.h +++ b/arch/csky/include/asm/io.h @@ -4,17 +4,10 @@ #ifndef __ASM_CSKY_IO_H #define __ASM_CSKY_IO_H -#include <abi/pgtable-bits.h> +#include <asm/pgtable.h> #include <linux/types.h> #include <linux/version.h> -extern void __iomem *ioremap(phys_addr_t offset, size_t size); - -extern void iounmap(void *addr); - -extern int remap_area_pages(unsigned long address, phys_addr_t phys_addr, - size_t size, unsigned long flags); - /* * I/O memory access primitives. Reads are ordered relative to any * following Normal memory access. Writes are ordered relative to any prior @@ -40,9 +33,17 @@ extern int remap_area_pages(unsigned long address, phys_addr_t phys_addr, #define writel(v,c) ({ wmb(); writel_relaxed((v),(c)); mb(); }) #endif -#define ioremap_nocache(phy, sz) ioremap(phy, sz) -#define ioremap_wc ioremap_nocache -#define ioremap_wt ioremap_nocache +/* + * I/O memory mapping functions. + */ +extern void __iomem *ioremap_cache(phys_addr_t addr, size_t size); +extern void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot); +extern void iounmap(void *addr); + +#define ioremap(addr, size) __ioremap((addr), (size), pgprot_noncached(PAGE_KERNEL)) +#define ioremap_wc(addr, size) __ioremap((addr), (size), pgprot_writecombine(PAGE_KERNEL)) +#define ioremap_nocache(addr, size) ioremap((addr), (size)) +#define ioremap_cache ioremap_cache #include <asm-generic/io.h> diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index 0040b3a05b61..7c21985c60dc 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -258,6 +258,16 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot) { unsigned long prot = pgprot_val(_prot); + prot = (prot & ~_CACHE_MASK) | _CACHE_UNCACHED | _PAGE_SO; + + return __pgprot(prot); +} + +#define pgprot_writecombine pgprot_writecombine +static inline pgprot_t pgprot_writecombine(pgprot_t _prot) +{ + unsigned long prot = pgprot_val(_prot); + prot = (prot & ~_CACHE_MASK) | _CACHE_UNCACHED; return __pgprot(prot); diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index a7e84ccccbd8..a7a5b67df898 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -17,6 +17,12 @@ #define PTE_INDX_SHIFT 10 #define _PGDIR_SHIFT 22 +.macro zero_fp +#ifdef CONFIG_STACKTRACE + movi r8, 0 +#endif +.endm + .macro tlbop_begin name, val0, val1, val2 ENTRY(csky_\name) mtcr a3, ss2 @@ -96,6 +102,7 @@ ENTRY(csky_\name) SAVE_ALL 0 .endm .macro tlbop_end is_write + zero_fp RD_MEH a2 psrset ee, ie mov a0, sp @@ -120,6 +127,7 @@ tlbop_end 1 ENTRY(csky_systemcall) SAVE_ALL TRAP0_SIZE + zero_fp psrset ee, ie @@ -136,9 +144,9 @@ ENTRY(csky_systemcall) mov r9, sp bmaski r10, THREAD_SHIFT andn r9, r10 - ldw r8, (r9, TINFO_FLAGS) - ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) - cmpnei r8, 0 + ldw r12, (r9, TINFO_FLAGS) + ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + cmpnei r12, 0 bt csky_syscall_trace #if defined(__CSKYABIV2__) subi sp, 8 @@ -180,7 +188,7 @@ csky_syscall_trace: ENTRY(ret_from_kernel_thread) jbsr schedule_tail - mov a0, r8 + mov a0, r10 jsr r9 jbsr ret_from_exception @@ -189,9 +197,9 @@ ENTRY(ret_from_fork) mov r9, sp bmaski r10, THREAD_SHIFT andn r9, r10 - ldw r8, (r9, TINFO_FLAGS) - ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) - cmpnei r8, 0 + ldw r12, (r9, TINFO_FLAGS) + ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + cmpnei r12, 0 bf ret_from_exception mov a0, sp /* sp = pt_regs pointer */ jbsr syscall_trace_exit @@ -209,9 +217,9 @@ ret_from_exception: bmaski r10, THREAD_SHIFT andn r9, r10 - ldw r8, (r9, TINFO_FLAGS) - andi r8, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) - cmpnei r8, 0 + ldw r12, (r9, TINFO_FLAGS) + andi r12, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) + cmpnei r12, 0 bt exit_work 1: RESTORE_ALL @@ -220,11 +228,11 @@ exit_work: lrw syscallid, ret_from_exception mov lr, syscallid - btsti r8, TIF_NEED_RESCHED + btsti r12, TIF_NEED_RESCHED bt work_resched mov a0, sp - mov a1, r8 + mov a1, r12 jmpi do_notify_resume work_resched: @@ -232,6 +240,7 @@ work_resched: ENTRY(csky_trap) SAVE_ALL 0 + zero_fp psrset ee mov a0, sp /* Push Stack pointer arg */ jbsr trap_c /* Call C-level trap handler */ @@ -265,6 +274,7 @@ ENTRY(csky_get_tls) ENTRY(csky_irq) SAVE_ALL 0 + zero_fp psrset ee #ifdef CONFIG_PREEMPT @@ -276,27 +286,23 @@ ENTRY(csky_irq) * Get task_struct->stack.preempt_count for current, * and increase 1. */ - ldw r8, (r9, TINFO_PREEMPT) - addi r8, 1 - stw r8, (r9, TINFO_PREEMPT) + ldw r12, (r9, TINFO_PREEMPT) + addi r12, 1 + stw r12, (r9, TINFO_PREEMPT) #endif mov a0, sp jbsr csky_do_IRQ #ifdef CONFIG_PREEMPT - subi r8, 1 - stw r8, (r9, TINFO_PREEMPT) - cmpnei r8, 0 + subi r12, 1 + stw r12, (r9, TINFO_PREEMPT) + cmpnei r12, 0 bt 2f - ldw r8, (r9, TINFO_FLAGS) - btsti r8, TIF_NEED_RESCHED + ldw r12, (r9, TINFO_FLAGS) + btsti r12, TIF_NEED_RESCHED bf 2f -1: jbsr preempt_schedule_irq /* irq en/disable is done inside */ - ldw r7, (r9, TINFO_FLAGS) /* get new tasks TI_FLAGS */ - btsti r7, TIF_NEED_RESCHED - bt 1b /* go again */ #endif 2: jmpi ret_from_exception diff --git a/arch/csky/kernel/perf_event.c b/arch/csky/kernel/perf_event.c index 4c1a1934d76a..1a29f1157449 100644 --- a/arch/csky/kernel/perf_event.c +++ b/arch/csky/kernel/perf_event.c @@ -1306,7 +1306,7 @@ int csky_pmu_device_probe(struct platform_device *pdev, &csky_pmu.count_width)) { csky_pmu.count_width = DEFAULT_COUNT_WIDTH; } - csky_pmu.max_period = BIT(csky_pmu.count_width) - 1; + csky_pmu.max_period = BIT_ULL(csky_pmu.count_width) - 1; csky_pmu.plat_device = pdev; @@ -1337,7 +1337,7 @@ int csky_pmu_device_probe(struct platform_device *pdev, return ret; } -const static struct of_device_id csky_pmu_of_device_ids[] = { +static const struct of_device_id csky_pmu_of_device_ids[] = { {.compatible = "csky,csky-pmu"}, {}, }; diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index e555740c0be5..f320d9248a22 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -55,7 +55,7 @@ int copy_thread(unsigned long clone_flags, if (unlikely(p->flags & PF_KTHREAD)) { memset(childregs, 0, sizeof(struct pt_regs)); childstack->r15 = (unsigned long) ret_from_kernel_thread; - childstack->r8 = kthread_arg; + childstack->r10 = kthread_arg; childstack->r9 = usp; childregs->sr = mfcr("psr"); } else { diff --git a/arch/csky/mm/cachev1.c b/arch/csky/mm/cachev1.c index b8a75cce0b8c..494ec912abff 100644 --- a/arch/csky/mm/cachev1.c +++ b/arch/csky/mm/cachev1.c @@ -120,7 +120,12 @@ void dma_wbinv_range(unsigned long start, unsigned long end) cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); } +void dma_inv_range(unsigned long start, unsigned long end) +{ + cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); +} + void dma_wb_range(unsigned long start, unsigned long end) { - cache_op_range(start, end, DATA_CACHE|CACHE_INV, 1); + cache_op_range(start, end, DATA_CACHE|CACHE_CLR|CACHE_INV, 1); } diff --git a/arch/csky/mm/cachev2.c b/arch/csky/mm/cachev2.c index baaf05d69f44..b61be6518e21 100644 --- a/arch/csky/mm/cachev2.c +++ b/arch/csky/mm/cachev2.c @@ -69,11 +69,20 @@ void dma_wbinv_range(unsigned long start, unsigned long end) sync_is(); } +void dma_inv_range(unsigned long start, unsigned long end) +{ + unsigned long i = start & ~(L1_CACHE_BYTES - 1); + + for (; i < end; i += L1_CACHE_BYTES) + asm volatile("dcache.iva %0\n"::"r"(i):"memory"); + sync_is(); +} + void dma_wb_range(unsigned long start, unsigned long end) { unsigned long i = start & ~(L1_CACHE_BYTES - 1); for (; i < end; i += L1_CACHE_BYTES) - asm volatile("dcache.civa %0\n"::"r"(i):"memory"); + asm volatile("dcache.cva %0\n"::"r"(i):"memory"); sync_is(); } diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index 602a60d47a94..06e85b565454 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -14,69 +14,50 @@ #include <linux/version.h> #include <asm/cache.h> -void arch_dma_prep_coherent(struct page *page, size_t size) -{ - if (PageHighMem(page)) { - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - do { - void *ptr = kmap_atomic(page); - size_t _size = (size < PAGE_SIZE) ? size : PAGE_SIZE; - - memset(ptr, 0, _size); - dma_wbinv_range((unsigned long)ptr, - (unsigned long)ptr + _size); - - kunmap_atomic(ptr); - - page++; - size -= PAGE_SIZE; - count--; - } while (count); - } else { - void *ptr = page_address(page); - - memset(ptr, 0, size); - dma_wbinv_range((unsigned long)ptr, (unsigned long)ptr + size); - } -} - static inline void cache_op(phys_addr_t paddr, size_t size, void (*fn)(unsigned long start, unsigned long end)) { - struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); - unsigned int offset = paddr & ~PAGE_MASK; - size_t left = size; - unsigned long start; + struct page *page = phys_to_page(paddr); + void *start = __va(page_to_phys(page)); + unsigned long offset = offset_in_page(paddr); + size_t left = size; do { size_t len = left; + if (offset + len > PAGE_SIZE) + len = PAGE_SIZE - offset; + if (PageHighMem(page)) { - void *addr; + start = kmap_atomic(page); - if (offset + len > PAGE_SIZE) { - if (offset >= PAGE_SIZE) { - page += offset >> PAGE_SHIFT; - offset &= ~PAGE_MASK; - } - len = PAGE_SIZE - offset; - } + fn((unsigned long)start + offset, + (unsigned long)start + offset + len); - addr = kmap_atomic(page); - start = (unsigned long)(addr + offset); - fn(start, start + len); - kunmap_atomic(addr); + kunmap_atomic(start); } else { - start = (unsigned long)phys_to_virt(paddr); - fn(start, start + size); + fn((unsigned long)start + offset, + (unsigned long)start + offset + len); } offset = 0; + page++; + start += PAGE_SIZE; left -= len; } while (left); } +static void dma_wbinv_set_zero_range(unsigned long start, unsigned long end) +{ + memset((void *)start, 0, end - start); + dma_wbinv_range(start, end); +} + +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + cache_op(page_to_phys(page), size, dma_wbinv_set_zero_range); +} + void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { @@ -98,11 +79,10 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, { switch (dir) { case DMA_TO_DEVICE: - cache_op(paddr, size, dma_wb_range); - break; + return; case DMA_FROM_DEVICE: case DMA_BIDIRECTIONAL: - cache_op(paddr, size, dma_wbinv_range); + cache_op(paddr, size, dma_inv_range); break; default: BUG(); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index eb0dc9e5065f..d4c2292ea46b 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -60,22 +60,6 @@ void __init mem_init(void) mem_init_print_info(NULL); } -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < end) - pr_info("Freeing initrd memory: %ldk freed\n", - (end - start) >> 10); - - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages_inc(); - } -} -#endif - extern char __init_begin[], __init_end[]; void free_initmem(void) diff --git a/arch/csky/mm/ioremap.c b/arch/csky/mm/ioremap.c index 8473b6bdf512..e13cd3497628 100644 --- a/arch/csky/mm/ioremap.c +++ b/arch/csky/mm/ioremap.c @@ -8,12 +8,12 @@ #include <asm/pgtable.h> -void __iomem *ioremap(phys_addr_t addr, size_t size) +static void __iomem *__ioremap_caller(phys_addr_t addr, size_t size, + pgprot_t prot, void *caller) { phys_addr_t last_addr; unsigned long offset, vaddr; struct vm_struct *area; - pgprot_t prot; last_addr = addr + size - 1; if (!size || last_addr < addr) @@ -23,15 +23,12 @@ void __iomem *ioremap(phys_addr_t addr, size_t size) addr &= PAGE_MASK; size = PAGE_ALIGN(size + offset); - area = get_vm_area_caller(size, VM_ALLOC, __builtin_return_address(0)); + area = get_vm_area_caller(size, VM_IOREMAP, caller); if (!area) return NULL; vaddr = (unsigned long)area->addr; - prot = __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | - _PAGE_GLOBAL | _CACHE_UNCACHED | _PAGE_SO); - if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) { free_vm_area(area); return NULL; @@ -39,7 +36,20 @@ void __iomem *ioremap(phys_addr_t addr, size_t size) return (void __iomem *)(vaddr + offset); } -EXPORT_SYMBOL(ioremap); + +void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot) +{ + return __ioremap_caller(phys_addr, size, prot, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(__ioremap); + +void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) +{ + return __ioremap_caller(phys_addr, size, PAGE_KERNEL, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); void iounmap(void __iomem *addr) { @@ -51,10 +61,9 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { if (!pfn_valid(pfn)) { - vma_prot.pgprot |= _PAGE_SO; return pgprot_noncached(vma_prot); } else if (file->f_flags & O_SYNC) { - return pgprot_noncached(vma_prot); + return pgprot_writecombine(vma_prot); } return vma_prot; diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index 63a9f33aa43e..5cfc9d347826 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -99,7 +99,7 @@ miscintc: interrupt-controller@18060010 { compatible = "qca,ar7240-misc-intc"; - reg = <0x18060010 0x4>; + reg = <0x18060010 0x8>; interrupt-parent = <&cpuintc>; interrupts = <6>; diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c index af44b35d79a1..b4328b3b5288 100644 --- a/arch/mips/fw/arc/memory.c +++ b/arch/mips/fw/arc/memory.c @@ -160,7 +160,6 @@ void __init prom_meminit(void) void __init prom_free_prom_memory(void) { - unsigned long addr; int i; if (prom_flags & PROM_FLAG_DONT_FREE_TEMP) diff --git a/arch/mips/include/asm/octeon/cvmx-ipd.h b/arch/mips/include/asm/octeon/cvmx-ipd.h index cbdc14b77435..adab7b54c3b4 100644 --- a/arch/mips/include/asm/octeon/cvmx-ipd.h +++ b/arch/mips/include/asm/octeon/cvmx-ipd.h @@ -36,6 +36,7 @@ #include <asm/octeon/octeon-feature.h> #include <asm/octeon/cvmx-ipd-defs.h> +#include <asm/octeon/cvmx-pip-defs.h> enum cvmx_ipd_mode { CVMX_IPD_OPC_MODE_STT = 0LL, /* All blocks DRAM, not cached in L2 */ diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index 071053ece677..5d70babfc9ee 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -52,6 +52,7 @@ # endif #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 /* whitelists for checksyscalls */ #define __IGNORE_fadvise64_64 diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/cpu-bugs64.c index fa62cd1dff93..6a7afe7ef4d3 100644 --- a/arch/mips/kernel/cpu-bugs64.c +++ b/arch/mips/kernel/cpu-bugs64.c @@ -24,7 +24,8 @@ static char r4kwar[] __initdata = static char daddiwar[] __initdata = "Enable CPU_DADDI_WORKAROUNDS to rectify."; -static inline void align_mod(const int align, const int mod) +static __always_inline __init +void align_mod(const int align, const int mod) { asm volatile( ".set push\n\t" @@ -38,8 +39,9 @@ static inline void align_mod(const int align, const int mod) : "n"(align), "n"(mod)); } -static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w, - const int align, const int mod) +static __always_inline __init +void mult_sh_align_mod(long *v1, long *v2, long *w, + const int align, const int mod) { unsigned long flags; int m1, m2; @@ -113,7 +115,7 @@ static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w, *w = lw; } -static inline void check_mult_sh(void) +static __always_inline __init void check_mult_sh(void) { long v1[8], v2[8], w[8]; int bug, fix, i; @@ -176,7 +178,7 @@ asmlinkage void __init do_daddi_ov(struct pt_regs *regs) exception_exit(prev_state); } -static inline void check_daddi(void) +static __init void check_daddi(void) { extern asmlinkage void handle_daddi_ov(void); unsigned long flags; @@ -242,7 +244,7 @@ static inline void check_daddi(void) int daddiu_bug = IS_ENABLED(CONFIG_CPU_MIPSR6) ? 0 : -1; -static inline void check_daddiu(void) +static __init void check_daddiu(void) { long v, w, tmp; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index b8249c233754..5eec13b8d222 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -108,6 +108,9 @@ void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type) return; } + if (start < PHYS_OFFSET) + return; + memblock_add(start, size); /* Reserve any memory except the ordinary RAM ranges. */ switch (type) { @@ -321,7 +324,7 @@ static void __init bootmem_init(void) * Reserve any memory between the start of RAM and PHYS_OFFSET */ if (ramstart > PHYS_OFFSET) - memblock_reserve(PHYS_OFFSET, PFN_UP(ramstart) - PHYS_OFFSET); + memblock_reserve(PHYS_OFFSET, ramstart - PHYS_OFFSET); if (PFN_UP(ramstart) > ARCH_PFN_OFFSET) { pr_info("Wasting %lu bytes for tracking %lu unused pages\n", diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index b0e25e913bdb..3f16f3823031 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -80,6 +80,7 @@ SYSCALL_DEFINE6(mips_mmap2, unsigned long, addr, unsigned long, len, save_static_function(sys_fork); save_static_function(sys_clone); +save_static_function(sys_clone3); SYSCALL_DEFINE1(set_thread_area, unsigned long, addr) { diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index c9c879ec9b6d..e7c5ab38e403 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -373,4 +373,4 @@ 432 n32 fsmount sys_fsmount 433 n32 fspick sys_fspick 434 n32 pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 n32 clone3 __sys_clone3 diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index bbce9159caa1..13cd66581f3b 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -349,4 +349,4 @@ 432 n64 fsmount sys_fsmount 433 n64 fspick sys_fspick 434 n64 pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 n64 clone3 __sys_clone3 diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 9653591428ec..353539ea4140 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -422,4 +422,4 @@ 432 o32 fsmount sys_fsmount 433 o32 fspick sys_fspick 434 o32 pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 o32 clone3 __sys_clone3 diff --git a/arch/mips/loongson64/common/mem.c b/arch/mips/loongson64/common/mem.c index 4abb92e0fc39..4254ac4ec616 100644 --- a/arch/mips/loongson64/common/mem.c +++ b/arch/mips/loongson64/common/mem.c @@ -3,6 +3,7 @@ */ #include <linux/fs.h> #include <linux/fcntl.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <asm/bootinfo.h> @@ -64,24 +65,22 @@ void __init prom_init_memory(void) node_id = loongson_memmap->map[i].node_id; mem_type = loongson_memmap->map[i].mem_type; - if (node_id == 0) { - switch (mem_type) { - case SYSTEM_RAM_LOW: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RAM); - break; - case SYSTEM_RAM_HIGH: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RAM); - break; - case SYSTEM_RAM_RESERVED: - add_memory_region(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20, - BOOT_MEM_RESERVED); - break; - } + if (node_id != 0) + continue; + + switch (mem_type) { + case SYSTEM_RAM_LOW: + memblock_add(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; + case SYSTEM_RAM_HIGH: + memblock_add(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; + case SYSTEM_RAM_RESERVED: + memblock_reserve(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; } } } diff --git a/arch/mips/loongson64/common/serial.c b/arch/mips/loongson64/common/serial.c index ffefc1cb2612..98c3a7feb10f 100644 --- a/arch/mips/loongson64/common/serial.c +++ b/arch/mips/loongson64/common/serial.c @@ -110,7 +110,7 @@ static int __init serial_init(void) } module_init(serial_init); -static void __init serial_exit(void) +static void __exit serial_exit(void) { platform_device_unregister(&uart8250_device); } diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 414e97de5dc0..8f20d2cb3767 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -142,8 +142,6 @@ static void __init szmem(unsigned int node) (u32)node_id, mem_type, mem_start, mem_size); pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", start_pfn, end_pfn, num_physpages); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RAM); memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), node); break; @@ -156,16 +154,12 @@ static void __init szmem(unsigned int node) (u32)node_id, mem_type, mem_start, mem_size); pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", start_pfn, end_pfn, num_physpages); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RAM); memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), node); break; case SYSTEM_RAM_RESERVED: pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", (u32)node_id, mem_type, mem_start, mem_size); - add_memory_region((node_id << 44) + mem_start, - (u64)mem_size << 20, BOOT_MEM_RESERVED); memblock_reserve(((node_id << 44) + mem_start), mem_size << 20); break; @@ -191,8 +185,6 @@ static void __init node_mem_init(unsigned int node) NODE_DATA(node)->node_start_pfn = start_pfn; NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; - free_bootmem_with_active_regions(node, end_pfn); - if (node == 0) { /* kernel end address */ unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(&_end)); @@ -209,8 +201,6 @@ static void __init node_mem_init(unsigned int node) memblock_reserve((node_addrspace_offset | 0xfe000000), 32 << 20); } - - sparse_memory_present_with_active_regions(node); } static __init void prom_meminit(void) @@ -227,6 +217,7 @@ static __init void prom_meminit(void) cpumask_clear(&__node_data[(node)]->cpumask); } } + memblocks_present(); max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) { diff --git a/arch/mips/pmcs-msp71xx/msp_prom.c b/arch/mips/pmcs-msp71xx/msp_prom.c index dfb527961a27..800a21b8b8b0 100644 --- a/arch/mips/pmcs-msp71xx/msp_prom.c +++ b/arch/mips/pmcs-msp71xx/msp_prom.c @@ -61,6 +61,7 @@ int init_debug = 1; /* memory blocks */ struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS]; +#define MAX_PROM_MEM 5 static phys_addr_t prom_mem_base[MAX_PROM_MEM] __initdata; static phys_addr_t prom_mem_size[MAX_PROM_MEM] __initdata; static unsigned int nr_prom_mem __initdata; @@ -358,7 +359,7 @@ void __init prom_meminit(void) p++; if (type == BOOT_MEM_ROM_DATA) { - if (nr_prom_mem >= 5) { + if (nr_prom_mem >= MAX_PROM_MEM) { pr_err("Too many ROM DATA regions"); continue; } @@ -377,7 +378,6 @@ void __init prom_free_prom_memory(void) char *ptr; int len = 0; int i; - unsigned long addr; /* * preserve environment variables and command line from pmon/bbload diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 69cfa0a5339e..807f0f782f75 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -59,7 +59,7 @@ CFLAGS_REMOVE_vgettimeofday.o = -pg ifndef CONFIG_CPU_MIPSR6 ifeq ($(call ld-ifversion, -lt, 225000000, y),y) $(warning MIPS VDSO requires binutils >= 2.25) - obj-vdso-y := $(filter-out gettimeofday.o, $(obj-vdso-y)) + obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y)) ccflags-vdso += -DDISABLE_MIPS_VDSO endif endif diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c deleted file mode 100644 index e8243c7fd5b5..000000000000 --- a/arch/mips/vdso/gettimeofday.c +++ /dev/null @@ -1,269 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2015 Imagination Technologies - * Author: Alex Smith <alex.smith@imgtec.com> - */ - -#include "vdso.h" - -#include <linux/compiler.h> -#include <linux/time.h> - -#include <asm/clocksource.h> -#include <asm/io.h> -#include <asm/unistd.h> -#include <asm/vdso.h> - -#ifdef CONFIG_MIPS_CLOCK_VSYSCALL - -static __always_inline long gettimeofday_fallback(struct timeval *_tv, - struct timezone *_tz) -{ - register struct timezone *tz asm("a1") = _tz; - register struct timeval *tv asm("a0") = _tv; - register long ret asm("v0"); - register long nr asm("v0") = __NR_gettimeofday; - register long error asm("a3"); - - asm volatile( - " syscall\n" - : "=r" (ret), "=r" (error) - : "r" (tv), "r" (tz), "r" (nr) - : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); - - return error ? -ret : ret; -} - -#endif - -static __always_inline long clock_gettime_fallback(clockid_t _clkid, - struct timespec *_ts) -{ - register struct timespec *ts asm("a1") = _ts; - register clockid_t clkid asm("a0") = _clkid; - register long ret asm("v0"); - register long nr asm("v0") = __NR_clock_gettime; - register long error asm("a3"); - - asm volatile( - " syscall\n" - : "=r" (ret), "=r" (error) - : "r" (clkid), "r" (ts), "r" (nr) - : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); - - return error ? -ret : ret; -} - -static __always_inline int do_realtime_coarse(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - - do { - start_seq = vdso_data_read_begin(data); - - ts->tv_sec = data->xtime_sec; - ts->tv_nsec = data->xtime_nsec >> data->cs_shift; - } while (vdso_data_read_retry(data, start_seq)); - - return 0; -} - -static __always_inline int do_monotonic_coarse(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - u64 to_mono_sec; - u64 to_mono_nsec; - - do { - start_seq = vdso_data_read_begin(data); - - ts->tv_sec = data->xtime_sec; - ts->tv_nsec = data->xtime_nsec >> data->cs_shift; - - to_mono_sec = data->wall_to_mono_sec; - to_mono_nsec = data->wall_to_mono_nsec; - } while (vdso_data_read_retry(data, start_seq)); - - ts->tv_sec += to_mono_sec; - timespec_add_ns(ts, to_mono_nsec); - - return 0; -} - -#ifdef CONFIG_CSRC_R4K - -static __always_inline u64 read_r4k_count(void) -{ - unsigned int count; - - __asm__ __volatile__( - " .set push\n" - " .set mips32r2\n" - " rdhwr %0, $2\n" - " .set pop\n" - : "=r" (count)); - - return count; -} - -#endif - -#ifdef CONFIG_CLKSRC_MIPS_GIC - -static __always_inline u64 read_gic_count(const union mips_vdso_data *data) -{ - void __iomem *gic = get_gic(data); - u32 hi, hi2, lo; - - do { - hi = __raw_readl(gic + sizeof(lo)); - lo = __raw_readl(gic); - hi2 = __raw_readl(gic + sizeof(lo)); - } while (hi2 != hi); - - return (((u64)hi) << 32) + lo; -} - -#endif - -static __always_inline u64 get_ns(const union mips_vdso_data *data) -{ - u64 cycle_now, delta, nsec; - - switch (data->clock_mode) { -#ifdef CONFIG_CSRC_R4K - case VDSO_CLOCK_R4K: - cycle_now = read_r4k_count(); - break; -#endif -#ifdef CONFIG_CLKSRC_MIPS_GIC - case VDSO_CLOCK_GIC: - cycle_now = read_gic_count(data); - break; -#endif - default: - return 0; - } - - delta = (cycle_now - data->cs_cycle_last) & data->cs_mask; - - nsec = (delta * data->cs_mult) + data->xtime_nsec; - nsec >>= data->cs_shift; - - return nsec; -} - -static __always_inline int do_realtime(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - u64 ns; - - do { - start_seq = vdso_data_read_begin(data); - - if (data->clock_mode == VDSO_CLOCK_NONE) - return -ENOSYS; - - ts->tv_sec = data->xtime_sec; - ns = get_ns(data); - } while (vdso_data_read_retry(data, start_seq)); - - ts->tv_nsec = 0; - timespec_add_ns(ts, ns); - - return 0; -} - -static __always_inline int do_monotonic(struct timespec *ts, - const union mips_vdso_data *data) -{ - u32 start_seq; - u64 ns; - u64 to_mono_sec; - u64 to_mono_nsec; - - do { - start_seq = vdso_data_read_begin(data); - - if (data->clock_mode == VDSO_CLOCK_NONE) - return -ENOSYS; - - ts->tv_sec = data->xtime_sec; - ns = get_ns(data); - - to_mono_sec = data->wall_to_mono_sec; - to_mono_nsec = data->wall_to_mono_nsec; - } while (vdso_data_read_retry(data, start_seq)); - - ts->tv_sec += to_mono_sec; - ts->tv_nsec = 0; - timespec_add_ns(ts, ns + to_mono_nsec); - - return 0; -} - -#ifdef CONFIG_MIPS_CLOCK_VSYSCALL - -/* - * This is behind the ifdef so that we don't provide the symbol when there's no - * possibility of there being a usable clocksource, because there's nothing we - * can do without it. When libc fails the symbol lookup it should fall back on - * the standard syscall path. - */ -int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - const union mips_vdso_data *data = get_vdso_data(); - struct timespec ts; - int ret; - - ret = do_realtime(&ts, data); - if (ret) - return gettimeofday_fallback(tv, tz); - - if (tv) { - tv->tv_sec = ts.tv_sec; - tv->tv_usec = ts.tv_nsec / 1000; - } - - if (tz) { - tz->tz_minuteswest = data->tz_minuteswest; - tz->tz_dsttime = data->tz_dsttime; - } - - return 0; -} - -#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */ - -int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts) -{ - const union mips_vdso_data *data = get_vdso_data(); - int ret = -1; - - switch (clkid) { - case CLOCK_REALTIME_COARSE: - ret = do_realtime_coarse(ts, data); - break; - case CLOCK_MONOTONIC_COARSE: - ret = do_monotonic_coarse(ts, data); - break; - case CLOCK_REALTIME: - ret = do_realtime(ts, data); - break; - case CLOCK_MONOTONIC: - ret = do_monotonic(ts, data); - break; - default: - break; - } - - if (ret) - ret = clock_gettime_fallback(clkid, ts); - - return ret; -} diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 574eca33f893..d97db3ad9aae 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -254,7 +254,13 @@ extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); -extern int radix__has_transparent_hugepage(void); +static inline int radix__has_transparent_hugepage(void) +{ + /* For radix 2M at PMD level means thp */ + if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT) + return 1; + return 0; +} #endif extern int __meminit radix__vmemmap_create_mapping(unsigned long start, diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d7fcdfa7fee4..ec2547cc5ecb 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -36,8 +36,8 @@ #include "book3s.h" #include "trace.h" -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ +#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ /* #define EXIT_DEBUG */ @@ -69,8 +69,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "pthru_all", VCPU_STAT(pthru_all) }, { "pthru_host", VCPU_STAT(pthru_host) }, { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, - { "largepages_2M", VM_STAT(num_2M_pages) }, - { "largepages_1G", VM_STAT(num_1G_pages) }, + { "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) }, + { "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) }, { NULL } }; diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index d1f390ac9cdb..64733b9cb20a 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -406,6 +406,8 @@ int hash__has_transparent_hugepage(void) return 1; } +EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 3a1fbf9cb8f8..6ee17d09649c 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1027,13 +1027,6 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, return old_pmd; } -int radix__has_transparent_hugepage(void) -{ - /* For radix 2M at PMD level means thp */ - if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT) - return 1; - return 0; -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a44f6281ca3a..4e08246acd79 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -172,6 +172,21 @@ static __meminit void vmemmap_list_populate(unsigned long phys, vmemmap_list = vmem_back; } +static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start, + unsigned long page_size) +{ + unsigned long nr_pfn = page_size / sizeof(struct page); + unsigned long start_pfn = page_to_pfn((struct page *)start); + + if ((start_pfn + nr_pfn) > altmap->end_pfn) + return true; + + if (start_pfn < altmap->base_pfn) + return true; + + return false; +} + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { @@ -194,7 +209,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, * fail due to alignment issues when using 16MB hugepages, so * fall back to system memory if the altmap allocation fail. */ - if (altmap) { + if (altmap && !altmap_cross_boundary(altmap, start, page_size)) { p = altmap_alloc_block_buf(page_size, altmap); if (!p) pr_debug("altmap block allocation failed, falling back to system memory"); diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 5a02b7d50940..9c992a88d858 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -22,6 +22,7 @@ #define REG_L __REG_SEL(ld, lw) #define REG_S __REG_SEL(sd, sw) +#define REG_SC __REG_SEL(sc.d, sc.w) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index da7aa88113c2..2d592da1e776 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -98,7 +98,26 @@ _save_context: */ .macro RESTORE_ALL REG_L a0, PT_SSTATUS(sp) - REG_L a2, PT_SEPC(sp) + /* + * The current load reservation is effectively part of the processor's + * state, in the sense that load reservations cannot be shared between + * different hart contexts. We can't actually save and restore a load + * reservation, so instead here we clear any existing reservation -- + * it's always legal for implementations to clear load reservations at + * any point (as long as the forward progress guarantee is kept, but + * we'll ignore that here). + * + * Dangling load reservations can be the result of taking a trap in the + * middle of an LR/SC sequence, but can also be the result of a taken + * forward branch around an SC -- which is how we implement CAS. As a + * result we need to clear reservations between the last CAS and the + * jump back to the new context. While it is unlikely the store + * completes, implementations are allowed to expand reservations to be + * arbitrarily large. + */ + REG_L a2, PT_SEPC(sp) + REG_SC x0, a2, PT_SEPC(sp) + csrw CSR_SSTATUS, a0 csrw CSR_SEPC, a2 diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index f0ba71304b6e..83f7d12042fb 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -11,6 +11,7 @@ #include <linux/swap.h> #include <linux/sizes.h> #include <linux/of_fdt.h> +#include <linux/libfdt.h> #include <asm/fixmap.h> #include <asm/tlbflush.h> @@ -82,6 +83,8 @@ disable: } #endif /* CONFIG_BLK_DEV_INITRD */ +static phys_addr_t dtb_early_pa __initdata; + void __init setup_bootmem(void) { struct memblock_region *reg; @@ -117,7 +120,12 @@ void __init setup_bootmem(void) setup_initrd(); #endif /* CONFIG_BLK_DEV_INITRD */ - early_init_fdt_reserve_self(); + /* + * Avoid using early_init_fdt_reserve_self() since __pa() does + * not work for DTB pointers that are fixmap addresses + */ + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + early_init_fdt_scan_reserved_mem(); memblock_allow_resize(); memblock_dump_all(); @@ -393,6 +401,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) /* Save pointer to DTB for early FDT parsing */ dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK); + /* Save physical address for memblock reservation */ + dtb_early_pa = dtb_pa; } static void __init setup_vm_final(void) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 347f48702edb..38d64030aacf 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -44,6 +44,7 @@ CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_SIG=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y @@ -69,12 +70,13 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y CONFIG_MODULE_SIG_SHA256=y +CONFIG_UNUSED_SYMBOLS=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_CGROUP_IOCOST=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -370,6 +372,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m +# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y CONFIG_PCI_DEBUG=y CONFIG_HOTPLUG_PCI=y @@ -424,6 +427,7 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -435,6 +439,7 @@ CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m +CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_DM_SWITCH=m CONFIG_NETDEVICES=y CONFIG_BONDING=m @@ -489,6 +494,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_PACKET_ENGINES is not set +# CONFIG_NET_VENDOR_PENSANDO is not set # CONFIG_NET_VENDOR_QLOGIC is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set @@ -538,15 +544,16 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m -CONFIG_DRM=y -CONFIG_DRM_VIRTIO_GPU=y +CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m +CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_VFIO_MDEV=m @@ -580,6 +587,8 @@ CONFIG_NILFS2_FS=m CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y +CONFIG_FS_VERITY=y +CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y @@ -589,6 +598,7 @@ CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m +CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -648,12 +658,15 @@ CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_LOCKDOWN_LSM=y +CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_PCRYPT=m @@ -664,10 +677,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m @@ -739,7 +748,6 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 -CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_SECTION_MISMATCH=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 8514b8b9500f..25f799849582 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -44,6 +44,7 @@ CONFIG_NUMA=y # CONFIG_NUMA_EMU is not set CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_SIG=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y @@ -66,11 +67,12 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y CONFIG_MODULE_SIG_SHA256=y +CONFIG_UNUSED_SYMBOLS=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_CGROUP_IOCOST=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -363,6 +365,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m +# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y @@ -418,6 +421,7 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -429,6 +433,7 @@ CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m +CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_DM_SWITCH=m CONFIG_DM_INTEGRITY=m CONFIG_NETDEVICES=y @@ -484,6 +489,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_PACKET_ENGINES is not set +# CONFIG_NET_VENDOR_PENSANDO is not set # CONFIG_NET_VENDOR_QLOGIC is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set @@ -533,16 +539,16 @@ CONFIG_WATCHDOG_CORE=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m -CONFIG_DRM=y -CONFIG_DRM_VIRTIO_GPU=y -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set +CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m +CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_VFIO_MDEV=m @@ -573,6 +579,8 @@ CONFIG_NILFS2_FS=m CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y +CONFIG_FS_VERITY=y +CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y @@ -581,6 +589,7 @@ CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m +CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -639,12 +648,15 @@ CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_LOCKDOWN_LSM=y +CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set @@ -656,10 +668,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m @@ -727,7 +735,6 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 -CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index be09a208b608..20c51e5d9353 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -61,7 +61,7 @@ CONFIG_RAW_DRIVER=y CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_DIMLIB is not set +CONFIG_LSM="yama,loadpin,safesetid,integrity" CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index d3f09526ee19..61467b9eecc7 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -41,7 +41,7 @@ __ATOMIC_OPS(__atomic64_xor, long, "laxg") #undef __ATOMIC_OP #define __ATOMIC_CONST_OP(op_name, op_type, op_string, op_barrier) \ -static inline void op_name(op_type val, op_type *ptr) \ +static __always_inline void op_name(op_type val, op_type *ptr) \ { \ asm volatile( \ op_string " %[ptr],%[val]\n" \ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index b8833ac983fa..eb7eed43e780 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -56,7 +56,7 @@ __bitops_byte(unsigned long nr, volatile unsigned long *ptr) return ((unsigned char *)ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); } -static inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) +static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; @@ -77,7 +77,7 @@ static inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) __atomic64_or(mask, (long *)addr); } -static inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) +static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; @@ -98,8 +98,8 @@ static inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) __atomic64_and(mask, (long *)addr); } -static inline void arch_change_bit(unsigned long nr, - volatile unsigned long *ptr) +static __always_inline void arch_change_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index a092f63aac6a..c0f3bfeddcbe 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -171,7 +171,7 @@ typedef struct { unsigned char bytes[16]; } cpacf_mask_t; * * Returns 1 if @func is available for @opcode, 0 otherwise */ -static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask) +static __always_inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask) { register unsigned long r0 asm("0") = 0; /* query function */ register unsigned long r1 asm("1") = (unsigned long) mask; diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index ceeb552d3472..819803a97c2b 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -28,6 +28,8 @@ asm(".include \"asm/cpu_mf-insn.h\"\n"); CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ CPU_MF_INT_SF_LSDA) +#define CPU_MF_SF_RIBM_NOTAV 0x1 /* Sampling unavailable */ + /* CPU measurement facility support */ static inline int cpum_cf_avail(void) { @@ -69,7 +71,8 @@ struct hws_qsi_info_block { /* Bit(s) */ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ unsigned long tear; /* 24-31: TEAR contents */ unsigned long dear; /* 32-39: DEAR contents */ - unsigned int rsvrd0; /* 40-43: reserved */ + unsigned int rsvrd0:24; /* 40-42: reserved */ + unsigned int ribm:8; /* 43: Reserved by IBM */ unsigned int cpu_speed; /* 44-47: CPU speed */ unsigned long long rsvrd1; /* 48-55: reserved */ unsigned long long rsvrd2; /* 56-63: reserved */ @@ -220,7 +223,8 @@ enum stcctm_ctr_set { MT_DIAG = 5, MT_DIAG_CLEARING = 9, /* clears loss-of-MT-ctr-data alert */ }; -static inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest) + +static __always_inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest) { int cc; diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index bb59dd964590..de8f0bf5f238 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -12,8 +12,6 @@ #include <asm/page.h> #include <asm/pgtable.h> - -#define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range free_pgd_range #define hugepages_supported() (MACHINE_HAS_EDAT1) @@ -23,6 +21,13 @@ pte_t huge_ptep_get(pte_t *ptep); pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +static inline bool is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) +{ + return false; +} + /* * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index e548ec1ec12c..39f747d63758 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -20,7 +20,7 @@ * We use a brcl 0,2 instruction for jump labels at compile time so it * can be easily distinguished from a hotpatch generated instruction. */ -static inline bool arch_static_branch(struct static_key *key, bool branch) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n" ".pushsection __jump_table,\"aw\"\n" @@ -34,7 +34,7 @@ label: return true; } -static inline bool arch_static_branch_jump(struct static_key *key, bool branch) +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { asm_volatile_goto("0: brcl 15,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 36c578c0ff96..5ff98d76a66c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -997,9 +997,9 @@ static inline pte_t pte_mkhuge(pte_t pte) #define IPTE_NODAT 0x400 #define IPTE_GUEST_ASCE 0x800 -static inline void __ptep_ipte(unsigned long address, pte_t *ptep, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, + unsigned long opt, unsigned long asce, + int local) { unsigned long pto = (unsigned long) ptep; @@ -1020,8 +1020,8 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep, : [r1] "a" (pto), [m4] "i" (local) : "memory"); } -static inline void __ptep_ipte_range(unsigned long address, int nr, - pte_t *ptep, int local) +static __always_inline void __ptep_ipte_range(unsigned long address, int nr, + pte_t *ptep, int local) { unsigned long pto = (unsigned long) ptep; @@ -1269,7 +1269,8 @@ static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address) #define pte_offset_kernel(pmd, address) pte_offset(pmd, address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) -#define pte_unmap(pte) do { } while (0) + +static inline void pte_unmap(pte_t *pte) { } static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { @@ -1435,9 +1436,9 @@ static inline void __pmdp_csp(pmd_t *pmdp) #define IDTE_NODAT 0x1000 #define IDTE_GUEST_ASCE 0x2000 -static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, + unsigned long opt, unsigned long asce, + int local) { unsigned long sto; @@ -1461,9 +1462,9 @@ static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, } } -static inline void __pudp_idte(unsigned long addr, pud_t *pudp, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp, + unsigned long opt, unsigned long asce, + int local) { unsigned long r3o; diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 78e8a888306d..e3f238e8c611 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -111,7 +111,7 @@ struct qib { /* private: */ u8 res[88]; /* public: */ - u8 parm[QDIO_MAX_BUFFERS_PER_Q]; + u8 parm[128]; } __attribute__ ((packed, aligned(256))); /** diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index 5f1fd1581330..2654e348801a 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -390,7 +390,7 @@ static size_t cf_diag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, debug_sprintf_event(cf_diag_dbg, 6, "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" - " need %zd rc:%d\n", + " need %zd rc %d\n", __func__, ctrset, ctrset_size, cpuhw->info.cfvn, cpuhw->info.csvn, need, rc); return need; @@ -567,7 +567,7 @@ static int cf_diag_add(struct perf_event *event, int flags) int err = 0; debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d flags %#x cpuhw:%p\n", + "%s event %p cpu %d flags %#x cpuhw %p\n", __func__, event, event->cpu, flags, cpuhw); if (cpuhw->flags & PMU_F_IN_USE) { diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 544a02e944c6..3d8b12a9a6ff 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -803,6 +803,12 @@ static int __hw_perf_event_init(struct perf_event *event) goto out; } + if (si.ribm & CPU_MF_SF_RIBM_NOTAV) { + pr_warn("CPU Measurement Facility sampling is temporarily not available\n"); + err = -EBUSY; + goto out; + } + /* Always enable basic sampling */ SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; @@ -895,7 +901,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event) /* Check online status of the CPU to which the event is pinned */ if (event->cpu >= 0 && !cpu_online(event->cpu)) - return -ENODEV; + return -ENODEV; /* Force reset of idle/hv excludes regardless of what the * user requested. diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f6db0f1bc867..d047e846e1b9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -332,7 +332,7 @@ static inline int plo_test_bit(unsigned char nr) return cc == 0; } -static inline void __insn32_query(unsigned int opcode, u8 query[32]) +static __always_inline void __insn32_query(unsigned int opcode, u8 *query) { register unsigned long r0 asm("0") = 0; /* query function */ register unsigned long r1 asm("1") = (unsigned long) query; @@ -340,9 +340,9 @@ static inline void __insn32_query(unsigned int opcode, u8 query[32]) asm volatile( /* Parameter regs are ignored */ " .insn rrf,%[opc] << 16,2,4,6,0\n" - : "=m" (*query) + : : "d" (r0), "a" (r1), [opc] "i" (opcode) - : "cc"); + : "cc", "memory"); } #define INSN_SORTL 0xb938 diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 9bdff4defef1..e585a62d6530 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -66,7 +66,7 @@ static inline int clp_get_ilp(unsigned long *ilp) /* * Call Logical Processor with c=0, the give constant lps and an lpcb request. */ -static inline int clp_req(void *data, unsigned int lps) +static __always_inline int clp_req(void *data, unsigned int lps) { struct { u8 _[CLP_BLK_SIZE]; } *req = data; u64 ignored; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 23edf56cf577..50eb430b0ad8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -219,13 +219,6 @@ enum { PFERR_WRITE_MASK | \ PFERR_PRESENT_MASK) -/* - * The mask used to denote special SPTEs, which can be either MMIO SPTEs or - * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting - * with the SVE bit in EPT PTEs. - */ -#define SPTE_SPECIAL_MASK (1ULL << 62) - /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 /* diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 63316036f85a..9c5029cf6f3f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -485,6 +485,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = + F(CLZERO) | F(XSAVEERPTR) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); @@ -618,16 +619,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, */ case 0x1f: case 0xb: { - int i, level_type; + int i; - /* read more entries until level_type is zero */ - for (i = 1; ; ++i) { + /* + * We filled in entry[0] for CPUID(EAX=<function>, + * ECX=00H) above. If its level type (ECX[15:8]) is + * zero, then the leaf is unimplemented, and we're + * done. Otherwise, continue to populate entries + * until the level type (ECX[15:8]) of the previously + * added entry is zero. + */ + for (i = 1; entry[i - 1].ecx & 0xff00; ++i) { if (*nent >= maxnent) goto out; - level_type = entry[i - 1].ecx & 0xff00; - if (!level_type) - break; do_host_cpuid(&entry[i], function, i); ++*nent; } @@ -969,53 +974,66 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); /* - * If no match is found, check whether we exceed the vCPU's limit - * and return the content of the highest valid _standard_ leaf instead. - * This is to satisfy the CPUID specification. + * If the basic or extended CPUID leaf requested is higher than the + * maximum supported basic or extended leaf, respectively, then it is + * out of range. */ -static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, - u32 function, u32 index) +static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function) { - struct kvm_cpuid_entry2 *maxlevel; - - maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); - if (!maxlevel || maxlevel->eax >= function) - return NULL; - if (function & 0x80000000) { - maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); - if (!maxlevel) - return NULL; - } - return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); + struct kvm_cpuid_entry2 *max; + + max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); + return max && function <= max->eax; } bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit) { u32 function = *eax, index = *ecx; - struct kvm_cpuid_entry2 *best; - bool entry_found = true; - - best = kvm_find_cpuid_entry(vcpu, function, index); - - if (!best) { - entry_found = false; - if (!check_limit) - goto out; + struct kvm_cpuid_entry2 *entry; + struct kvm_cpuid_entry2 *max; + bool found; - best = check_cpuid_limit(vcpu, function, index); + entry = kvm_find_cpuid_entry(vcpu, function, index); + found = entry; + /* + * Intel CPUID semantics treats any query for an out-of-range + * leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were + * requested. AMD CPUID semantics returns all zeroes for any + * undefined leaf, whether or not the leaf is in range. + */ + if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) && + !cpuid_function_in_range(vcpu, function)) { + max = kvm_find_cpuid_entry(vcpu, 0, 0); + if (max) { + function = max->eax; + entry = kvm_find_cpuid_entry(vcpu, function, index); + } } - -out: - if (best) { - *eax = best->eax; - *ebx = best->ebx; - *ecx = best->ecx; - *edx = best->edx; - } else + if (entry) { + *eax = entry->eax; + *ebx = entry->ebx; + *ecx = entry->ecx; + *edx = entry->edx; + } else { *eax = *ebx = *ecx = *edx = 0; - trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found); - return entry_found; + /* + * When leaf 0BH or 1FH is defined, CL is pass-through + * and EDX is always the x2APIC ID, even for undefined + * subleaves. Index 1 will exist iff the leaf is + * implemented, so we pass through CL iff leaf 1 + * exists. EDX can be copied from any existing index. + */ + if (function == 0xb || function == 0x1f) { + entry = kvm_find_cpuid_entry(vcpu, function, 1); + if (entry) { + *ecx = index & 0xff; + *edx = entry->edx; + } + } + } + trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found); + return found; } EXPORT_SYMBOL_GPL(kvm_cpuid); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3a3a6854dcca..87b0fcc23ef8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -66,9 +66,10 @@ #define X2APIC_BROADCAST 0xFFFFFFFFul static bool lapic_timer_advance_dynamic __read_mostly; -#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 -#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 5000 -#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000 +#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */ +#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */ +#define LAPIC_TIMER_ADVANCE_NS_INIT 1000 +#define LAPIC_TIMER_ADVANCE_NS_MAX 5000 /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 @@ -1504,8 +1505,8 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; } - if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_ADJUST_MAX)) - timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; + if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX)) + timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; apic->lapic_timer.timer_advance_ns = timer_advance_ns; } @@ -2302,7 +2303,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) HRTIMER_MODE_ABS_HARD); apic->lapic_timer.timer.function = apic_timer_fn; if (timer_advance_ns == -1) { - apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; + apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; lapic_timer_advance_dynamic = true; } else { apic->lapic_timer.timer_advance_ns = timer_advance_ns; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5269aa057dfa..24c23c66b226 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -83,7 +83,17 @@ module_param(dbg, bool, 0644); #define PTE_PREFETCH_NUM 8 #define PT_FIRST_AVAIL_BITS_SHIFT 10 -#define PT64_SECOND_AVAIL_BITS_SHIFT 52 +#define PT64_SECOND_AVAIL_BITS_SHIFT 54 + +/* + * The mask used to denote special SPTEs, which can be either MMIO SPTEs or + * Access Tracking SPTEs. + */ +#define SPTE_SPECIAL_MASK (3ULL << 52) +#define SPTE_AD_ENABLED_MASK (0ULL << 52) +#define SPTE_AD_DISABLED_MASK (1ULL << 52) +#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52) +#define SPTE_MMIO_MASK (3ULL << 52) #define PT64_LEVEL_BITS 9 @@ -219,12 +229,11 @@ static u64 __read_mostly shadow_present_mask; static u64 __read_mostly shadow_me_mask; /* - * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value. - * Non-present SPTEs with shadow_acc_track_value set are in place for access - * tracking. + * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK; + * shadow_acc_track_mask is the set of bits to be cleared in non-accessed + * pages. */ static u64 __read_mostly shadow_acc_track_mask; -static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK; /* * The mask/shift to use for saving the original R/X bits when marking the PTE @@ -304,7 +313,7 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) { BUG_ON((u64)(unsigned)access_mask != access_mask); BUG_ON((mmio_mask & mmio_value) != mmio_value); - shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK; + shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; shadow_mmio_access_mask = access_mask; } @@ -320,10 +329,27 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) return sp->role.ad_disabled; } +static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) +{ + /* + * When using the EPT page-modification log, the GPAs in the log + * would come from L2 rather than L1. Therefore, we need to rely + * on write protection to record dirty pages. This also bypasses + * PML, since writes now result in a vmexit. + */ + return vcpu->arch.mmu == &vcpu->arch.guest_mmu; +} + static inline bool spte_ad_enabled(u64 spte) { MMU_WARN_ON(is_mmio_spte(spte)); - return !(spte & shadow_acc_track_value); + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK; +} + +static inline bool spte_ad_need_write_protect(u64 spte) +{ + MMU_WARN_ON(is_mmio_spte(spte)); + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK; } static inline u64 spte_shadow_accessed_mask(u64 spte) @@ -461,7 +487,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, { BUG_ON(!dirty_mask != !accessed_mask); BUG_ON(!accessed_mask && !acc_track_mask); - BUG_ON(acc_track_mask & shadow_acc_track_value); + BUG_ON(acc_track_mask & SPTE_SPECIAL_MASK); shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; @@ -1589,16 +1615,16 @@ static bool spte_clear_dirty(u64 *sptep) rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep); + MMU_WARN_ON(!spte_ad_enabled(spte)); spte &= ~shadow_dirty_mask; - return mmu_spte_update(sptep, spte); } -static bool wrprot_ad_disabled_spte(u64 *sptep) +static bool spte_wrprot_for_clear_dirty(u64 *sptep) { bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT, (unsigned long *)sptep); - if (was_writable) + if (was_writable && !spte_ad_enabled(*sptep)) kvm_set_pfn_dirty(spte_to_pfn(*sptep)); return was_writable; @@ -1617,10 +1643,10 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) bool flush = false; for_each_rmap_spte(rmap_head, &iter, sptep) - if (spte_ad_enabled(*sptep)) - flush |= spte_clear_dirty(sptep); + if (spte_ad_need_write_protect(*sptep)) + flush |= spte_wrprot_for_clear_dirty(sptep); else - flush |= wrprot_ad_disabled_spte(sptep); + flush |= spte_clear_dirty(sptep); return flush; } @@ -1631,6 +1657,11 @@ static bool spte_set_dirty(u64 *sptep) rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep); + /* + * Similar to the !kvm_x86_ops->slot_disable_log_dirty case, + * do not bother adding back write access to pages marked + * SPTE_AD_WRPROT_ONLY_MASK. + */ spte |= shadow_dirty_mask; return mmu_spte_update(sptep, spte); @@ -2622,7 +2653,7 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, shadow_user_mask | shadow_x_mask | shadow_me_mask; if (sp_ad_disabled(sp)) - spte |= shadow_acc_track_value; + spte |= SPTE_AD_DISABLED_MASK; else spte |= shadow_accessed_mask; @@ -2968,7 +2999,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, sp = page_header(__pa(sptep)); if (sp_ad_disabled(sp)) - spte |= shadow_acc_track_value; + spte |= SPTE_AD_DISABLED_MASK; + else if (kvm_vcpu_ad_need_write_protect(vcpu)) + spte |= SPTE_AD_WRPROT_ONLY_MASK; /* * For the EPT case, shadow_present_mask is 0 if hardware diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 41abc62c9a8a..e76eb4f07f6c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2610,7 +2610,7 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, /* VM-entry exception error code */ if (CC(has_error_code && - vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))) + vmcs12->vm_entry_exception_error_code & GENMASK(31, 16))) return -EINVAL; /* VM-entry interruption-info field: reserved bits */ diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 4dea0e0e7e39..3e9c059099e9 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -262,6 +262,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static void intel_pmu_refresh(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct x86_pmu_capability x86_pmu; struct kvm_cpuid_entry2 *entry; union cpuid10_eax eax; union cpuid10_edx edx; @@ -283,8 +284,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) if (!pmu->version) return; + perf_get_x86_pmu_capability(&x86_pmu); + pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, - INTEL_PMC_MAX_GENERIC); + x86_pmu.num_counters_gp); pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; pmu->available_event_types = ~entry->ebx & ((1ull << eax.split.mask_length) - 1); @@ -294,7 +297,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) } else { pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, - INTEL_PMC_MAX_FIXED); + x86_pmu.num_counters_fixed); pmu->counter_bitmask[KVM_PMC_FIXED] = ((u64)1 << edx.split.bit_width_fixed) - 1; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d4575ffb3cec..e7970a2e8eae 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -209,6 +209,11 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) struct page *page; unsigned int i; + if (!boot_cpu_has_bug(X86_BUG_L1TF)) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; + return 0; + } + if (!enable_ept) { l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; return 0; @@ -7995,12 +8000,10 @@ static int __init vmx_init(void) * contain 'auto' which will be turned into the default 'cond' * mitigation mode. */ - if (boot_cpu_has(X86_BUG_L1TF)) { - r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); - if (r) { - vmx_exit(); - return r; - } + r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); + if (r) { + vmx_exit(); + return r; } #ifdef CONFIG_KEXEC_CORE diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ed07d8d2caa..661e2bf38526 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ +#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) @@ -212,7 +212,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, - { "largepages", VM_STAT(lpages) }, + { "largepages", VM_STAT(lpages, .mode = 0444) }, { "max_mmu_page_hash_collisions", VM_STAT(max_mmu_page_hash_collisions) }, { NULL } @@ -885,34 +885,42 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); -int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - unsigned long old_cr4 = kvm_read_cr4(vcpu); - unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; - if (cr4 & CR4_RESERVED_BITS) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) - return 1; + return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) + return -EINVAL; + + return 0; +} + +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + unsigned long old_cr4 = kvm_read_cr4(vcpu); + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; + + if (kvm_valid_cr4(vcpu, cr4)) return 1; if (is_long_mode(vcpu)) { @@ -1161,13 +1169,6 @@ static u32 msrs_to_save[] = { MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13, MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15, MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17, - MSR_ARCH_PERFMON_PERFCTR0 + 18, MSR_ARCH_PERFMON_PERFCTR0 + 19, - MSR_ARCH_PERFMON_PERFCTR0 + 20, MSR_ARCH_PERFMON_PERFCTR0 + 21, - MSR_ARCH_PERFMON_PERFCTR0 + 22, MSR_ARCH_PERFMON_PERFCTR0 + 23, - MSR_ARCH_PERFMON_PERFCTR0 + 24, MSR_ARCH_PERFMON_PERFCTR0 + 25, - MSR_ARCH_PERFMON_PERFCTR0 + 26, MSR_ARCH_PERFMON_PERFCTR0 + 27, - MSR_ARCH_PERFMON_PERFCTR0 + 28, MSR_ARCH_PERFMON_PERFCTR0 + 29, - MSR_ARCH_PERFMON_PERFCTR0 + 30, MSR_ARCH_PERFMON_PERFCTR0 + 31, MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, @@ -1177,13 +1178,6 @@ static u32 msrs_to_save[] = { MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, - MSR_ARCH_PERFMON_EVENTSEL0 + 18, MSR_ARCH_PERFMON_EVENTSEL0 + 19, - MSR_ARCH_PERFMON_EVENTSEL0 + 20, MSR_ARCH_PERFMON_EVENTSEL0 + 21, - MSR_ARCH_PERFMON_EVENTSEL0 + 22, MSR_ARCH_PERFMON_EVENTSEL0 + 23, - MSR_ARCH_PERFMON_EVENTSEL0 + 24, MSR_ARCH_PERFMON_EVENTSEL0 + 25, - MSR_ARCH_PERFMON_EVENTSEL0 + 26, MSR_ARCH_PERFMON_EVENTSEL0 + 27, - MSR_ARCH_PERFMON_EVENTSEL0 + 28, MSR_ARCH_PERFMON_EVENTSEL0 + 29, - MSR_ARCH_PERFMON_EVENTSEL0 + 30, MSR_ARCH_PERFMON_EVENTSEL0 + 31, }; static unsigned num_msrs_to_save; @@ -5097,13 +5091,14 @@ out: static void kvm_init_msr_list(void) { + struct x86_pmu_capability x86_pmu; u32 dummy[2]; unsigned i, j; BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4, "Please update the fixed PMCs in msrs_to_save[]"); - BUILD_BUG_ON_MSG(INTEL_PMC_MAX_GENERIC != 32, - "Please update the generic perfctr/eventsel MSRs in msrs_to_save[]"); + + perf_get_x86_pmu_capability(&x86_pmu); for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) @@ -5145,6 +5140,15 @@ static void kvm_init_msr_list(void) intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) continue; break; + case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17: + if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >= + min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) + continue; + break; + case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17: + if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= + min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) + continue; } default: break; @@ -8714,10 +8718,6 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - (sregs->cr4 & X86_CR4_OSXSAVE)) - return -EINVAL; - if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* * When EFER.LME and CR0.PG are set, the processor is in @@ -8736,7 +8736,7 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return -EINVAL; } - return 0; + return kvm_valid_cr4(vcpu, sregs->cr4); } static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 0d3365cb64de..a04551ee5568 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -57,19 +57,7 @@ static efi_system_table_t __init *xen_efi_probe(void) return NULL; /* Here we know that Xen runs on EFI platform. */ - - efi.get_time = xen_efi_get_time; - efi.set_time = xen_efi_set_time; - efi.get_wakeup_time = xen_efi_get_wakeup_time; - efi.set_wakeup_time = xen_efi_set_wakeup_time; - efi.get_variable = xen_efi_get_variable; - efi.get_next_variable = xen_efi_get_next_variable; - efi.set_variable = xen_efi_set_variable; - efi.query_variable_info = xen_efi_query_variable_info; - efi.update_capsule = xen_efi_update_capsule; - efi.query_capsule_caps = xen_efi_query_capsule_caps; - efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; - efi.reset_system = xen_efi_reset_system; + xen_efi_runtime_setup(); efi_systab_xen.tables = info->cfg.addr; efi_systab_xen.nr_tables = info->cfg.nent; diff --git a/block/blk-mq.c b/block/blk-mq.c index 6e3b15f70cd7..ec791156e9cc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1992,10 +1992,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) /* bypass scheduler for flush rq */ blk_insert_flush(rq); blk_mq_run_hw_queue(data.hctx, true); - } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) { + } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs || + !blk_queue_nonrot(q))) { /* * Use plugging if we have a ->commit_rqs() hook as well, as * we know the driver uses bd->last in a smart fashion. + * + * Use normal plugging if this disk is slow HDD, as sequential + * IO may benefit a lot from plug merging. */ unsigned int request_count = plug->rq_count; struct request *last = NULL; @@ -2012,6 +2016,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) } blk_add_rq_to_plug(plug, rq); + } else if (q->elevator) { + blk_mq_sched_insert_request(rq, false, true, true); } else if (plug && !blk_queue_nomerges(q)) { /* * We do limited plugging. If the bio can be merged, do that. @@ -2035,8 +2041,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_try_issue_directly(data.hctx, same_queue_rq, &cookie); } - } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator && - !data.hctx->dispatch_busy)) { + } else if ((q->nr_hw_queues > 1 && is_sync) || + !data.hctx->dispatch_busy) { blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { blk_mq_sched_insert_request(rq, false, true, true); diff --git a/block/sed-opal.c b/block/sed-opal.c index 4e95a9792162..b4c761973ac1 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -129,7 +129,7 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = { { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 }, /* tables */ - [OPAL_TABLE_TABLE] + [OPAL_TABLE_TABLE] = { 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01 }, [OPAL_LOCKINGRANGE_GLOBAL] = { 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 }, @@ -372,8 +372,8 @@ static void check_geometry(struct opal_dev *dev, const void *data) { const struct d0_geometry_features *geo = data; - dev->align = geo->alignment_granularity; - dev->lowest_lba = geo->lowest_aligned_lba; + dev->align = be64_to_cpu(geo->alignment_granularity); + dev->lowest_lba = be64_to_cpu(geo->lowest_aligned_lba); } static int execute_step(struct opal_dev *dev, diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1410fa893653..f6f77eaa7217 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -994,6 +994,16 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) blk_queue_write_cache(lo->lo_queue, true, false); + if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) { + /* In case of direct I/O, match underlying block size */ + unsigned short bsize = bdev_logical_block_size( + inode->i_sb->s_bdev); + + blk_queue_logical_block_size(lo->lo_queue, bsize); + blk_queue_physical_block_size(lo->lo_queue, bsize); + blk_queue_io_min(lo->lo_queue, bsize); + } + loop_update_rotational(lo); loop_update_dio(lo); set_capacity(lo->lo_disk, size); diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 9207ac291341..ad50efb470aa 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -280,9 +280,6 @@ static int sysc_get_one_clock(struct sysc *ddata, const char *name) ddata->clocks[index] = devm_clk_get(ddata->dev, name); if (IS_ERR(ddata->clocks[index])) { - if (PTR_ERR(ddata->clocks[index]) == -ENOENT) - return 0; - dev_err(ddata->dev, "clock get error for %s: %li\n", name, PTR_ERR(ddata->clocks[index])); @@ -357,7 +354,7 @@ static int sysc_get_clocks(struct sysc *ddata) continue; error = sysc_get_one_clock(ddata, name); - if (error && error != -ENOENT) + if (error) return error; } @@ -1632,17 +1629,19 @@ static int sysc_init_module(struct sysc *ddata) if (error) return error; - if (manage_clocks) { - sysc_clkdm_deny_idle(ddata); + sysc_clkdm_deny_idle(ddata); - error = sysc_enable_opt_clocks(ddata); - if (error) - return error; + /* + * Always enable clocks. The bootloader may or may not have enabled + * the related clocks. + */ + error = sysc_enable_opt_clocks(ddata); + if (error) + return error; - error = sysc_enable_main_clocks(ddata); - if (error) - goto err_opt_clocks; - } + error = sysc_enable_main_clocks(ddata); + if (error) + goto err_opt_clocks; if (!(ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT)) { error = sysc_rstctrl_reset_deassert(ddata, true); @@ -1660,7 +1659,7 @@ static int sysc_init_module(struct sysc *ddata) goto err_main_clocks; } - if (!ddata->legacy_mode && manage_clocks) { + if (!ddata->legacy_mode) { error = sysc_enable_module(ddata->dev); if (error) goto err_main_clocks; @@ -1677,6 +1676,7 @@ err_main_clocks: if (manage_clocks) sysc_disable_main_clocks(ddata); err_opt_clocks: + /* No re-enable of clockdomain autoidle to prevent module autoidle */ if (manage_clocks) { sysc_disable_opt_clocks(ddata); sysc_clkdm_allow_idle(ddata); @@ -2357,6 +2357,27 @@ static void ti_sysc_idle(struct work_struct *work) ddata = container_of(work, struct sysc, idle_work.work); + /* + * One time decrement of clock usage counts if left on from init. + * Note that we disable opt clocks unconditionally in this case + * as they are enabled unconditionally during init without + * considering sysc_opt_clks_needed() at that point. + */ + if (ddata->cfg.quirks & (SYSC_QUIRK_NO_IDLE | + SYSC_QUIRK_NO_IDLE_ON_INIT)) { + sysc_disable_main_clocks(ddata); + sysc_disable_opt_clocks(ddata); + sysc_clkdm_allow_idle(ddata); + } + + /* Keep permanent PM runtime usage count for SYSC_QUIRK_NO_IDLE */ + if (ddata->cfg.quirks & SYSC_QUIRK_NO_IDLE) + return; + + /* + * Decrement PM runtime usage count for SYSC_QUIRK_NO_IDLE_ON_INIT + * and SYSC_QUIRK_NO_RESET_ON_INIT + */ if (pm_runtime_active(ddata->dev)) pm_runtime_put_sync(ddata->dev); } @@ -2445,7 +2466,8 @@ static int sysc_probe(struct platform_device *pdev) INIT_DELAYED_WORK(&ddata->idle_work, ti_sysc_idle); /* At least earlycon won't survive without deferred idle */ - if (ddata->cfg.quirks & (SYSC_QUIRK_NO_IDLE_ON_INIT | + if (ddata->cfg.quirks & (SYSC_QUIRK_NO_IDLE | + SYSC_QUIRK_NO_IDLE_ON_INIT | SYSC_QUIRK_NO_RESET_ON_INIT)) { schedule_delayed_work(&ddata->idle_work, 3000); } else { diff --git a/drivers/char/random.c b/drivers/char/random.c index d3beed084c0a..de434feb873a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1732,6 +1732,56 @@ void get_random_bytes(void *buf, int nbytes) } EXPORT_SYMBOL(get_random_bytes); + +/* + * Each time the timer fires, we expect that we got an unpredictable + * jump in the cycle counter. Even if the timer is running on another + * CPU, the timer activity will be touching the stack of the CPU that is + * generating entropy.. + * + * Note that we don't re-arm the timer in the timer itself - we are + * happy to be scheduled away, since that just makes the load more + * complex, but we do not want the timer to keep ticking unless the + * entropy loop is running. + * + * So the re-arming always happens in the entropy loop itself. + */ +static void entropy_timer(struct timer_list *t) +{ + credit_entropy_bits(&input_pool, 1); +} + +/* + * If we have an actual cycle counter, see if we can + * generate enough entropy with timing noise + */ +static void try_to_generate_entropy(void) +{ + struct { + unsigned long now; + struct timer_list timer; + } stack; + + stack.now = random_get_entropy(); + + /* Slow counter - or none. Don't even bother */ + if (stack.now == random_get_entropy()) + return; + + timer_setup_on_stack(&stack.timer, entropy_timer, 0); + while (!crng_ready()) { + if (!timer_pending(&stack.timer)) + mod_timer(&stack.timer, jiffies+1); + mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); + schedule(); + stack.now = random_get_entropy(); + } + + del_timer_sync(&stack.timer); + destroy_timer_on_stack(&stack.timer); + mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); +} + /* * Wait for the urandom pool to be seeded and thus guaranteed to supply * cryptographically secure random numbers. This applies to: the /dev/urandom @@ -1746,7 +1796,17 @@ int wait_for_random_bytes(void) { if (likely(crng_ready())) return 0; - return wait_event_interruptible(crng_init_wait, crng_ready()); + + do { + int ret; + ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); + if (ret) + return ret > 0 ? 0 : ret; + + try_to_generate_entropy(); + } while (!crng_ready()); + + return 0; } EXPORT_SYMBOL(wait_for_random_bytes); @@ -2460,4 +2520,4 @@ void add_bootloader_randomness(const void *buf, unsigned int size) else add_device_randomness(buf, size); } -EXPORT_SYMBOL_GPL(add_bootloader_randomness);
\ No newline at end of file +EXPORT_SYMBOL_GPL(add_bootloader_randomness); diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index d8c2bd4391d0..11ff701ff4bb 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -25,7 +25,9 @@ static __init void timer_of_irq_exit(struct of_timer_irq *of_irq) struct clock_event_device *clkevt = &to->clkevt; - of_irq->percpu ? free_percpu_irq(of_irq->irq, clkevt) : + if (of_irq->percpu) + free_percpu_irq(of_irq->irq, clkevt); + else free_irq(of_irq->irq, clkevt); } diff --git a/drivers/firmware/arm_scmi/reset.c b/drivers/firmware/arm_scmi/reset.c index 64cc81915581..ab42c21c5517 100644 --- a/drivers/firmware/arm_scmi/reset.c +++ b/drivers/firmware/arm_scmi/reset.c @@ -150,7 +150,7 @@ static int scmi_domain_reset(const struct scmi_handle *handle, u32 domain, dom = t->tx.buf; dom->domain_id = cpu_to_le32(domain); dom->flags = cpu_to_le32(flags); - dom->domain_id = cpu_to_le32(state); + dom->reset_state = cpu_to_le32(state); if (rdom->async_reset) ret = scmi_do_xfer_with_response(handle, t); diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 42e2c1f57152..00962a659009 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -54,7 +54,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ - amdgpu_vm_sdma.o amdgpu_pmu.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o + amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index eba42c752bca..82155ac3288a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -189,7 +189,7 @@ static int acp_hw_init(void *handle) u32 val = 0; u32 count = 0; struct device *dev; - struct i2s_platform_data *i2s_pdata; + struct i2s_platform_data *i2s_pdata = NULL; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -231,20 +231,21 @@ static int acp_hw_init(void *handle) adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), GFP_KERNEL); - if (adev->acp.acp_cell == NULL) - return -ENOMEM; + if (adev->acp.acp_cell == NULL) { + r = -ENOMEM; + goto failure; + } adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); if (adev->acp.acp_res == NULL) { - kfree(adev->acp.acp_cell); - return -ENOMEM; + r = -ENOMEM; + goto failure; } i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); if (i2s_pdata == NULL) { - kfree(adev->acp.acp_res); - kfree(adev->acp.acp_cell); - return -ENOMEM; + r = -ENOMEM; + goto failure; } switch (adev->asic_type) { @@ -341,14 +342,14 @@ static int acp_hw_init(void *handle) r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); if (r) - return r; + goto failure; for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); if (r) { dev_err(dev, "Failed to add dev to genpd\n"); - return r; + goto failure; } } @@ -367,7 +368,8 @@ static int acp_hw_init(void *handle) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto failure; } udelay(100); } @@ -384,7 +386,8 @@ static int acp_hw_init(void *handle) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto failure; } udelay(100); } @@ -393,6 +396,13 @@ static int acp_hw_init(void *handle) val &= ~ACP_SOFT_RESET__SoftResetAud_MASK; cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val); return 0; + +failure: + kfree(i2s_pdata); + kfree(adev->acp.acp_res); + kfree(adev->acp.acp_cell); + kfree(adev->acp.acp_genpd); + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 264677ab248a..6f8aaf655a9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -81,9 +81,10 @@ * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS. * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches + * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 34 +#define KMS_DRIVER_MINOR 35 #define KMS_DRIVER_PATCHLEVEL 0 #define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 554a59b3c4a6..6ee4021910e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -165,6 +165,7 @@ struct amdgpu_gfx_config { uint32_t num_sc_per_sh; uint32_t num_packer_per_sc; uint32_t pa_sc_tile_steering_override; + uint64_t tcc_disabled_mask; }; struct amdgpu_cu_info { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index f6147528be64..f2c097983f48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -787,6 +787,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.pa_sc_tile_steering_override = adev->gfx.config.pa_sc_tile_steering_override; + dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; + return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e2fb141ff2e5..5251352f5922 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -603,14 +603,12 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct ttm_bo_global *glob = adev->mman.bdev.glob; struct amdgpu_vm_bo_base *bo_base; -#if 0 if (vm->bulk_moveable) { spin_lock(&glob->lru_lock); ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); spin_unlock(&glob->lru_lock); return; } -#endif memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 638c821611ab..957811b73672 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -1691,6 +1691,17 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) } } +static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) +{ + /* TCCs are global (not instanced). */ + uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) | + RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE); + + adev->gfx.config.tcc_disabled_mask = + REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | + (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); +} + static void gfx_v10_0_constants_init(struct amdgpu_device *adev) { u32 tmp; @@ -1702,6 +1713,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) gfx_v10_0_setup_rb(adev); gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); + gfx_v10_0_get_tcc_info(adev); adev->gfx.config.pa_sc_tile_steering_override = gfx_v10_0_init_pa_sc_tile_steering_override(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 85393a99a848..de9b995b65b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -317,10 +317,12 @@ static int nv_asic_reset(struct amdgpu_device *adev) struct smu_context *smu = &adev->smu; if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); ret = smu_baco_reset(smu); } else { - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); ret = nv_asic_mode1_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f70658a536a9..f8ab80c8801b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -558,12 +558,14 @@ static int soc15_asic_reset(struct amdgpu_device *adev) { switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_BACO: - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: return soc15_mode2_reset(adev); default: - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); return soc15_asic_mode1_reset(adev); } } @@ -771,8 +773,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); -#else -# warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." #endif amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); break; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8cab6da512a0..a52f0b13a2c8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2385,8 +2385,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY) dm->dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? false : true; - if (adev->asic_type == CHIP_RENOIR) - dm->dc->debug.disable_stutter = true; return 0; fail: @@ -6019,7 +6017,9 @@ static void amdgpu_dm_enable_crtc_interrupts(struct drm_device *dev, struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; int i; +#ifdef CONFIG_DEBUG_FS enum amdgpu_dm_pipe_crc_source source; +#endif for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 1787b9bf800a..76d54885374a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -668,6 +668,7 @@ struct clock_source *dce100_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 318e9c2e2ca8..89620adc81d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -714,6 +714,7 @@ struct clock_source *dce110_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 83e1878161c9..21a657e79306 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -687,6 +687,7 @@ struct clock_source *dce112_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 8b85e5274bba..7c52f7f9196c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -500,6 +500,7 @@ static struct clock_source *dce120_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 4625df9f9fd2..643ccb0ade00 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -701,6 +701,7 @@ struct clock_source *dce80_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 59305e411a66..1599bb971111 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -786,6 +786,7 @@ struct clock_source *dcn10_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index b4e3ce22ed52..5a2763daff4d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1077,6 +1077,7 @@ struct clock_source *dcn20_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 8cd9de8b1a7a..ef673bffc241 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -3,7 +3,17 @@ DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o -CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse -mpreferred-stack-boundary=4 +ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) + cc_stack_align := -mpreferred-stack-boundary=4 +else ifneq ($(call cc-option, -mstack-alignment=16),) + cc_stack_align := -mstack-alignment=16 +endif + +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse $(cc_stack_align) + +ifdef CONFIG_CC_IS_CLANG +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2 +endif AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21)) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index 456cd0e3289c..3b6ed60dcd35 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -39,9 +39,6 @@ * ways. Unless there is something clearly wrong with it the code should * remain as-is as it provides us with a guarantee from HW that it is correct. */ - -typedef unsigned int uint; - typedef struct { double DPPCLK; double DISPCLK; @@ -4774,7 +4771,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0; mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - uint m; + unsigned int m; locals->cursor_bw[k] = 0; locals->cursor_bw_pre[k] = 0; @@ -5285,7 +5282,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; double FullDETBufferingTimeYStutterCriticalPlane = 0; double TimeToFinishSwathTransferStutterCriticalPlane = 0; - uint k, j; + unsigned int k, j; mode_lib->vba.TotalActiveDPP = 0; mode_lib->vba.TotalDCCActiveDPP = 0; @@ -5507,7 +5504,7 @@ static void CalculateDCFCLKDeepSleep( double DPPCLK[], double *DCFCLKDeepSleep) { - uint k; + unsigned int k; double DisplayPipeLineDeliveryTimeLuma; double DisplayPipeLineDeliveryTimeChroma; //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX]; @@ -5727,7 +5724,7 @@ static void CalculatePixelDeliveryTimes( double DisplayPipeRequestDeliveryTimeChromaPrefetch[]) { double req_per_swath_ub; - uint k; + unsigned int k; for (k = 0; k < NumberOfActivePlanes; ++k) { if (VRatio[k] <= 1) { @@ -5869,7 +5866,7 @@ static void CalculateMetaAndPTETimes( unsigned int dpte_groups_per_row_chroma_ub; unsigned int num_group_per_lower_vm_stage; unsigned int num_req_per_lower_vm_stage; - uint k; + unsigned int k; for (k = 0; k < NumberOfActivePlanes; ++k) { if (GPUVMEnable == true) { diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 33960fb38a5d..4acf139ea014 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -843,6 +843,8 @@ static int smu_sw_init(void *handle) smu->smu_baco.state = SMU_BACO_STATE_EXIT; smu->smu_baco.platform_support = false; + mutex_init(&smu->sensor_lock); + smu->watermarks_bitmap = 0; smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index f1f072012fac..d493a3f8c07a 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1018,6 +1018,7 @@ static int arcturus_read_sensor(struct smu_context *smu, if (!data || !size) return -EINVAL; + mutex_lock(&smu->sensor_lock); switch (sensor) { case AMDGPU_PP_SENSOR_MAX_FAN_RPM: *(uint32_t *)data = pptable->FanMaximumRpm; @@ -1044,6 +1045,7 @@ static int arcturus_read_sensor(struct smu_context *smu, default: ret = smu_smc_read_sensor(smu, sensor, data, size); } + mutex_unlock(&smu->sensor_lock); return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 6109815a0401..23171a4d9a31 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -344,6 +344,7 @@ struct smu_context const struct smu_funcs *funcs; const struct pptable_funcs *ppt_funcs; struct mutex mutex; + struct mutex sensor_lock; uint64_t pool_size; struct smu_table_context smu_table; diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 12c0e469bf35..0b461404af6b 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -547,7 +547,7 @@ static int navi10_get_metrics_table(struct smu_context *smu, struct smu_table_context *smu_table= &smu->smu_table; int ret = 0; - if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + HZ / 1000)) { + if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(100))) { ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)smu_table->metrics_table, false); if (ret) { @@ -1386,6 +1386,7 @@ static int navi10_read_sensor(struct smu_context *smu, if(!data || !size) return -EINVAL; + mutex_lock(&smu->sensor_lock); switch (sensor) { case AMDGPU_PP_SENSOR_MAX_FAN_RPM: *(uint32_t *)data = pptable->FanMaximumRpm; @@ -1409,6 +1410,7 @@ static int navi10_read_sensor(struct smu_context *smu, default: ret = smu_smc_read_sensor(smu, sensor, data, size); } + mutex_unlock(&smu->sensor_lock); return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 64386ee3f878..bbd8ebd58434 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -3023,6 +3023,7 @@ static int vega20_read_sensor(struct smu_context *smu, if(!data || !size) return -EINVAL; + mutex_lock(&smu->sensor_lock); switch (sensor) { case AMDGPU_PP_SENSOR_MAX_FAN_RPM: *(uint32_t *)data = pptable->FanMaximumRpm; @@ -3048,6 +3049,7 @@ static int vega20_read_sensor(struct smu_context *smu, default: ret = smu_smc_read_sensor(smu, sensor, data, size); } + mutex_unlock(&smu->sensor_lock); return ret; } diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c index 2851cac94d86..b72840c06ab7 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c @@ -43,9 +43,8 @@ komeda_wb_encoder_atomic_check(struct drm_encoder *encoder, struct komeda_data_flow_cfg dflow; int err; - if (!writeback_job || !writeback_job->fb) { + if (!writeback_job) return 0; - } if (!crtc_st->active) { DRM_DEBUG_ATOMIC("Cannot write the composition result out on a inactive CRTC.\n"); @@ -166,8 +165,10 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms, &komeda_wb_encoder_helper_funcs, formats, n_formats); komeda_put_fourcc_list(formats); - if (err) + if (err) { + kfree(kwb_conn); return err; + } drm_connector_helper_add(&wb_conn->base, &komeda_wb_conn_helper_funcs); diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c index 22c0847986df..875a3a9eabfa 100644 --- a/drivers/gpu/drm/arm/malidp_mw.c +++ b/drivers/gpu/drm/arm/malidp_mw.c @@ -131,7 +131,7 @@ malidp_mw_encoder_atomic_check(struct drm_encoder *encoder, struct drm_framebuffer *fb; int i, n_planes; - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; fb = conn_state->writeback_job->fb; @@ -248,7 +248,7 @@ void malidp_mw_atomic_commit(struct drm_device *drm, mw_state = to_mw_state(conn_state); - if (conn_state->writeback_job && conn_state->writeback_job->fb) { + if (conn_state->writeback_job) { struct drm_framebuffer *fb = conn_state->writeback_job->fb; DRM_DEV_DEBUG_DRIVER(drm->dev, diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 419381abbdd1..14aeaf736321 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -430,10 +430,15 @@ static int drm_atomic_connector_check(struct drm_connector *connector, return -EINVAL; } - if (writeback_job->out_fence && !writeback_job->fb) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n", - connector->base.id, connector->name); - return -EINVAL; + if (!writeback_job->fb) { + if (writeback_job->out_fence) { + DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n", + connector->base.id, connector->name); + return -EINVAL; + } + + drm_writeback_cleanup_job(writeback_job); + state->writeback_job = NULL; } return 0; diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c index ff138b6ec48b..43d9e3bb3a94 100644 --- a/drivers/gpu/drm/drm_writeback.c +++ b/drivers/gpu/drm/drm_writeback.c @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job) if (job->fb) drm_framebuffer_put(job->fb); + if (job->out_fence) + dma_fence_put(job->out_fence); + kfree(job); } EXPORT_SYMBOL(drm_writeback_cleanup_job); @@ -366,25 +369,29 @@ drm_writeback_signal_completion(struct drm_writeback_connector *wb_connector, { unsigned long flags; struct drm_writeback_job *job; + struct dma_fence *out_fence; spin_lock_irqsave(&wb_connector->job_lock, flags); job = list_first_entry_or_null(&wb_connector->job_queue, struct drm_writeback_job, list_entry); - if (job) { + if (job) list_del(&job->list_entry); - if (job->out_fence) { - if (status) - dma_fence_set_error(job->out_fence, status); - dma_fence_signal(job->out_fence); - dma_fence_put(job->out_fence); - } - } + spin_unlock_irqrestore(&wb_connector->job_lock, flags); if (WARN_ON(!job)) return; + out_fence = job->out_fence; + if (out_fence) { + if (status) + dma_fence_set_error(out_fence, status); + dma_fence_signal(out_fence); + dma_fence_put(out_fence); + job->out_fence = NULL; + } + INIT_WORK(&job->cleanup_work, cleanup_work); queue_work(system_long_wq, &job->cleanup_work); } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b51d1ceb8739..ce05e805b08f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7261,7 +7261,7 @@ retry: pipe_config->fdi_lanes = lane; intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock, - link_bw, &pipe_config->fdi_m_n, false); + link_bw, &pipe_config->fdi_m_n, false, false); ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config); if (ret == -EDEADLK) @@ -7508,11 +7508,15 @@ void intel_link_compute_m_n(u16 bits_per_pixel, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, - bool constant_n) + bool constant_n, bool fec_enable) { - m_n->tu = 64; + u32 data_clock = bits_per_pixel * pixel_clock; + + if (fec_enable) + data_clock = intel_dp_mode_to_fec_clock(data_clock); - compute_m_n(bits_per_pixel * pixel_clock, + m_n->tu = 64; + compute_m_n(data_clock, link_clock * nlanes * 8, &m_n->gmch_m, &m_n->gmch_n, constant_n); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index e57e6969051d..01fa87ad3270 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -414,7 +414,7 @@ enum phy_fia { void intel_link_compute_m_n(u16 bpp, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, - bool constant_n); + bool constant_n, bool fec_enable); bool is_ccs_modifier(u64 modifier); void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv); u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 921ad0a2f7ba..57e9f0ba331b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -78,8 +78,8 @@ #define DP_DSC_MAX_ENC_THROUGHPUT_0 340000 #define DP_DSC_MAX_ENC_THROUGHPUT_1 400000 -/* DP DSC FEC Overhead factor = (100 - 2.4)/100 */ -#define DP_DSC_FEC_OVERHEAD_FACTOR 976 +/* DP DSC FEC Overhead factor = 1/(0.972261) */ +#define DP_DSC_FEC_OVERHEAD_FACTOR 972261 /* Compliance test status bits */ #define INTEL_DP_RESOLUTION_SHIFT_MASK 0 @@ -494,6 +494,97 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, return 0; } +u32 intel_dp_mode_to_fec_clock(u32 mode_clock) +{ + return div_u64(mul_u32_u32(mode_clock, 1000000U), + DP_DSC_FEC_OVERHEAD_FACTOR); +} + +static u16 intel_dp_dsc_get_output_bpp(u32 link_clock, u32 lane_count, + u32 mode_clock, u32 mode_hdisplay) +{ + u32 bits_per_pixel, max_bpp_small_joiner_ram; + int i; + + /* + * Available Link Bandwidth(Kbits/sec) = (NumberOfLanes)* + * (LinkSymbolClock)* 8 * (TimeSlotsPerMTP) + * for SST -> TimeSlotsPerMTP is 1, + * for MST -> TimeSlotsPerMTP has to be calculated + */ + bits_per_pixel = (link_clock * lane_count * 8) / + intel_dp_mode_to_fec_clock(mode_clock); + DRM_DEBUG_KMS("Max link bpp: %u\n", bits_per_pixel); + + /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */ + max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER / mode_hdisplay; + DRM_DEBUG_KMS("Max small joiner bpp: %u\n", max_bpp_small_joiner_ram); + + /* + * Greatest allowed DSC BPP = MIN (output BPP from available Link BW + * check, output bpp from small joiner RAM check) + */ + bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram); + + /* Error out if the max bpp is less than smallest allowed valid bpp */ + if (bits_per_pixel < valid_dsc_bpp[0]) { + DRM_DEBUG_KMS("Unsupported BPP %u, min %u\n", + bits_per_pixel, valid_dsc_bpp[0]); + return 0; + } + + /* Find the nearest match in the array of known BPPs from VESA */ + for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) { + if (bits_per_pixel < valid_dsc_bpp[i + 1]) + break; + } + bits_per_pixel = valid_dsc_bpp[i]; + + /* + * Compressed BPP in U6.4 format so multiply by 16, for Gen 11, + * fractional part is 0 + */ + return bits_per_pixel << 4; +} + +static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, + int mode_clock, int mode_hdisplay) +{ + u8 min_slice_count, i; + int max_slice_width; + + if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE) + min_slice_count = DIV_ROUND_UP(mode_clock, + DP_DSC_MAX_ENC_THROUGHPUT_0); + else + min_slice_count = DIV_ROUND_UP(mode_clock, + DP_DSC_MAX_ENC_THROUGHPUT_1); + + max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd); + if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) { + DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n", + max_slice_width); + return 0; + } + /* Also take into account max slice width */ + min_slice_count = min_t(u8, min_slice_count, + DIV_ROUND_UP(mode_hdisplay, + max_slice_width)); + + /* Find the closest match to the valid slice count values */ + for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) { + if (valid_dsc_slicecount[i] > + drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, + false)) + break; + if (min_slice_count <= valid_dsc_slicecount[i]) + return valid_dsc_slicecount[i]; + } + + DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count); + return 0; +} + static enum drm_mode_status intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) @@ -2226,7 +2317,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, adjusted_mode->crtc_clock, pipe_config->port_clock, &pipe_config->dp_m_n, - constant_n); + constant_n, pipe_config->fec_enable); if (intel_connector->panel.downclock_mode != NULL && dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { @@ -2236,7 +2327,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_connector->panel.downclock_mode->clock, pipe_config->port_clock, &pipe_config->dp_m2_n2, - constant_n); + constant_n, pipe_config->fec_enable); } if (!HAS_DDI(dev_priv)) @@ -4323,91 +4414,6 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) DP_DPRX_ESI_LEN; } -u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, - int mode_clock, int mode_hdisplay) -{ - u16 bits_per_pixel, max_bpp_small_joiner_ram; - int i; - - /* - * Available Link Bandwidth(Kbits/sec) = (NumberOfLanes)* - * (LinkSymbolClock)* 8 * ((100-FECOverhead)/100)*(TimeSlotsPerMTP) - * FECOverhead = 2.4%, for SST -> TimeSlotsPerMTP is 1, - * for MST -> TimeSlotsPerMTP has to be calculated - */ - bits_per_pixel = (link_clock * lane_count * 8 * - DP_DSC_FEC_OVERHEAD_FACTOR) / - mode_clock; - - /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */ - max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER / - mode_hdisplay; - - /* - * Greatest allowed DSC BPP = MIN (output BPP from avaialble Link BW - * check, output bpp from small joiner RAM check) - */ - bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram); - - /* Error out if the max bpp is less than smallest allowed valid bpp */ - if (bits_per_pixel < valid_dsc_bpp[0]) { - DRM_DEBUG_KMS("Unsupported BPP %d\n", bits_per_pixel); - return 0; - } - - /* Find the nearest match in the array of known BPPs from VESA */ - for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) { - if (bits_per_pixel < valid_dsc_bpp[i + 1]) - break; - } - bits_per_pixel = valid_dsc_bpp[i]; - - /* - * Compressed BPP in U6.4 format so multiply by 16, for Gen 11, - * fractional part is 0 - */ - return bits_per_pixel << 4; -} - -u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, - int mode_clock, - int mode_hdisplay) -{ - u8 min_slice_count, i; - int max_slice_width; - - if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE) - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_0); - else - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_1); - - max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd); - if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) { - DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n", - max_slice_width); - return 0; - } - /* Also take into account max slice width */ - min_slice_count = min_t(u8, min_slice_count, - DIV_ROUND_UP(mode_hdisplay, - max_slice_width)); - - /* Find the closest match to the valid slice count values */ - for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) { - if (valid_dsc_slicecount[i] > - drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, - false)) - break; - if (min_slice_count <= valid_dsc_slicecount[i]) - return valid_dsc_slicecount[i]; - } - - DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count); - return 0; -} - static void intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 657bbb1f5ed0..00981fb9414b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -102,10 +102,6 @@ bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp); bool intel_dp_source_supports_hbr3(struct intel_dp *intel_dp); bool intel_dp_get_link_status(struct intel_dp *intel_dp, u8 *link_status); -u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, - int mode_clock, int mode_hdisplay); -u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock, - int mode_hdisplay); bool intel_dp_read_dpcd(struct intel_dp *intel_dp); bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp); @@ -118,4 +114,6 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) return ~((1 << lane_count) - 1) & 0xf; } +u32 intel_dp_mode_to_fec_clock(u32 mode_clock); + #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 6df240a01b8c..600873c796d0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -81,7 +81,7 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, adjusted_mode->crtc_clock, crtc_state->port_clock, &crtc_state->dp_m_n, - constant_n); + constant_n, crtc_state->fec_enable); crtc_state->dp_m_n.tu = slots; return 0; @@ -615,7 +615,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->type = INTEL_OUTPUT_DP_MST; intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->base.port; - intel_encoder->crtc_mask = BIT(pipe); + intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; intel_encoder->compute_config = intel_dp_mst_compute_config; diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index dea63be1964f..cae25e493128 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -1528,6 +1528,7 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, int src_x, src_w, src_h, crtc_w, crtc_h; const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + unsigned int stride = plane_state->color_plane[0].stride; unsigned int cpp = fb->format->cpp[0]; unsigned int width_bytes; int min_width, min_height; @@ -1569,9 +1570,9 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, return -EINVAL; } - if (width_bytes > 4096 || fb->pitches[0] > 4096) { + if (stride > 4096) { DRM_DEBUG_KMS("Stride (%u) exceeds hardware max with scaling (%u)\n", - fb->pitches[0], 4096); + stride, 4096); return -EINVAL; } diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c index e226324adb69..4bdd63b57100 100644 --- a/drivers/gpu/drm/omapdrm/dss/dss.c +++ b/drivers/gpu/drm/omapdrm/dss/dss.c @@ -1083,7 +1083,7 @@ static const struct dss_features omap34xx_dss_feats = { static const struct dss_features omap3630_dss_feats = { .model = DSS_MODEL_OMAP3, - .fck_div_max = 32, + .fck_div_max = 31, .fck_freq_max = 173000000, .dss_fck_multiplier = 1, .parent_clk_name = "dpll4_ck", diff --git a/drivers/gpu/drm/rcar-du/rcar_du_writeback.c b/drivers/gpu/drm/rcar-du/rcar_du_writeback.c index ae07290bba6a..04efa78d70b6 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_writeback.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_writeback.c @@ -147,7 +147,7 @@ static int rcar_du_wb_enc_atomic_check(struct drm_encoder *encoder, struct drm_device *dev = encoder->dev; struct drm_framebuffer *fb; - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; fb = conn_state->writeback_job->fb; @@ -221,7 +221,7 @@ void rcar_du_writeback_setup(struct rcar_du_crtc *rcrtc, unsigned int i; state = rcrtc->writeback.base.state; - if (!state || !state->writeback_job || !state->writeback_job->fb) + if (!state || !state->writeback_job) return; fb = state->writeback_job->fb; diff --git a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c index 525dc1c0f1c1..530edb3b51cc 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c @@ -7,6 +7,7 @@ #include <linux/gpio.h> #include <linux/mod_devicetable.h> #include <linux/of_gpio.h> +#include <linux/pinctrl/consumer.h> #include <linux/platform_device.h> #include <drm/drm_atomic_helper.h> diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c index 1ce4d7142b6e..bf720206727f 100644 --- a/drivers/gpu/drm/vc4/vc4_txp.c +++ b/drivers/gpu/drm/vc4/vc4_txp.c @@ -231,7 +231,7 @@ static int vc4_txp_connector_atomic_check(struct drm_connector *conn, int i; conn_state = drm_atomic_get_new_connector_state(state, conn); - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); @@ -271,8 +271,7 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn, u32 ctrl; int i; - if (WARN_ON(!conn_state->writeback_job || - !conn_state->writeback_job->fb)) + if (WARN_ON(!conn_state->writeback_job)) return; mode = &conn_state->crtc->state->adjusted_mode; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index c09791fb4929..f1c714acc280 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1736,6 +1736,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS: case PCI_DEVICE_ID_INTEL_DNV_SMBUS: case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS: + priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; priv->features |= FEATURE_SMBUS_PEC; diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index a89bfce5388e..17abf60c94ae 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -355,11 +355,13 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, { dma_addr_t rx_dma; unsigned long time_left; - void *dma_buf; + void *dma_buf = NULL; struct geni_se *se = &gi2c->se; size_t len = msg->len; - dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (!of_machine_is_compatible("lenovo,yoga-c630")) + dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (dma_buf) geni_se_select_mode(se, GENI_SE_DMA); else @@ -394,11 +396,13 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, { dma_addr_t tx_dma; unsigned long time_left; - void *dma_buf; + void *dma_buf = NULL; struct geni_se *se = &gi2c->se; size_t len = msg->len; - dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (!of_machine_is_compatible("lenovo,yoga-c630")) + dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (dma_buf) geni_se_select_mode(se, GENI_SE_DMA); else diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c index f31413fd9521..800414886f6b 100644 --- a/drivers/i2c/busses/i2c-riic.c +++ b/drivers/i2c/busses/i2c-riic.c @@ -202,6 +202,7 @@ static irqreturn_t riic_tend_isr(int irq, void *data) if (readb(riic->base + RIIC_ICSR2) & ICSR2_NACKF) { /* We got a NACKIE */ readb(riic->base + RIIC_ICDRR); /* dummy read */ + riic_clear_set_bit(riic, ICSR2_NACKF, 0, RIIC_ICSR2); riic->err = -ENXIO; } else if (riic->bytes_left) { return IRQ_NONE; diff --git a/drivers/i2c/i2c-slave-eeprom.c b/drivers/i2c/i2c-slave-eeprom.c index 92ff9991bae8..db9763cb4dae 100644 --- a/drivers/i2c/i2c-slave-eeprom.c +++ b/drivers/i2c/i2c-slave-eeprom.c @@ -33,11 +33,13 @@ struct eeprom_data { u16 address_mask; u8 num_address_bytes; u8 idx_write_cnt; + bool read_only; u8 buffer[]; }; #define I2C_SLAVE_BYTELEN GENMASK(15, 0) #define I2C_SLAVE_FLAG_ADDR16 BIT(16) +#define I2C_SLAVE_FLAG_RO BIT(17) #define I2C_SLAVE_DEVICE_MAGIC(_len, _flags) ((_flags) | (_len)) static int i2c_slave_eeprom_slave_cb(struct i2c_client *client, @@ -53,9 +55,11 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client, eeprom->buffer_idx = *val | (eeprom->buffer_idx << 8); eeprom->idx_write_cnt++; } else { - spin_lock(&eeprom->buffer_lock); - eeprom->buffer[eeprom->buffer_idx++ & eeprom->address_mask] = *val; - spin_unlock(&eeprom->buffer_lock); + if (!eeprom->read_only) { + spin_lock(&eeprom->buffer_lock); + eeprom->buffer[eeprom->buffer_idx++ & eeprom->address_mask] = *val; + spin_unlock(&eeprom->buffer_lock); + } } break; @@ -130,6 +134,7 @@ static int i2c_slave_eeprom_probe(struct i2c_client *client, const struct i2c_de eeprom->idx_write_cnt = 0; eeprom->num_address_bytes = flag_addr16 ? 2 : 1; eeprom->address_mask = size - 1; + eeprom->read_only = FIELD_GET(I2C_SLAVE_FLAG_RO, id->driver_data); spin_lock_init(&eeprom->buffer_lock); i2c_set_clientdata(client, eeprom); @@ -165,8 +170,11 @@ static int i2c_slave_eeprom_remove(struct i2c_client *client) static const struct i2c_device_id i2c_slave_eeprom_id[] = { { "slave-24c02", I2C_SLAVE_DEVICE_MAGIC(2048 / 8, 0) }, + { "slave-24c02ro", I2C_SLAVE_DEVICE_MAGIC(2048 / 8, I2C_SLAVE_FLAG_RO) }, { "slave-24c32", I2C_SLAVE_DEVICE_MAGIC(32768 / 8, I2C_SLAVE_FLAG_ADDR16) }, + { "slave-24c32ro", I2C_SLAVE_DEVICE_MAGIC(32768 / 8, I2C_SLAVE_FLAG_ADDR16 | I2C_SLAVE_FLAG_RO) }, { "slave-24c64", I2C_SLAVE_DEVICE_MAGIC(65536 / 8, I2C_SLAVE_FLAG_ADDR16) }, + { "slave-24c64ro", I2C_SLAVE_DEVICE_MAGIC(65536 / 8, I2C_SLAVE_FLAG_ADDR16 | I2C_SLAVE_FLAG_RO) }, { } }; MODULE_DEVICE_TABLE(i2c, i2c_slave_eeprom_id); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 97975bb7f347..2369b8af81f3 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -70,7 +70,6 @@ */ #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) -static DEFINE_SPINLOCK(amd_iommu_devtable_lock); static DEFINE_SPINLOCK(pd_bitmap_lock); /* List of all available dev_data structures */ @@ -202,6 +201,7 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) if (!dev_data) return NULL; + spin_lock_init(&dev_data->lock); dev_data->devid = devid; ratelimit_default_init(&dev_data->rs); @@ -501,6 +501,29 @@ static void iommu_uninit_device(struct device *dev) */ } +/* + * Helper function to get the first pte of a large mapping + */ +static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, + unsigned long *count) +{ + unsigned long pte_mask, pg_size, cnt; + u64 *fpte; + + pg_size = PTE_PAGE_SIZE(*pte); + cnt = PAGE_SIZE_PTE_COUNT(pg_size); + pte_mask = ~((cnt << 3) - 1); + fpte = (u64 *)(((unsigned long)pte) & pte_mask); + + if (page_size) + *page_size = pg_size; + + if (count) + *count = cnt; + + return fpte; +} + /**************************************************************************** * * Interrupt handling functions @@ -1311,8 +1334,12 @@ static void domain_flush_np_cache(struct protection_domain *domain, dma_addr_t iova, size_t size) { if (unlikely(amd_iommu_np_cache)) { + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); domain_flush_pages(domain, iova, size); domain_flush_complete(domain); + spin_unlock_irqrestore(&domain->lock, flags); } } @@ -1425,7 +1452,7 @@ static void free_pagetable(struct protection_domain *domain) BUG_ON(domain->mode < PAGE_MODE_NONE || domain->mode > PAGE_MODE_6_LEVEL); - free_sub_pt(root, domain->mode, freelist); + freelist = free_sub_pt(root, domain->mode, freelist); free_page_list(freelist); } @@ -1435,10 +1462,11 @@ static void free_pagetable(struct protection_domain *domain) * another level increases the size of the address space by 9 bits to a size up * to 64 bits. */ -static void increase_address_space(struct protection_domain *domain, +static bool increase_address_space(struct protection_domain *domain, gfp_t gfp) { unsigned long flags; + bool ret = false; u64 *pte; spin_lock_irqsave(&domain->lock, flags); @@ -1455,19 +1483,21 @@ static void increase_address_space(struct protection_domain *domain, iommu_virt_to_phys(domain->pt_root)); domain->pt_root = pte; domain->mode += 1; - domain->updated = true; + + ret = true; out: spin_unlock_irqrestore(&domain->lock, flags); - return; + return ret; } static u64 *alloc_pte(struct protection_domain *domain, unsigned long address, unsigned long page_size, u64 **pte_page, - gfp_t gfp) + gfp_t gfp, + bool *updated) { int level, end_lvl; u64 *pte, *page; @@ -1475,7 +1505,7 @@ static u64 *alloc_pte(struct protection_domain *domain, BUG_ON(!is_power_of_2(page_size)); while (address > PM_LEVEL_SIZE(domain->mode)) - increase_address_space(domain, gfp); + *updated = increase_address_space(domain, gfp) || *updated; level = domain->mode - 1; pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; @@ -1489,9 +1519,32 @@ static u64 *alloc_pte(struct protection_domain *domain, __pte = *pte; pte_level = PM_PTE_LEVEL(__pte); - if (!IOMMU_PTE_PRESENT(__pte) || + /* + * If we replace a series of large PTEs, we need + * to tear down all of them. + */ + if (IOMMU_PTE_PRESENT(__pte) && pte_level == PAGE_MODE_7_LEVEL) { + unsigned long count, i; + u64 *lpte; + + lpte = first_pte_l7(pte, NULL, &count); + + /* + * Unmap the replicated PTEs that still match the + * original large mapping + */ + for (i = 0; i < count; ++i) + cmpxchg64(&lpte[i], __pte, 0ULL); + + *updated = true; + continue; + } + + if (!IOMMU_PTE_PRESENT(__pte) || + pte_level == PAGE_MODE_NONE) { page = (u64 *)get_zeroed_page(gfp); + if (!page) return NULL; @@ -1500,8 +1553,8 @@ static u64 *alloc_pte(struct protection_domain *domain, /* pte could have been changed somewhere. */ if (cmpxchg64(pte, __pte, __npte) != __pte) free_page((unsigned long)page); - else if (pte_level == PAGE_MODE_7_LEVEL) - domain->updated = true; + else if (IOMMU_PTE_PRESENT(__pte)) + *updated = true; continue; } @@ -1566,17 +1619,12 @@ static u64 *fetch_pte(struct protection_domain *domain, *page_size = PTE_LEVEL_PAGE_SIZE(level); } - if (PM_PTE_LEVEL(*pte) == 0x07) { - unsigned long pte_mask; - - /* - * If we have a series of large PTEs, make - * sure to return a pointer to the first one. - */ - *page_size = pte_mask = PTE_PAGE_SIZE(*pte); - pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1); - pte = (u64 *)(((unsigned long)pte) & pte_mask); - } + /* + * If we have a series of large PTEs, make + * sure to return a pointer to the first one. + */ + if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) + pte = first_pte_l7(pte, page_size, NULL); return pte; } @@ -1615,26 +1663,29 @@ static int iommu_map_page(struct protection_domain *dom, gfp_t gfp) { struct page *freelist = NULL; + bool updated = false; u64 __pte, *pte; - int i, count; + int ret, i, count; BUG_ON(!IS_ALIGNED(bus_addr, page_size)); BUG_ON(!IS_ALIGNED(phys_addr, page_size)); + ret = -EINVAL; if (!(prot & IOMMU_PROT_MASK)) - return -EINVAL; + goto out; count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp); + pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); + ret = -ENOMEM; if (!pte) - return -ENOMEM; + goto out; for (i = 0; i < count; ++i) freelist = free_clear_pte(&pte[i], pte[i], freelist); if (freelist != NULL) - dom->updated = true; + updated = true; if (count > 1) { __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); @@ -1650,12 +1701,21 @@ static int iommu_map_page(struct protection_domain *dom, for (i = 0; i < count; ++i) pte[i] = __pte; - update_domain(dom); + ret = 0; + +out: + if (updated) { + unsigned long flags; + + spin_lock_irqsave(&dom->lock, flags); + update_domain(dom); + spin_unlock_irqrestore(&dom->lock, flags); + } /* Everything flushed out, free pages now */ free_page_list(freelist); - return 0; + return ret; } static unsigned long iommu_unmap_page(struct protection_domain *dom, @@ -1806,8 +1866,12 @@ static void free_gcr3_table(struct protection_domain *domain) static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) { + unsigned long flags; + + spin_lock_irqsave(&dom->domain.lock, flags); domain_flush_tlb(&dom->domain); domain_flush_complete(&dom->domain); + spin_unlock_irqrestore(&dom->domain.lock, flags); } static void iova_domain_flush_tlb(struct iova_domain *iovad) @@ -2022,36 +2086,6 @@ static void do_detach(struct iommu_dev_data *dev_data) domain->dev_cnt -= 1; } -/* - * If a device is not yet associated with a domain, this function makes the - * device visible in the domain - */ -static int __attach_device(struct iommu_dev_data *dev_data, - struct protection_domain *domain) -{ - int ret; - - /* lock domain */ - spin_lock(&domain->lock); - - ret = -EBUSY; - if (dev_data->domain != NULL) - goto out_unlock; - - /* Attach alias group root */ - do_attach(dev_data, domain); - - ret = 0; - -out_unlock: - - /* ready */ - spin_unlock(&domain->lock); - - return ret; -} - - static void pdev_iommuv2_disable(struct pci_dev *pdev) { pci_disable_ats(pdev); @@ -2133,19 +2167,28 @@ static int attach_device(struct device *dev, unsigned long flags; int ret; + spin_lock_irqsave(&domain->lock, flags); + dev_data = get_dev_data(dev); + spin_lock(&dev_data->lock); + + ret = -EBUSY; + if (dev_data->domain != NULL) + goto out; + if (!dev_is_pci(dev)) goto skip_ats_check; pdev = to_pci_dev(dev); if (domain->flags & PD_IOMMUV2_MASK) { + ret = -EINVAL; if (!dev_data->passthrough) - return -EINVAL; + goto out; if (dev_data->iommu_v2) { if (pdev_iommuv2_enable(pdev) != 0) - return -EINVAL; + goto out; dev_data->ats.enabled = true; dev_data->ats.qdep = pci_ats_queue_depth(pdev); @@ -2158,9 +2201,9 @@ static int attach_device(struct device *dev, } skip_ats_check: - spin_lock_irqsave(&amd_iommu_devtable_lock, flags); - ret = __attach_device(dev_data, domain); - spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + ret = 0; + + do_attach(dev_data, domain); /* * We might boot into a crash-kernel here. The crashed kernel @@ -2169,23 +2212,14 @@ skip_ats_check: */ domain_flush_tlb_pde(domain); - return ret; -} - -/* - * Removes a device from a protection domain (unlocked) - */ -static void __detach_device(struct iommu_dev_data *dev_data) -{ - struct protection_domain *domain; - - domain = dev_data->domain; + domain_flush_complete(domain); - spin_lock(&domain->lock); +out: + spin_unlock(&dev_data->lock); - do_detach(dev_data); + spin_unlock_irqrestore(&domain->lock, flags); - spin_unlock(&domain->lock); + return ret; } /* @@ -2200,6 +2234,10 @@ static void detach_device(struct device *dev) dev_data = get_dev_data(dev); domain = dev_data->domain; + spin_lock_irqsave(&domain->lock, flags); + + spin_lock(&dev_data->lock); + /* * First check if the device is still attached. It might already * be detached from its domain because the generic @@ -2207,15 +2245,12 @@ static void detach_device(struct device *dev) * our alias handling. */ if (WARN_ON(!dev_data->domain)) - return; + goto out; - /* lock device table */ - spin_lock_irqsave(&amd_iommu_devtable_lock, flags); - __detach_device(dev_data); - spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + do_detach(dev_data); if (!dev_is_pci(dev)) - return; + goto out; if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2) pdev_iommuv2_disable(to_pci_dev(dev)); @@ -2223,6 +2258,11 @@ static void detach_device(struct device *dev) pci_disable_ats(to_pci_dev(dev)); dev_data->ats.enabled = false; + +out: + spin_unlock(&dev_data->lock); + + spin_unlock_irqrestore(&domain->lock, flags); } static int amd_iommu_add_device(struct device *dev) @@ -2354,15 +2394,10 @@ static void update_device_table(struct protection_domain *domain) static void update_domain(struct protection_domain *domain) { - if (!domain->updated) - return; - update_device_table(domain); domain_flush_devices(domain); domain_flush_tlb_pde(domain); - - domain->updated = false; } static int dir2prot(enum dma_data_direction direction) @@ -2392,6 +2427,7 @@ static dma_addr_t __map_single(struct device *dev, { dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t address, start, ret; + unsigned long flags; unsigned int pages; int prot = 0; int i; @@ -2429,8 +2465,10 @@ out_unmap: iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE); } + spin_lock_irqsave(&dma_dom->domain.lock, flags); domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); + spin_unlock_irqrestore(&dma_dom->domain.lock, flags); dma_ops_free_iova(dma_dom, address, pages); @@ -2459,8 +2497,12 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, } if (amd_iommu_unmap_flush) { + unsigned long flags; + + spin_lock_irqsave(&dma_dom->domain.lock, flags); domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); + spin_unlock_irqrestore(&dma_dom->domain.lock, flags); dma_ops_free_iova(dma_dom, dma_addr, pages); } else { pages = __roundup_pow_of_two(pages); @@ -2866,16 +2908,16 @@ static void cleanup_domain(struct protection_domain *domain) struct iommu_dev_data *entry; unsigned long flags; - spin_lock_irqsave(&amd_iommu_devtable_lock, flags); + spin_lock_irqsave(&domain->lock, flags); while (!list_empty(&domain->dev_list)) { entry = list_first_entry(&domain->dev_list, struct iommu_dev_data, list); BUG_ON(!entry->domain); - __detach_device(entry); + do_detach(entry); } - spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + spin_unlock_irqrestore(&domain->lock, flags); } static void protection_domain_free(struct protection_domain *domain) @@ -3226,9 +3268,12 @@ static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) { struct protection_domain *dom = to_pdomain(domain); + unsigned long flags; + spin_lock_irqsave(&dom->lock, flags); domain_flush_tlb_pde(dom); domain_flush_complete(dom); + spin_unlock_irqrestore(&dom->lock, flags); } static void amd_iommu_iotlb_sync(struct iommu_domain *domain, @@ -3290,7 +3335,6 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom) /* Update data structure */ domain->mode = PAGE_MODE_NONE; - domain->updated = true; /* Make changes visible to IOMMUs */ update_domain(domain); @@ -3336,7 +3380,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) domain->glx = levels; domain->flags |= PD_IOMMUV2_MASK; - domain->updated = true; update_domain(domain); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 9ac229e92b07..c9c1612d52e0 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -475,7 +475,6 @@ struct protection_domain { int glx; /* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ - bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; @@ -634,6 +633,9 @@ struct devid_map { * This struct contains device specific data for the IOMMU */ struct iommu_dev_data { + /*Protect against attach/detach races */ + spinlock_t lock; + struct list_head list; /* For domain->dev_list */ struct llist_node dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index 0e019cc5da42..dfac6afa82ca 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -36,7 +36,6 @@ #include <linux/regulator/db8500-prcmu.h> #include <linux/regulator/machine.h> #include <linux/platform_data/ux500_wdt.h> -#include <linux/platform_data/db8500_thermal.h> #include "dbx500-prcmu-regs.h" /* Index of different voltages to be used when accessing AVSData */ @@ -3014,53 +3013,6 @@ static struct ux500_wdt_data db8500_wdt_pdata = { .timeout = 600, /* 10 minutes */ .has_28_bits_resolution = true, }; -/* - * Thermal Sensor - */ - -static struct resource db8500_thsens_resources[] = { - { - .name = "IRQ_HOTMON_LOW", - .start = IRQ_PRCMU_HOTMON_LOW, - .end = IRQ_PRCMU_HOTMON_LOW, - .flags = IORESOURCE_IRQ, - }, - { - .name = "IRQ_HOTMON_HIGH", - .start = IRQ_PRCMU_HOTMON_HIGH, - .end = IRQ_PRCMU_HOTMON_HIGH, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct db8500_thsens_platform_data db8500_thsens_data = { - .trip_points[0] = { - .temp = 70000, - .type = THERMAL_TRIP_ACTIVE, - .cdev_name = { - [0] = "thermal-cpufreq-0", - }, - }, - .trip_points[1] = { - .temp = 75000, - .type = THERMAL_TRIP_ACTIVE, - .cdev_name = { - [0] = "thermal-cpufreq-0", - }, - }, - .trip_points[2] = { - .temp = 80000, - .type = THERMAL_TRIP_ACTIVE, - .cdev_name = { - [0] = "thermal-cpufreq-0", - }, - }, - .trip_points[3] = { - .temp = 85000, - .type = THERMAL_TRIP_CRITICAL, - }, - .num_trips = 4, -}; static const struct mfd_cell common_prcmu_devs[] = { { @@ -3084,10 +3036,7 @@ static const struct mfd_cell db8500_prcmu_devs[] = { }, { .name = "db8500-thermal", - .num_resources = ARRAY_SIZE(db8500_thsens_resources), - .resources = db8500_thsens_resources, - .platform_data = &db8500_thsens_data, - .pdata_size = sizeof(db8500_thsens_data), + .of_compatible = "stericsson,db8500-thermal", }, }; diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 3a52f5703286..49ea02c467bf 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -94,6 +94,7 @@ config MMC_SDHCI_PCI depends on MMC_SDHCI && PCI select MMC_CQHCI select IOSF_MBI if X86 + select MMC_SDHCI_IO_ACCESSORS help This selects the PCI Secure Digital Host Controller Interface. Most controllers found today are PCI devices. diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 390ee162fe71..11c4598e91d9 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MMC_MXS) += mxs-mmc.o obj-$(CONFIG_MMC_SDHCI) += sdhci.o obj-$(CONFIG_MMC_SDHCI_PCI) += sdhci-pci.o sdhci-pci-y += sdhci-pci-core.o sdhci-pci-o2micro.o sdhci-pci-arasan.o \ - sdhci-pci-dwc-mshc.o + sdhci-pci-dwc-mshc.o sdhci-pci-gli.o obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI)) += sdhci-pci-data.o obj-$(CONFIG_MMC_SDHCI_ACPI) += sdhci-acpi.o obj-$(CONFIG_MMC_SDHCI_PXAV3) += sdhci-pxav3.o diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 3271c2d76629..1d1953dfc54b 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -495,7 +495,12 @@ static int esdhc_of_enable_dma(struct sdhci_host *host) dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); value = sdhci_readl(host, ESDHC_DMA_SYSCTL); - value |= ESDHC_DMA_SNOOP; + + if (of_dma_is_coherent(dev->of_node)) + value |= ESDHC_DMA_SNOOP; + else + value &= ~ESDHC_DMA_SNOOP; + sdhci_writel(host, value, ESDHC_DMA_SYSCTL); return 0; } diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index e1ca185d7328..eaffa85bc728 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1685,6 +1685,8 @@ static const struct pci_device_id pci_ids[] = { SDHCI_PCI_DEVICE(O2, SEABIRD1, o2), SDHCI_PCI_DEVICE(ARASAN, PHY_EMMC, arasan), SDHCI_PCI_DEVICE(SYNOPSYS, DWC_MSHC, snps), + SDHCI_PCI_DEVICE(GLI, 9750, gl9750), + SDHCI_PCI_DEVICE(GLI, 9755, gl9755), SDHCI_PCI_DEVICE_CLASS(AMD, SYSTEM_SDHCI, PCI_CLASS_MASK, amd), /* Generic SD host controller */ {PCI_DEVICE_CLASS(SYSTEM_SDHCI, PCI_CLASS_MASK)}, diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c new file mode 100644 index 000000000000..5eea8d70a85d --- /dev/null +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2019 Genesys Logic, Inc. + * + * Authors: Ben Chuang <ben.chuang@genesyslogic.com.tw> + * + * Version: v0.9.0 (2019-08-08) + */ + +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/pci.h> +#include <linux/mmc/mmc.h> +#include <linux/delay.h> +#include "sdhci.h" +#include "sdhci-pci.h" + +/* Genesys Logic extra registers */ +#define SDHCI_GLI_9750_WT 0x800 +#define SDHCI_GLI_9750_WT_EN BIT(0) +#define GLI_9750_WT_EN_ON 0x1 +#define GLI_9750_WT_EN_OFF 0x0 + +#define SDHCI_GLI_9750_DRIVING 0x860 +#define SDHCI_GLI_9750_DRIVING_1 GENMASK(11, 0) +#define SDHCI_GLI_9750_DRIVING_2 GENMASK(27, 26) +#define GLI_9750_DRIVING_1_VALUE 0xFFF +#define GLI_9750_DRIVING_2_VALUE 0x3 + +#define SDHCI_GLI_9750_PLL 0x864 +#define SDHCI_GLI_9750_PLL_TX2_INV BIT(23) +#define SDHCI_GLI_9750_PLL_TX2_DLY GENMASK(22, 20) +#define GLI_9750_PLL_TX2_INV_VALUE 0x1 +#define GLI_9750_PLL_TX2_DLY_VALUE 0x0 + +#define SDHCI_GLI_9750_SW_CTRL 0x874 +#define SDHCI_GLI_9750_SW_CTRL_4 GENMASK(7, 6) +#define GLI_9750_SW_CTRL_4_VALUE 0x3 + +#define SDHCI_GLI_9750_MISC 0x878 +#define SDHCI_GLI_9750_MISC_TX1_INV BIT(2) +#define SDHCI_GLI_9750_MISC_RX_INV BIT(3) +#define SDHCI_GLI_9750_MISC_TX1_DLY GENMASK(6, 4) +#define GLI_9750_MISC_TX1_INV_VALUE 0x0 +#define GLI_9750_MISC_RX_INV_ON 0x1 +#define GLI_9750_MISC_RX_INV_OFF 0x0 +#define GLI_9750_MISC_RX_INV_VALUE GLI_9750_MISC_RX_INV_OFF +#define GLI_9750_MISC_TX1_DLY_VALUE 0x5 + +#define SDHCI_GLI_9750_TUNING_CONTROL 0x540 +#define SDHCI_GLI_9750_TUNING_CONTROL_EN BIT(4) +#define GLI_9750_TUNING_CONTROL_EN_ON 0x1 +#define GLI_9750_TUNING_CONTROL_EN_OFF 0x0 +#define SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_1 BIT(16) +#define SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_2 GENMASK(20, 19) +#define GLI_9750_TUNING_CONTROL_GLITCH_1_VALUE 0x1 +#define GLI_9750_TUNING_CONTROL_GLITCH_2_VALUE 0x2 + +#define SDHCI_GLI_9750_TUNING_PARAMETERS 0x544 +#define SDHCI_GLI_9750_TUNING_PARAMETERS_RX_DLY GENMASK(2, 0) +#define GLI_9750_TUNING_PARAMETERS_RX_DLY_VALUE 0x1 + +#define GLI_MAX_TUNING_LOOP 40 + +/* Genesys Logic chipset */ +static inline void gl9750_wt_on(struct sdhci_host *host) +{ + u32 wt_value; + u32 wt_enable; + + wt_value = sdhci_readl(host, SDHCI_GLI_9750_WT); + wt_enable = FIELD_GET(SDHCI_GLI_9750_WT_EN, wt_value); + + if (wt_enable == GLI_9750_WT_EN_ON) + return; + + wt_value &= ~SDHCI_GLI_9750_WT_EN; + wt_value |= FIELD_PREP(SDHCI_GLI_9750_WT_EN, GLI_9750_WT_EN_ON); + + sdhci_writel(host, wt_value, SDHCI_GLI_9750_WT); +} + +static inline void gl9750_wt_off(struct sdhci_host *host) +{ + u32 wt_value; + u32 wt_enable; + + wt_value = sdhci_readl(host, SDHCI_GLI_9750_WT); + wt_enable = FIELD_GET(SDHCI_GLI_9750_WT_EN, wt_value); + + if (wt_enable == GLI_9750_WT_EN_OFF) + return; + + wt_value &= ~SDHCI_GLI_9750_WT_EN; + wt_value |= FIELD_PREP(SDHCI_GLI_9750_WT_EN, GLI_9750_WT_EN_OFF); + + sdhci_writel(host, wt_value, SDHCI_GLI_9750_WT); +} + +static void gli_set_9750(struct sdhci_host *host) +{ + u32 driving_value; + u32 pll_value; + u32 sw_ctrl_value; + u32 misc_value; + u32 parameter_value; + u32 control_value; + u16 ctrl2; + + gl9750_wt_on(host); + + driving_value = sdhci_readl(host, SDHCI_GLI_9750_DRIVING); + pll_value = sdhci_readl(host, SDHCI_GLI_9750_PLL); + sw_ctrl_value = sdhci_readl(host, SDHCI_GLI_9750_SW_CTRL); + misc_value = sdhci_readl(host, SDHCI_GLI_9750_MISC); + parameter_value = sdhci_readl(host, SDHCI_GLI_9750_TUNING_PARAMETERS); + control_value = sdhci_readl(host, SDHCI_GLI_9750_TUNING_CONTROL); + + driving_value &= ~(SDHCI_GLI_9750_DRIVING_1); + driving_value &= ~(SDHCI_GLI_9750_DRIVING_2); + driving_value |= FIELD_PREP(SDHCI_GLI_9750_DRIVING_1, + GLI_9750_DRIVING_1_VALUE); + driving_value |= FIELD_PREP(SDHCI_GLI_9750_DRIVING_2, + GLI_9750_DRIVING_2_VALUE); + sdhci_writel(host, driving_value, SDHCI_GLI_9750_DRIVING); + + sw_ctrl_value &= ~SDHCI_GLI_9750_SW_CTRL_4; + sw_ctrl_value |= FIELD_PREP(SDHCI_GLI_9750_SW_CTRL_4, + GLI_9750_SW_CTRL_4_VALUE); + sdhci_writel(host, sw_ctrl_value, SDHCI_GLI_9750_SW_CTRL); + + /* reset the tuning flow after reinit and before starting tuning */ + pll_value &= ~SDHCI_GLI_9750_PLL_TX2_INV; + pll_value &= ~SDHCI_GLI_9750_PLL_TX2_DLY; + pll_value |= FIELD_PREP(SDHCI_GLI_9750_PLL_TX2_INV, + GLI_9750_PLL_TX2_INV_VALUE); + pll_value |= FIELD_PREP(SDHCI_GLI_9750_PLL_TX2_DLY, + GLI_9750_PLL_TX2_DLY_VALUE); + + misc_value &= ~SDHCI_GLI_9750_MISC_TX1_INV; + misc_value &= ~SDHCI_GLI_9750_MISC_RX_INV; + misc_value &= ~SDHCI_GLI_9750_MISC_TX1_DLY; + misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_TX1_INV, + GLI_9750_MISC_TX1_INV_VALUE); + misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_RX_INV, + GLI_9750_MISC_RX_INV_VALUE); + misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_TX1_DLY, + GLI_9750_MISC_TX1_DLY_VALUE); + + parameter_value &= ~SDHCI_GLI_9750_TUNING_PARAMETERS_RX_DLY; + parameter_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_PARAMETERS_RX_DLY, + GLI_9750_TUNING_PARAMETERS_RX_DLY_VALUE); + + control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_1; + control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_2; + control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_1, + GLI_9750_TUNING_CONTROL_GLITCH_1_VALUE); + control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_GLITCH_2, + GLI_9750_TUNING_CONTROL_GLITCH_2_VALUE); + + sdhci_writel(host, pll_value, SDHCI_GLI_9750_PLL); + sdhci_writel(host, misc_value, SDHCI_GLI_9750_MISC); + + /* disable tuned clk */ + ctrl2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); + ctrl2 &= ~SDHCI_CTRL_TUNED_CLK; + sdhci_writew(host, ctrl2, SDHCI_HOST_CONTROL2); + + /* enable tuning parameters control */ + control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_EN; + control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_EN, + GLI_9750_TUNING_CONTROL_EN_ON); + sdhci_writel(host, control_value, SDHCI_GLI_9750_TUNING_CONTROL); + + /* write tuning parameters */ + sdhci_writel(host, parameter_value, SDHCI_GLI_9750_TUNING_PARAMETERS); + + /* disable tuning parameters control */ + control_value &= ~SDHCI_GLI_9750_TUNING_CONTROL_EN; + control_value |= FIELD_PREP(SDHCI_GLI_9750_TUNING_CONTROL_EN, + GLI_9750_TUNING_CONTROL_EN_OFF); + sdhci_writel(host, control_value, SDHCI_GLI_9750_TUNING_CONTROL); + + /* clear tuned clk */ + ctrl2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); + ctrl2 &= ~SDHCI_CTRL_TUNED_CLK; + sdhci_writew(host, ctrl2, SDHCI_HOST_CONTROL2); + + gl9750_wt_off(host); +} + +static void gli_set_9750_rx_inv(struct sdhci_host *host, bool b) +{ + u32 misc_value; + + gl9750_wt_on(host); + + misc_value = sdhci_readl(host, SDHCI_GLI_9750_MISC); + misc_value &= ~SDHCI_GLI_9750_MISC_RX_INV; + if (b) { + misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_RX_INV, + GLI_9750_MISC_RX_INV_ON); + } else { + misc_value |= FIELD_PREP(SDHCI_GLI_9750_MISC_RX_INV, + GLI_9750_MISC_RX_INV_OFF); + } + sdhci_writel(host, misc_value, SDHCI_GLI_9750_MISC); + + gl9750_wt_off(host); +} + +static int __sdhci_execute_tuning_9750(struct sdhci_host *host, u32 opcode) +{ + int i; + int rx_inv; + + for (rx_inv = 0; rx_inv < 2; rx_inv++) { + gli_set_9750_rx_inv(host, !!rx_inv); + sdhci_start_tuning(host); + + for (i = 0; i < GLI_MAX_TUNING_LOOP; i++) { + u16 ctrl; + + sdhci_send_tuning(host, opcode); + + if (!host->tuning_done) { + sdhci_abort_tuning(host, opcode); + break; + } + + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (!(ctrl & SDHCI_CTRL_EXEC_TUNING)) { + if (ctrl & SDHCI_CTRL_TUNED_CLK) + return 0; /* Success! */ + break; + } + } + } + if (!host->tuning_done) { + pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n", + mmc_hostname(host->mmc)); + return -ETIMEDOUT; + } + + pr_info("%s: Tuning failed, falling back to fixed sampling clock\n", + mmc_hostname(host->mmc)); + sdhci_reset_tuning(host); + + return -EAGAIN; +} + +static int gl9750_execute_tuning(struct sdhci_host *host, u32 opcode) +{ + host->mmc->retune_period = 0; + if (host->tuning_mode == SDHCI_TUNING_MODE_1) + host->mmc->retune_period = host->tuning_count; + + gli_set_9750(host); + host->tuning_err = __sdhci_execute_tuning_9750(host, opcode); + sdhci_end_tuning(host); + + return 0; +} + +static int gli_probe_slot_gl9750(struct sdhci_pci_slot *slot) +{ + struct sdhci_host *host = slot->host; + + slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO; + sdhci_enable_v4_mode(host); + + return 0; +} + +static int gli_probe_slot_gl9755(struct sdhci_pci_slot *slot) +{ + struct sdhci_host *host = slot->host; + + slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO; + sdhci_enable_v4_mode(host); + + return 0; +} + +static void sdhci_gli_voltage_switch(struct sdhci_host *host) +{ + /* + * According to Section 3.6.1 signal voltage switch procedure in + * SD Host Controller Simplified Spec. 4.20, steps 6~8 are as + * follows: + * (6) Set 1.8V Signal Enable in the Host Control 2 register. + * (7) Wait 5ms. 1.8V voltage regulator shall be stable within this + * period. + * (8) If 1.8V Signal Enable is cleared by Host Controller, go to + * step (12). + * + * Wait 5ms after set 1.8V signal enable in Host Control 2 register + * to ensure 1.8V signal enable bit is set by GL9750/GL9755. + */ + usleep_range(5000, 5500); +} + +static void sdhci_gl9750_reset(struct sdhci_host *host, u8 mask) +{ + sdhci_reset(host, mask); + gli_set_9750(host); +} + +static u32 sdhci_gl9750_readl(struct sdhci_host *host, int reg) +{ + u32 value; + + value = readl(host->ioaddr + reg); + if (unlikely(reg == SDHCI_MAX_CURRENT && !(value & 0xff))) + value |= 0xc8; + + return value; +} + +static const struct sdhci_ops sdhci_gl9755_ops = { + .set_clock = sdhci_set_clock, + .enable_dma = sdhci_pci_enable_dma, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_reset, + .set_uhs_signaling = sdhci_set_uhs_signaling, + .voltage_switch = sdhci_gli_voltage_switch, +}; + +const struct sdhci_pci_fixes sdhci_gl9755 = { + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks2 = SDHCI_QUIRK2_BROKEN_DDR50, + .probe_slot = gli_probe_slot_gl9755, + .ops = &sdhci_gl9755_ops, +}; + +static const struct sdhci_ops sdhci_gl9750_ops = { + .read_l = sdhci_gl9750_readl, + .set_clock = sdhci_set_clock, + .enable_dma = sdhci_pci_enable_dma, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_gl9750_reset, + .set_uhs_signaling = sdhci_set_uhs_signaling, + .voltage_switch = sdhci_gli_voltage_switch, + .platform_execute_tuning = gl9750_execute_tuning, +}; + +const struct sdhci_pci_fixes sdhci_gl9750 = { + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks2 = SDHCI_QUIRK2_BROKEN_DDR50, + .probe_slot = gli_probe_slot_gl9750, + .ops = &sdhci_gl9750_ops, +}; diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index 1abc9d47a4c0..558202fe64c6 100644 --- a/drivers/mmc/host/sdhci-pci.h +++ b/drivers/mmc/host/sdhci-pci.h @@ -68,6 +68,9 @@ #define PCI_DEVICE_ID_SYNOPSYS_DWC_MSHC 0xc202 +#define PCI_DEVICE_ID_GLI_9755 0x9755 +#define PCI_DEVICE_ID_GLI_9750 0x9750 + /* * PCI device class and mask */ @@ -188,5 +191,7 @@ int sdhci_pci_enable_dma(struct sdhci_host *host); extern const struct sdhci_pci_fixes sdhci_arasan; extern const struct sdhci_pci_fixes sdhci_snps; extern const struct sdhci_pci_fixes sdhci_o2; +extern const struct sdhci_pci_fixes sdhci_gl9750; +extern const struct sdhci_pci_fixes sdhci_gl9755; #endif /* __SDHCI_PCI_H */ diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 02d8f524bb9e..7bc950520fd9 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -4,6 +4,7 @@ */ #include <linux/delay.h> +#include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/module.h> #include <linux/init.h> @@ -104,6 +105,7 @@ struct sdhci_tegra_soc_data { const struct sdhci_pltfm_data *pdata; + u64 dma_mask; u32 nvquirks; u8 min_tap_delay; u8 max_tap_delay; @@ -1233,11 +1235,25 @@ static const struct cqhci_host_ops sdhci_tegra_cqhci_ops = { .update_dcmd_desc = sdhci_tegra_update_dcmd_desc, }; +static int tegra_sdhci_set_dma_mask(struct sdhci_host *host) +{ + struct sdhci_pltfm_host *platform = sdhci_priv(host); + struct sdhci_tegra *tegra = sdhci_pltfm_priv(platform); + const struct sdhci_tegra_soc_data *soc = tegra->soc_data; + struct device *dev = mmc_dev(host->mmc); + + if (soc->dma_mask) + return dma_set_mask_and_coherent(dev, soc->dma_mask); + + return 0; +} + static const struct sdhci_ops tegra_sdhci_ops = { .get_ro = tegra_sdhci_get_ro, .read_w = tegra_sdhci_readw, .write_l = tegra_sdhci_writel, .set_clock = tegra_sdhci_set_clock, + .set_dma_mask = tegra_sdhci_set_dma_mask, .set_bus_width = sdhci_set_bus_width, .reset = tegra_sdhci_reset, .platform_execute_tuning = tegra_sdhci_execute_tuning, @@ -1257,6 +1273,7 @@ static const struct sdhci_pltfm_data sdhci_tegra20_pdata = { static const struct sdhci_tegra_soc_data soc_data_tegra20 = { .pdata = &sdhci_tegra20_pdata, + .dma_mask = DMA_BIT_MASK(32), .nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 | NVQUIRK_ENABLE_BLOCK_GAP_DET, }; @@ -1283,6 +1300,7 @@ static const struct sdhci_pltfm_data sdhci_tegra30_pdata = { static const struct sdhci_tegra_soc_data soc_data_tegra30 = { .pdata = &sdhci_tegra30_pdata, + .dma_mask = DMA_BIT_MASK(32), .nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300 | NVQUIRK_ENABLE_SDR50 | NVQUIRK_ENABLE_SDR104 | @@ -1295,6 +1313,7 @@ static const struct sdhci_ops tegra114_sdhci_ops = { .write_w = tegra_sdhci_writew, .write_l = tegra_sdhci_writel, .set_clock = tegra_sdhci_set_clock, + .set_dma_mask = tegra_sdhci_set_dma_mask, .set_bus_width = sdhci_set_bus_width, .reset = tegra_sdhci_reset, .platform_execute_tuning = tegra_sdhci_execute_tuning, @@ -1316,6 +1335,7 @@ static const struct sdhci_pltfm_data sdhci_tegra114_pdata = { static const struct sdhci_tegra_soc_data soc_data_tegra114 = { .pdata = &sdhci_tegra114_pdata, + .dma_mask = DMA_BIT_MASK(32), }; static const struct sdhci_pltfm_data sdhci_tegra124_pdata = { @@ -1325,22 +1345,13 @@ static const struct sdhci_pltfm_data sdhci_tegra124_pdata = { SDHCI_QUIRK_NO_HISPD_BIT | SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC | SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | - /* - * The TRM states that the SD/MMC controller found on - * Tegra124 can address 34 bits (the maximum supported by - * the Tegra memory controller), but tests show that DMA - * to or from above 4 GiB doesn't work. This is possibly - * caused by missing programming, though it's not obvious - * what sequence is required. Mark 64-bit DMA broken for - * now to fix this for existing users (e.g. Nyan boards). - */ - SDHCI_QUIRK2_BROKEN_64_BIT_DMA, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, .ops = &tegra114_sdhci_ops, }; static const struct sdhci_tegra_soc_data soc_data_tegra124 = { .pdata = &sdhci_tegra124_pdata, + .dma_mask = DMA_BIT_MASK(34), }; static const struct sdhci_ops tegra210_sdhci_ops = { @@ -1349,6 +1360,7 @@ static const struct sdhci_ops tegra210_sdhci_ops = { .write_w = tegra210_sdhci_writew, .write_l = tegra_sdhci_writel, .set_clock = tegra_sdhci_set_clock, + .set_dma_mask = tegra_sdhci_set_dma_mask, .set_bus_width = sdhci_set_bus_width, .reset = tegra_sdhci_reset, .set_uhs_signaling = tegra_sdhci_set_uhs_signaling, @@ -1369,6 +1381,7 @@ static const struct sdhci_pltfm_data sdhci_tegra210_pdata = { static const struct sdhci_tegra_soc_data soc_data_tegra210 = { .pdata = &sdhci_tegra210_pdata, + .dma_mask = DMA_BIT_MASK(34), .nvquirks = NVQUIRK_NEEDS_PAD_CONTROL | NVQUIRK_HAS_PADCALIB | NVQUIRK_DIS_CARD_CLK_CONFIG_TAP | @@ -1383,6 +1396,7 @@ static const struct sdhci_ops tegra186_sdhci_ops = { .read_w = tegra_sdhci_readw, .write_l = tegra_sdhci_writel, .set_clock = tegra_sdhci_set_clock, + .set_dma_mask = tegra_sdhci_set_dma_mask, .set_bus_width = sdhci_set_bus_width, .reset = tegra_sdhci_reset, .set_uhs_signaling = tegra_sdhci_set_uhs_signaling, @@ -1398,20 +1412,13 @@ static const struct sdhci_pltfm_data sdhci_tegra186_pdata = { SDHCI_QUIRK_NO_HISPD_BIT | SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC | SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | - /* SDHCI controllers on Tegra186 support 40-bit addressing. - * IOVA addresses are 48-bit wide on Tegra186. - * With 64-bit dma mask used for SDHCI, accesses can - * be broken. Disable 64-bit dma, which would fall back - * to 32-bit dma mask. Ideally 40-bit dma mask would work, - * But it is not supported as of now. - */ - SDHCI_QUIRK2_BROKEN_64_BIT_DMA, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, .ops = &tegra186_sdhci_ops, }; static const struct sdhci_tegra_soc_data soc_data_tegra186 = { .pdata = &sdhci_tegra186_pdata, + .dma_mask = DMA_BIT_MASK(40), .nvquirks = NVQUIRK_NEEDS_PAD_CONTROL | NVQUIRK_HAS_PADCALIB | NVQUIRK_DIS_CARD_CLK_CONFIG_TAP | @@ -1424,6 +1431,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra186 = { static const struct sdhci_tegra_soc_data soc_data_tegra194 = { .pdata = &sdhci_tegra186_pdata, + .dma_mask = DMA_BIT_MASK(39), .nvquirks = NVQUIRK_NEEDS_PAD_CONTROL | NVQUIRK_HAS_PADCALIB | NVQUIRK_DIS_CARD_CLK_CONFIG_TAP | diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 4b297f397326..b056400e34b1 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2874,6 +2874,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p) static void sdhci_adma_show_error(struct sdhci_host *host) { void *desc = host->adma_table; + dma_addr_t dma = host->adma_addr; sdhci_dumpregs(host); @@ -2881,18 +2882,21 @@ static void sdhci_adma_show_error(struct sdhci_host *host) struct sdhci_adma2_64_desc *dma_desc = desc; if (host->flags & SDHCI_USE_64_BIT_DMA) - DBG("%p: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n", - desc, le32_to_cpu(dma_desc->addr_hi), + SDHCI_DUMP("%08llx: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n", + (unsigned long long)dma, + le32_to_cpu(dma_desc->addr_hi), le32_to_cpu(dma_desc->addr_lo), le16_to_cpu(dma_desc->len), le16_to_cpu(dma_desc->cmd)); else - DBG("%p: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n", - desc, le32_to_cpu(dma_desc->addr_lo), + SDHCI_DUMP("%08llx: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n", + (unsigned long long)dma, + le32_to_cpu(dma_desc->addr_lo), le16_to_cpu(dma_desc->len), le16_to_cpu(dma_desc->cmd)); desc += host->desc_sz; + dma += host->desc_sz; if (dma_desc->cmd & cpu_to_le16(ADMA2_END)) break; @@ -2968,7 +2972,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) != MMC_BUS_TEST_R) host->data->error = -EILSEQ; else if (intmask & SDHCI_INT_ADMA_ERROR) { - pr_err("%s: ADMA error\n", mmc_hostname(host->mmc)); + pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc), + intmask); sdhci_adma_show_error(host); host->data->error = -EIO; if (host->ops->adma_workaround) @@ -3776,18 +3781,14 @@ int sdhci_setup_host(struct sdhci_host *host) host->flags &= ~SDHCI_USE_ADMA; } - /* - * It is assumed that a 64-bit capable device has set a 64-bit DMA mask - * and *must* do 64-bit DMA. A driver has the opportunity to change - * that during the first call to ->enable_dma(). Similarly - * SDHCI_QUIRK2_BROKEN_64_BIT_DMA must be left to the drivers to - * implement. - */ if (sdhci_can_64bit_dma(host)) host->flags |= SDHCI_USE_64_BIT_DMA; if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) { - ret = sdhci_set_dma_mask(host); + if (host->ops->set_dma_mask) + ret = host->ops->set_dma_mask(host); + else + ret = sdhci_set_dma_mask(host); if (!ret && host->ops->enable_dma) ret = host->ops->enable_dma(host); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index a29c4cd2d92e..0ed3e0eaef5f 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -622,6 +622,7 @@ struct sdhci_ops { u32 (*irq)(struct sdhci_host *host, u32 intmask); + int (*set_dma_mask)(struct sdhci_host *host); int (*enable_dma)(struct sdhci_host *host); unsigned int (*get_max_clock)(struct sdhci_host *host); unsigned int (*get_min_clock)(struct sdhci_host *host); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index a8d56887ec88..3e9f45aec8d1 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -392,9 +392,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub, arena->freelist[lane].sub = 1 - arena->freelist[lane].sub; if (++(arena->freelist[lane].seq) == 4) arena->freelist[lane].seq = 1; - if (ent_e_flag(ent->old_map)) + if (ent_e_flag(le32_to_cpu(ent->old_map))) arena->freelist[lane].has_err = 1; - arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map)); + arena->freelist[lane].block = ent_lba(le32_to_cpu(ent->old_map)); return ret; } @@ -560,8 +560,8 @@ static int btt_freelist_init(struct arena_info *arena) * FIXME: if error clearing fails during init, we want to make * the BTT read-only */ - if (ent_e_flag(log_new.old_map) && - !ent_normal(log_new.old_map)) { + if (ent_e_flag(le32_to_cpu(log_new.old_map)) && + !ent_normal(le32_to_cpu(log_new.old_map))) { arena->freelist[i].has_err = 1; ret = arena_clear_freelist_error(arena, i); if (ret) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 75a58a6e9615..d47412dcdf38 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -180,7 +180,7 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data) sector_t sector; /* make sure device is a region */ - if (!is_nd_pmem(dev)) + if (!is_memory(dev)) return 0; nd_region = to_nd_region(dev); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 43401325c874..cca0a3ba1d2c 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1987,7 +1987,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region, nd_mapping = &nd_region->mapping[i]; label_ent = list_first_entry_or_null(&nd_mapping->labels, typeof(*label_ent), list); - label0 = label_ent ? label_ent->label : 0; + label0 = label_ent ? label_ent->label : NULL; if (!label0) { WARN_ON(1); @@ -2322,8 +2322,9 @@ static struct device **scan_labels(struct nd_region *nd_region) continue; /* skip labels that describe extents outside of the region */ - if (nd_label->dpa < nd_mapping->start || nd_label->dpa > map_end) - continue; + if (__le64_to_cpu(nd_label->dpa) < nd_mapping->start || + __le64_to_cpu(nd_label->dpa) > map_end) + continue; i = add_namespace_resource(nd_region, nd_label, devs, count); if (i < 0) diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index e89af4b2d8e9..ee5c04070ef9 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -289,11 +289,7 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region) struct nd_pfn *to_nd_pfn(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_PFN) -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PFN_DEFAULT_ALIGNMENT HPAGE_PMD_SIZE -#else -#define PFN_DEFAULT_ALIGNMENT PAGE_SIZE -#endif +#define MAX_NVDIMM_ALIGN 4 int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_pfn(struct device *dev); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index bb9cc5cf0873..60d81fae06ee 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -103,39 +103,42 @@ static ssize_t align_show(struct device *dev, return sprintf(buf, "%ld\n", nd_pfn->align); } -static const unsigned long *nd_pfn_supported_alignments(void) +static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments) { - /* - * This needs to be a non-static variable because the *_SIZE - * macros aren't always constants. - */ - const unsigned long supported_alignments[] = { - PAGE_SIZE, -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - HPAGE_PMD_SIZE, -#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD - HPAGE_PUD_SIZE, -#endif -#endif - 0, - }; - static unsigned long data[ARRAY_SIZE(supported_alignments)]; - memcpy(data, supported_alignments, sizeof(data)); + alignments[0] = PAGE_SIZE; + + if (has_transparent_hugepage()) { + alignments[1] = HPAGE_PMD_SIZE; + if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) + alignments[2] = HPAGE_PUD_SIZE; + } + + return alignments; +} + +/* + * Use pmd mapping if supported as default alignment + */ +static unsigned long nd_pfn_default_alignment(void) +{ - return data; + if (has_transparent_hugepage()) + return HPAGE_PMD_SIZE; + return PAGE_SIZE; } static ssize_t align_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); + unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; ssize_t rc; nd_device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_pfn->align, - nd_pfn_supported_alignments()); + nd_pfn_supported_alignments(aligns)); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); @@ -259,7 +262,10 @@ static DEVICE_ATTR_RO(size); static ssize_t supported_alignments_show(struct device *dev, struct device_attribute *attr, char *buf) { - return nd_size_select_show(0, nd_pfn_supported_alignments(), buf); + unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; + + return nd_size_select_show(0, + nd_pfn_supported_alignments(aligns), buf); } static DEVICE_ATTR_RO(supported_alignments); @@ -302,7 +308,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, return NULL; nd_pfn->mode = PFN_MODE_NONE; - nd_pfn->align = PFN_DEFAULT_ALIGNMENT; + nd_pfn->align = nd_pfn_default_alignment(); dev = &nd_pfn->dev; device_initialize(&nd_pfn->dev); if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { @@ -412,6 +418,21 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) return 0; } +static bool nd_supported_alignment(unsigned long align) +{ + int i; + unsigned long supported[MAX_NVDIMM_ALIGN] = { [0] = 0, }; + + if (align == 0) + return false; + + nd_pfn_supported_alignments(supported); + for (i = 0; supported[i]; i++) + if (align == supported[i]) + return true; + return false; +} + /** * nd_pfn_validate - read and validate info-block * @nd_pfn: fsdax namespace runtime state / properties @@ -496,6 +517,18 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EOPNOTSUPP; } + /* + * Check whether the we support the alignment. For Dax if the + * superblock alignment is not matching, we won't initialize + * the device. + */ + if (!nd_supported_alignment(align) && + !memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) { + dev_err(&nd_pfn->dev, "init failed, alignment mismatch: " + "%ld:%ld\n", nd_pfn->align, align); + return -EOPNOTSUPP; + } + if (!nd_pfn->uuid) { /* * When probing a namepace via nd_pfn_probe() the uuid @@ -639,9 +672,11 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t base = nsio->res.start + start_pad; + resource_size_t end = nsio->res.end - end_trunc; struct vmem_altmap __altmap = { .base_pfn = init_altmap_base(base), .reserve = init_altmap_reserve(base), + .end_pfn = PHYS_PFN(end), }; memcpy(res, &nsio->res, sizeof(*res)); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 37bf8719a2a4..0f6978e72e7c 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -34,7 +34,7 @@ static int nd_region_probe(struct device *dev) if (rc) return rc; - if (is_nd_pmem(&nd_region->dev)) { + if (is_memory(&nd_region->dev)) { struct resource ndr_res; if (devm_init_badblocks(dev, &nd_region->bb)) @@ -123,7 +123,7 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event) struct nd_region *nd_region = to_nd_region(dev); struct resource res; - if (is_nd_pmem(&nd_region->dev)) { + if (is_memory(&nd_region->dev)) { res.start = nd_region->ndr_start; res.end = nd_region->ndr_start + nd_region->ndr_size - 1; diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 3fd6b59abd33..ef423ba1a711 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -632,11 +632,11 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) if (!is_memory(dev) && a == &dev_attr_dax_seed.attr) return 0; - if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr) + if (!is_memory(dev) && a == &dev_attr_badblocks.attr) return 0; if (a == &dev_attr_resource.attr) { - if (is_nd_pmem(dev)) + if (is_memory(dev)) return 0400; else return 0; @@ -1168,6 +1168,9 @@ EXPORT_SYMBOL_GPL(nvdimm_has_cache); bool is_nvdimm_sync(struct nd_region *nd_region) { + if (is_nd_volatile(&nd_region->dev)) + return true; + return is_nd_pmem(&nd_region->dev) && !test_bit(ND_REGION_ASYNC, &nd_region->flags); } diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index 9e45b207ff01..89b85970912d 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -177,6 +177,10 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm) || !nvdimm->sec.flags) return -EIO; + /* No need to go further if security is disabled */ + if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags)) + return 0; + if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { dev_dbg(dev, "Security operation in progress.\n"); return -EBUSY; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 108f60b46804..fd7dea36c3b6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -102,10 +102,13 @@ static void nvme_set_queue_dying(struct nvme_ns *ns) */ if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) return; - revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ns->queue); + /* + * Revalidate after unblocking dispatchers that may be holding bd_butex + */ + revalidate_disk(ns->disk); } static void nvme_queue_scan(struct nvme_ctrl *ctrl) @@ -847,7 +850,7 @@ out: static int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, u32 *result, unsigned timeout) + u32 meta_seed, u64 *result, unsigned timeout) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -888,7 +891,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, else ret = nvme_req(req)->status; if (result) - *result = le32_to_cpu(nvme_req(req)->result.u32); + *result = le64_to_cpu(nvme_req(req)->result.u64); if (meta && !ret && !write) { if (copy_to_user(meta_buffer, meta, meta_len)) ret = -EFAULT; @@ -1335,6 +1338,54 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_command c; unsigned timeout = 0; u32 effects; + u64 result; + int status; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (copy_from_user(&cmd, ucmd, sizeof(cmd))) + return -EFAULT; + if (cmd.flags) + return -EINVAL; + + memset(&c, 0, sizeof(c)); + c.common.opcode = cmd.opcode; + c.common.flags = cmd.flags; + c.common.nsid = cpu_to_le32(cmd.nsid); + c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); + c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); + c.common.cdw10 = cpu_to_le32(cmd.cdw10); + c.common.cdw11 = cpu_to_le32(cmd.cdw11); + c.common.cdw12 = cpu_to_le32(cmd.cdw12); + c.common.cdw13 = cpu_to_le32(cmd.cdw13); + c.common.cdw14 = cpu_to_le32(cmd.cdw14); + c.common.cdw15 = cpu_to_le32(cmd.cdw15); + + if (cmd.timeout_ms) + timeout = msecs_to_jiffies(cmd.timeout_ms); + + effects = nvme_passthru_start(ctrl, ns, cmd.opcode); + status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, + (void __user *)(uintptr_t)cmd.addr, cmd.data_len, + (void __user *)(uintptr_t)cmd.metadata, + cmd.metadata_len, 0, &result, timeout); + nvme_passthru_end(ctrl, effects); + + if (status >= 0) { + if (put_user(result, &ucmd->result)) + return -EFAULT; + } + + return status; +} + +static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct nvme_passthru_cmd64 __user *ucmd) +{ + struct nvme_passthru_cmd64 cmd; + struct nvme_command c; + unsigned timeout = 0; + u32 effects; int status; if (!capable(CAP_SYS_ADMIN)) @@ -1405,6 +1456,41 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx) srcu_read_unlock(&head->srcu, idx); } +static bool is_ctrl_ioctl(unsigned int cmd) +{ + if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) + return true; + if (is_sed_ioctl(cmd)) + return true; + return false; +} + +static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, + void __user *argp, + struct nvme_ns_head *head, + int srcu_idx) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + int ret; + + nvme_get_ctrl(ns->ctrl); + nvme_put_ns_from_disk(head, srcu_idx); + + switch (cmd) { + case NVME_IOCTL_ADMIN_CMD: + ret = nvme_user_cmd(ctrl, NULL, argp); + break; + case NVME_IOCTL_ADMIN64_CMD: + ret = nvme_user_cmd64(ctrl, NULL, argp); + break; + default: + ret = sed_ioctl(ctrl->opal_dev, cmd, argp); + break; + } + nvme_put_ctrl(ctrl); + return ret; +} + static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1422,20 +1508,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, * seperately and drop the ns SRCU reference early. This avoids a * deadlock when deleting namespaces using the passthrough interface. */ - if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) { - struct nvme_ctrl *ctrl = ns->ctrl; - - nvme_get_ctrl(ns->ctrl); - nvme_put_ns_from_disk(head, srcu_idx); - - if (cmd == NVME_IOCTL_ADMIN_CMD) - ret = nvme_user_cmd(ctrl, NULL, argp); - else - ret = sed_ioctl(ctrl->opal_dev, cmd, argp); - - nvme_put_ctrl(ctrl); - return ret; - } + if (is_ctrl_ioctl(cmd)) + return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); switch (cmd) { case NVME_IOCTL_ID: @@ -1448,6 +1522,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, case NVME_IOCTL_SUBMIT_IO: ret = nvme_submit_io(ns, argp); break; + case NVME_IOCTL_IO64_CMD: + ret = nvme_user_cmd64(ns->ctrl, ns, argp); + break; default: if (ns->ndev) ret = nvme_nvm_ioctl(ns, cmd, arg); @@ -2289,6 +2366,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x14a4, .fr = "22301111", .quirks = NVME_QUIRK_SIMPLE_SUSPEND, + }, + { + /* + * This Kingston E8FK11.T firmware version has no interrupt + * after resume with actions related to suspend to idle + * https://bugzilla.kernel.org/show_bug.cgi?id=204887 + */ + .vid = 0x2646, + .fr = "E8FK11.T", + .quirks = NVME_QUIRK_SIMPLE_SUSPEND, } }; @@ -2540,8 +2627,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) list_add_tail(&subsys->entry, &nvme_subsystems); } - if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, - dev_name(ctrl->device))) { + ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, + dev_name(ctrl->device)); + if (ret) { dev_err(ctrl->device, "failed to create sysfs link from subsystem.\n"); goto out_put_subsystem; @@ -2838,6 +2926,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ctrl, NULL, argp); + case NVME_IOCTL_ADMIN64_CMD: + return nvme_user_cmd64(ctrl, NULL, argp); case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: @@ -3045,6 +3135,8 @@ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); nvme_show_int_function(cntlid); nvme_show_int_function(numa_node); +nvme_show_int_function(queue_count); +nvme_show_int_function(sqsize); static ssize_t nvme_sysfs_delete(struct device *dev, struct device_attribute *attr, const char *buf, @@ -3125,6 +3217,8 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_address.attr, &dev_attr_state.attr, &dev_attr_numa_node.attr, + &dev_attr_queue_count.attr, + &dev_attr_sqsize.attr, NULL }; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b5013c101b35..38a83ef5bcd3 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -221,6 +221,7 @@ struct nvme_ctrl { u16 oacs; u16 nssa; u16 nr_streams; + u16 sqsize; u32 max_namespaces; atomic_t abort_limit; u8 vwc; @@ -269,7 +270,6 @@ struct nvme_ctrl { u16 hmmaxd; /* Fabrics only */ - u16 sqsize; u32 ioccsz; u32 iorcsz; u16 icdoff; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c0808f9eb8ab..bb88681f4dc3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2946,11 +2946,21 @@ static int nvme_suspend(struct device *dev) if (ret < 0) goto unfreeze; + /* + * A saved state prevents pci pm from generically controlling the + * device's power. If we're using protocol specific settings, we don't + * want pci interfering. + */ + pci_save_state(pdev); + ret = nvme_set_power_state(ctrl, ctrl->npss); if (ret < 0) goto unfreeze; if (ret) { + /* discard the saved state */ + pci_load_saved_state(pdev, NULL); + /* * Clearing npss forces a controller reset on resume. The * correct value will be resdicovered then. @@ -2958,14 +2968,7 @@ static int nvme_suspend(struct device *dev) nvme_dev_disable(ndev, true); ctrl->npss = 0; ret = 0; - goto unfreeze; } - /* - * A saved state prevents pci pm from generically controlling the - * device's power. If we're using protocol specific settings, we don't - * want pci interfering. - */ - pci_save_state(pdev); unfreeze: nvme_unfreeze(ctrl); return ret; @@ -3090,6 +3093,9 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index dfa07bb9dfeb..4d280160dd3f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -427,7 +427,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) { return min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ibdev->attrs.max_fast_reg_page_list_len); + ibdev->attrs.max_fast_reg_page_list_len - 1); } static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) @@ -437,7 +437,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) const int cq_factor = send_wr_factor + 1; /* + RECV */ int comp_vector, idx = nvme_rdma_queue_idx(queue); enum ib_poll_context poll_ctx; - int ret; + int ret, pages_per_mr; queue->device = nvme_rdma_find_get_device(queue->cm_id); if (!queue->device) { @@ -479,10 +479,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_qp; } + /* + * Currently we don't use SG_GAPS MR's so if the first entry is + * misaligned we'll end up using two entries for a single data page, + * so one additional entry is required. + */ + pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1; ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, queue->queue_size, IB_MR_TYPE_MEM_REG, - nvme_rdma_get_max_fr_pages(ibdev), 0); + pages_per_mr, 0); if (ret) { dev_err(queue->ctrl->ctrl.device, "failed to initialize MR pool sized %d for QID %d\n", @@ -614,7 +620,8 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) if (!ret) { set_bit(NVME_RDMA_Q_LIVE, &queue->flags); } else { - __nvme_rdma_stop_queue(queue); + if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) + __nvme_rdma_stop_queue(queue); dev_info(ctrl->ctrl.device, "failed to connect queue: %d ret=%d\n", idx, ret); } @@ -820,8 +827,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) goto out_stop_queue; - ctrl->ctrl.max_hw_sectors = - (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); + ctrl->ctrl.max_segments = ctrl->max_fr_pages; + ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9); blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4ffd5957637a..385a5212c10f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1042,7 +1042,7 @@ static void nvme_tcp_io_work(struct work_struct *w) { struct nvme_tcp_queue *queue = container_of(w, struct nvme_tcp_queue, io_work); - unsigned long start = jiffies + msecs_to_jiffies(1); + unsigned long deadline = jiffies + msecs_to_jiffies(1); do { bool pending = false; @@ -1067,7 +1067,7 @@ static void nvme_tcp_io_work(struct work_struct *w) if (!pending) return; - } while (time_after(jiffies, start)); /* quota is exhausted */ + } while (!time_after(jiffies, deadline)); /* quota is exhausted */ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index de0bff70ebb6..32008d85172b 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -11,10 +11,10 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) { const struct queue_limits *ql = &bdev_get_queue(bdev)->limits; - /* Number of physical blocks per logical block. */ - const u32 ppl = ql->physical_block_size / ql->logical_block_size; - /* Physical blocks per logical block, 0's based. */ - const __le16 ppl0b = to0based(ppl); + /* Number of logical blocks per physical block. */ + const u32 lpp = ql->physical_block_size / ql->logical_block_size; + /* Logical blocks per physical block, 0's based. */ + const __le16 lpp0b = to0based(lpp); /* * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN, @@ -25,9 +25,9 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) * field from the identify controller data structure should be used. */ id->nsfeat |= 1 << 1; - id->nawun = ppl0b; - id->nawupf = ppl0b; - id->nacwu = ppl0b; + id->nawun = lpp0b; + id->nawupf = lpp0b; + id->nacwu = lpp0b; /* * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and @@ -36,7 +36,7 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) */ id->nsfeat |= 1 << 4; /* NPWG = Namespace Preferred Write Granularity. 0's based */ - id->npwg = ppl0b; + id->npwg = lpp0b; /* NPWA = Namespace Preferred Write Alignment. 0's based */ id->npwa = id->npwg; /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */ diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index bf4f03474e89..d535080b781f 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -348,8 +348,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) return 0; err: - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); return NVME_SC_INTERNAL; } @@ -554,8 +553,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) if (queue->nvme_sq.sqhd_disabled) { kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); } return 1; @@ -586,8 +584,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, return -EAGAIN; kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); cmd->queue->snd_cmd = NULL; nvmet_tcp_put_cmd(cmd); return 1; @@ -1310,8 +1307,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) nvmet_req_uninit(&cmd->req); nvmet_tcp_unmap_pdu_iovec(cmd); kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); } static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) diff --git a/drivers/reset/reset-scmi.c b/drivers/reset/reset-scmi.c index c6d3c8427f14..b46df80ec6c3 100644 --- a/drivers/reset/reset-scmi.c +++ b/drivers/reset/reset-scmi.c @@ -102,6 +102,7 @@ static int scmi_reset_probe(struct scmi_device *sdev) data->rcdev.owner = THIS_MODULE; data->rcdev.of_node = np; data->rcdev.nr_resets = handle->reset_ops->num_domains_get(handle); + data->handle = handle; return devm_reset_controller_register(dev, &data->rcdev); } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index fc53e1e221f0..c94184d080f8 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1553,8 +1553,8 @@ static int dasd_eckd_read_vol_info(struct dasd_device *device) if (rc == 0) { memcpy(&private->vsq, vsq, sizeof(*vsq)); } else { - dev_warn(&device->cdev->dev, - "Reading the volume storage information failed with rc=%d\n", rc); + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, + "Reading the volume storage information failed with rc=%d", rc); } if (useglobal) @@ -1737,8 +1737,8 @@ static int dasd_eckd_read_ext_pool_info(struct dasd_device *device) if (rc == 0) { dasd_eckd_cpy_ext_pool_data(device, lcq); } else { - dev_warn(&device->cdev->dev, - "Reading the logical configuration failed with rc=%d\n", rc); + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, + "Reading the logical configuration failed with rc=%d", rc); } dasd_sfree_request(cqr, cqr->memdev); @@ -2020,14 +2020,10 @@ dasd_eckd_check_characteristics(struct dasd_device *device) dasd_eckd_read_features(device); /* Read Volume Information */ - rc = dasd_eckd_read_vol_info(device); - if (rc) - goto out_err3; + dasd_eckd_read_vol_info(device); /* Read Extent Pool Information */ - rc = dasd_eckd_read_ext_pool_info(device); - if (rc) - goto out_err3; + dasd_eckd_read_ext_pool_info(device); /* Read Device Characteristics */ rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC, @@ -2059,9 +2055,6 @@ dasd_eckd_check_characteristics(struct dasd_device *device) if (readonly) set_bit(DASD_FLAG_DEVICE_RO, &device->flags); - if (dasd_eckd_is_ese(device)) - dasd_set_feature(device->cdev, DASD_FEATURE_DISCARD, 1); - dev_info(&device->cdev->dev, "New DASD %04X/%02X (CU %04X/%02X) " "with %d cylinders, %d heads, %d sectors%s\n", private->rdc_data.dev_type, @@ -3695,14 +3688,6 @@ static int dasd_eckd_release_space(struct dasd_device *device, return -EINVAL; } -static struct dasd_ccw_req * -dasd_eckd_build_cp_discard(struct dasd_device *device, struct dasd_block *block, - struct request *req, sector_t first_trk, - sector_t last_trk) -{ - return dasd_eckd_dso_ras(device, block, req, first_trk, last_trk, 1); -} - static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( struct dasd_device *startdev, struct dasd_block *block, @@ -4447,10 +4432,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev, cmdwtd = private->features.feature[12] & 0x40; use_prefix = private->features.feature[8] & 0x01; - if (req_op(req) == REQ_OP_DISCARD) - return dasd_eckd_build_cp_discard(startdev, block, req, - first_trk, last_trk); - cqr = NULL; if (cdlspecial || dasd_page_cache) { /* do nothing, just fall through to the cmd mode single case */ @@ -4729,14 +4710,12 @@ static struct dasd_ccw_req *dasd_eckd_build_alias_cp(struct dasd_device *base, struct dasd_block *block, struct request *req) { - struct dasd_device *startdev = NULL; struct dasd_eckd_private *private; - struct dasd_ccw_req *cqr; + struct dasd_device *startdev; unsigned long flags; + struct dasd_ccw_req *cqr; - /* Discard requests can only be processed on base devices */ - if (req_op(req) != REQ_OP_DISCARD) - startdev = dasd_alias_get_start_dev(base); + startdev = dasd_alias_get_start_dev(base); if (!startdev) startdev = base; private = startdev->private; @@ -5663,14 +5642,10 @@ static int dasd_eckd_restore_device(struct dasd_device *device) dasd_eckd_read_features(device); /* Read Volume Information */ - rc = dasd_eckd_read_vol_info(device); - if (rc) - goto out_err2; + dasd_eckd_read_vol_info(device); /* Read Extent Pool Information */ - rc = dasd_eckd_read_ext_pool_info(device); - if (rc) - goto out_err2; + dasd_eckd_read_ext_pool_info(device); /* Read Device Characteristics */ rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC, @@ -6521,20 +6496,8 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block) unsigned int logical_block_size = block->bp_block; struct request_queue *q = block->request_queue; struct dasd_device *device = block->base; - struct dasd_eckd_private *private; - unsigned int max_discard_sectors; - unsigned int max_bytes; - unsigned int ext_bytes; /* Extent Size in Bytes */ - int recs_per_trk; - int trks_per_cyl; - int ext_limit; - int ext_size; /* Extent Size in Cylinders */ int max; - private = device->private; - trks_per_cyl = private->rdc_data.trk_per_cyl; - recs_per_trk = recs_per_track(&private->rdc_data, 0, logical_block_size); - if (device->features & DASD_FEATURE_USERAW) { /* * the max_blocks value for raw_track access is 256 @@ -6555,28 +6518,6 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block) /* With page sized segments each segment can be translated into one idaw/tidaw */ blk_queue_max_segment_size(q, PAGE_SIZE); blk_queue_segment_boundary(q, PAGE_SIZE - 1); - - if (dasd_eckd_is_ese(device)) { - /* - * Depending on the extent size, up to UINT_MAX bytes can be - * accepted. However, neither DASD_ECKD_RAS_EXTS_MAX nor the - * device limits should be exceeded. - */ - ext_size = dasd_eckd_ext_size(device); - ext_limit = min(private->real_cyl / ext_size, DASD_ECKD_RAS_EXTS_MAX); - ext_bytes = ext_size * trks_per_cyl * recs_per_trk * - logical_block_size; - max_bytes = UINT_MAX - (UINT_MAX % ext_bytes); - if (max_bytes / ext_bytes > ext_limit) - max_bytes = ext_bytes * ext_limit; - - max_discard_sectors = max_bytes / 512; - - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); - q->limits.discard_granularity = ext_bytes; - q->limits.discard_alignment = ext_bytes; - } } static struct ccw_driver dasd_eckd_driver = { diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index f4ca1d29d61b..cd164886132f 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -113,7 +113,7 @@ static void set_impl_params(struct qdio_irq *irq_ptr, irq_ptr->qib.pfmt = qib_param_field_format; if (qib_param_field) memcpy(irq_ptr->qib.parm, qib_param_field, - QDIO_MAX_BUFFERS_PER_Q); + sizeof(irq_ptr->qib.parm)); if (!input_slib_elements) goto output; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index a7868c8133ee..dda274351c21 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4715,8 +4715,7 @@ static int qeth_qdio_establish(struct qeth_card *card) QETH_CARD_TEXT(card, 2, "qdioest"); - qib_param_field = kzalloc(QDIO_MAX_BUFFERS_PER_Q, - GFP_KERNEL); + qib_param_field = kzalloc(FIELD_SIZEOF(struct qib, parm), GFP_KERNEL); if (!qib_param_field) { rc = -ENOMEM; goto out_free_nothing; diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 9966364a6deb..001a21abcc28 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -310,7 +310,7 @@ config DOVE_THERMAL config DB8500_THERMAL tristate "DB8500 thermal management" - depends on MFD_DB8500_PRCMU + depends on MFD_DB8500_PRCMU && OF default y help Adds DB8500 thermal management implementation according to the thermal diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index b71a999d17d6..372dbbaaafb8 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -3,9 +3,9 @@ * db8500_thermal.c - DB8500 Thermal Management Implementation * * Copyright (C) 2012 ST-Ericsson - * Copyright (C) 2012 Linaro Ltd. + * Copyright (C) 2012-2019 Linaro Ltd. * - * Author: Hongbo Zhang <hongbo.zhang@linaro.com> + * Authors: Hongbo Zhang, Linus Walleij */ #include <linux/cpu_cooling.h> @@ -13,7 +13,6 @@ #include <linux/mfd/dbx500-prcmu.h> #include <linux/module.h> #include <linux/of.h> -#include <linux/platform_data/db8500_thermal.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/thermal.h> @@ -21,453 +20,201 @@ #define PRCMU_DEFAULT_MEASURE_TIME 0xFFF #define PRCMU_DEFAULT_LOW_TEMP 0 +/** + * db8500_thermal_points - the interpolation points that trigger + * interrupts + */ +static const unsigned long db8500_thermal_points[] = { + 15000, + 20000, + 25000, + 30000, + 35000, + 40000, + 45000, + 50000, + 55000, + 60000, + 65000, + 70000, + 75000, + 80000, + /* + * This is where things start to get really bad for the + * SoC and the thermal zones should be set up to trigger + * critical temperature at 85000 mC so we don't get above + * this point. + */ + 85000, + 90000, + 95000, + 100000, +}; + struct db8500_thermal_zone { - struct thermal_zone_device *therm_dev; - struct mutex th_lock; - struct work_struct therm_work; - struct db8500_thsens_platform_data *trip_tab; - enum thermal_device_mode mode; + struct thermal_zone_device *tz; enum thermal_trend trend; - unsigned long cur_temp_pseudo; + unsigned long interpolated_temp; unsigned int cur_index; }; -/* Local function to check if thermal zone matches cooling devices */ -static int db8500_thermal_match_cdev(struct thermal_cooling_device *cdev, - struct db8500_trip_point *trip_point) -{ - int i; - - if (!strlen(cdev->type)) - return -EINVAL; - - for (i = 0; i < COOLING_DEV_MAX; i++) { - if (!strcmp(trip_point->cdev_name[i], cdev->type)) - return 0; - } - - return -ENODEV; -} - -/* Callback to bind cooling device to thermal zone */ -static int db8500_cdev_bind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - unsigned long max_state, upper, lower; - int i, ret = -EINVAL; - - cdev->ops->get_max_state(cdev, &max_state); - - for (i = 0; i < ptrips->num_trips; i++) { - if (db8500_thermal_match_cdev(cdev, &ptrips->trip_points[i])) - continue; - - upper = lower = i > max_state ? max_state : i; - - ret = thermal_zone_bind_cooling_device(thermal, i, cdev, - upper, lower, THERMAL_WEIGHT_DEFAULT); - - dev_info(&cdev->device, "%s bind to %d: %d-%s\n", cdev->type, - i, ret, ret ? "fail" : "succeed"); - } - - return ret; -} - -/* Callback to unbind cooling device from thermal zone */ -static int db8500_cdev_unbind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - int i, ret = -EINVAL; - - for (i = 0; i < ptrips->num_trips; i++) { - if (db8500_thermal_match_cdev(cdev, &ptrips->trip_points[i])) - continue; - - ret = thermal_zone_unbind_cooling_device(thermal, i, cdev); - - dev_info(&cdev->device, "%s unbind from %d: %s\n", cdev->type, - i, ret ? "fail" : "succeed"); - } - - return ret; -} - /* Callback to get current temperature */ -static int db8500_sys_get_temp(struct thermal_zone_device *thermal, int *temp) +static int db8500_thermal_get_temp(void *data, int *temp) { - struct db8500_thermal_zone *pzone = thermal->devdata; + struct db8500_thermal_zone *th = data; /* * TODO: There is no PRCMU interface to get temperature data currently, * so a pseudo temperature is returned , it works for thermal framework * and this will be fixed when the PRCMU interface is available. */ - *temp = pzone->cur_temp_pseudo; + *temp = th->interpolated_temp; return 0; } /* Callback to get temperature changing trend */ -static int db8500_sys_get_trend(struct thermal_zone_device *thermal, - int trip, enum thermal_trend *trend) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - - *trend = pzone->trend; - - return 0; -} - -/* Callback to get thermal zone mode */ -static int db8500_sys_get_mode(struct thermal_zone_device *thermal, - enum thermal_device_mode *mode) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - - mutex_lock(&pzone->th_lock); - *mode = pzone->mode; - mutex_unlock(&pzone->th_lock); - - return 0; -} - -/* Callback to set thermal zone mode */ -static int db8500_sys_set_mode(struct thermal_zone_device *thermal, - enum thermal_device_mode mode) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - - mutex_lock(&pzone->th_lock); - - pzone->mode = mode; - if (mode == THERMAL_DEVICE_ENABLED) - schedule_work(&pzone->therm_work); - - mutex_unlock(&pzone->th_lock); - - return 0; -} - -/* Callback to get trip point type */ -static int db8500_sys_get_trip_type(struct thermal_zone_device *thermal, - int trip, enum thermal_trip_type *type) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - - if (trip >= ptrips->num_trips) - return -EINVAL; - - *type = ptrips->trip_points[trip].type; - - return 0; -} - -/* Callback to get trip point temperature */ -static int db8500_sys_get_trip_temp(struct thermal_zone_device *thermal, - int trip, int *temp) +static int db8500_thermal_get_trend(void *data, int trip, enum thermal_trend *trend) { - struct db8500_thermal_zone *pzone = thermal->devdata; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; + struct db8500_thermal_zone *th = data; - if (trip >= ptrips->num_trips) - return -EINVAL; - - *temp = ptrips->trip_points[trip].temp; + *trend = th->trend; return 0; } -/* Callback to get critical trip point temperature */ -static int db8500_sys_get_crit_temp(struct thermal_zone_device *thermal, - int *temp) -{ - struct db8500_thermal_zone *pzone = thermal->devdata; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - int i; - - for (i = ptrips->num_trips - 1; i > 0; i--) { - if (ptrips->trip_points[i].type == THERMAL_TRIP_CRITICAL) { - *temp = ptrips->trip_points[i].temp; - return 0; - } - } - - return -EINVAL; -} - -static struct thermal_zone_device_ops thdev_ops = { - .bind = db8500_cdev_bind, - .unbind = db8500_cdev_unbind, - .get_temp = db8500_sys_get_temp, - .get_trend = db8500_sys_get_trend, - .get_mode = db8500_sys_get_mode, - .set_mode = db8500_sys_set_mode, - .get_trip_type = db8500_sys_get_trip_type, - .get_trip_temp = db8500_sys_get_trip_temp, - .get_crit_temp = db8500_sys_get_crit_temp, +static struct thermal_zone_of_device_ops thdev_ops = { + .get_temp = db8500_thermal_get_temp, + .get_trend = db8500_thermal_get_trend, }; -static void db8500_thermal_update_config(struct db8500_thermal_zone *pzone, - unsigned int idx, enum thermal_trend trend, - unsigned long next_low, unsigned long next_high) +static void db8500_thermal_update_config(struct db8500_thermal_zone *th, + unsigned int idx, + enum thermal_trend trend, + unsigned long next_low, + unsigned long next_high) { prcmu_stop_temp_sense(); - pzone->cur_index = idx; - pzone->cur_temp_pseudo = (next_low + next_high)/2; - pzone->trend = trend; + th->cur_index = idx; + th->interpolated_temp = (next_low + next_high)/2; + th->trend = trend; + /* + * The PRCMU accept absolute temperatures in celsius so divide + * down the millicelsius with 1000 + */ prcmu_config_hotmon((u8)(next_low/1000), (u8)(next_high/1000)); prcmu_start_temp_sense(PRCMU_DEFAULT_MEASURE_TIME); } static irqreturn_t prcmu_low_irq_handler(int irq, void *irq_data) { - struct db8500_thermal_zone *pzone = irq_data; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - unsigned int idx = pzone->cur_index; + struct db8500_thermal_zone *th = irq_data; + unsigned int idx = th->cur_index; unsigned long next_low, next_high; - if (unlikely(idx == 0)) + if (idx == 0) /* Meaningless for thermal management, ignoring it */ return IRQ_HANDLED; if (idx == 1) { - next_high = ptrips->trip_points[0].temp; + next_high = db8500_thermal_points[0]; next_low = PRCMU_DEFAULT_LOW_TEMP; } else { - next_high = ptrips->trip_points[idx-1].temp; - next_low = ptrips->trip_points[idx-2].temp; + next_high = db8500_thermal_points[idx - 1]; + next_low = db8500_thermal_points[idx - 2]; } idx -= 1; - db8500_thermal_update_config(pzone, idx, THERMAL_TREND_DROPPING, - next_low, next_high); - - dev_dbg(&pzone->therm_dev->device, + db8500_thermal_update_config(th, idx, THERMAL_TREND_DROPPING, + next_low, next_high); + dev_dbg(&th->tz->device, "PRCMU set max %ld, min %ld\n", next_high, next_low); - schedule_work(&pzone->therm_work); + thermal_zone_device_update(th->tz, THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } static irqreturn_t prcmu_high_irq_handler(int irq, void *irq_data) { - struct db8500_thermal_zone *pzone = irq_data; - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - unsigned int idx = pzone->cur_index; + struct db8500_thermal_zone *th = irq_data; + unsigned int idx = th->cur_index; unsigned long next_low, next_high; + int num_points = ARRAY_SIZE(db8500_thermal_points); - if (idx < ptrips->num_trips - 1) { - next_high = ptrips->trip_points[idx+1].temp; - next_low = ptrips->trip_points[idx].temp; + if (idx < num_points - 1) { + next_high = db8500_thermal_points[idx+1]; + next_low = db8500_thermal_points[idx]; idx += 1; - db8500_thermal_update_config(pzone, idx, THERMAL_TREND_RAISING, - next_low, next_high); + db8500_thermal_update_config(th, idx, THERMAL_TREND_RAISING, + next_low, next_high); - dev_dbg(&pzone->therm_dev->device, - "PRCMU set max %ld, min %ld\n", next_high, next_low); - } else if (idx == ptrips->num_trips - 1) - pzone->cur_temp_pseudo = ptrips->trip_points[idx].temp + 1; + dev_info(&th->tz->device, + "PRCMU set max %ld, min %ld\n", next_high, next_low); + } else if (idx == num_points - 1) + /* So we roof out 1 degree over the max point */ + th->interpolated_temp = db8500_thermal_points[idx] + 1; - schedule_work(&pzone->therm_work); + thermal_zone_device_update(th->tz, THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } -static void db8500_thermal_work(struct work_struct *work) -{ - enum thermal_device_mode cur_mode; - struct db8500_thermal_zone *pzone; - - pzone = container_of(work, struct db8500_thermal_zone, therm_work); - - mutex_lock(&pzone->th_lock); - cur_mode = pzone->mode; - mutex_unlock(&pzone->th_lock); - - if (cur_mode == THERMAL_DEVICE_DISABLED) - return; - - thermal_zone_device_update(pzone->therm_dev, THERMAL_EVENT_UNSPECIFIED); - dev_dbg(&pzone->therm_dev->device, "thermal work finished.\n"); -} - -#ifdef CONFIG_OF -static struct db8500_thsens_platform_data* - db8500_thermal_parse_dt(struct platform_device *pdev) -{ - struct db8500_thsens_platform_data *ptrips; - struct device_node *np = pdev->dev.of_node; - char prop_name[32]; - const char *tmp_str; - u32 tmp_data; - int i, j; - - ptrips = devm_kzalloc(&pdev->dev, sizeof(*ptrips), GFP_KERNEL); - if (!ptrips) - return NULL; - - if (of_property_read_u32(np, "num-trips", &tmp_data)) - goto err_parse_dt; - - if (tmp_data > THERMAL_MAX_TRIPS) - goto err_parse_dt; - - ptrips->num_trips = tmp_data; - - for (i = 0; i < ptrips->num_trips; i++) { - sprintf(prop_name, "trip%d-temp", i); - if (of_property_read_u32(np, prop_name, &tmp_data)) - goto err_parse_dt; - - ptrips->trip_points[i].temp = tmp_data; - - sprintf(prop_name, "trip%d-type", i); - if (of_property_read_string(np, prop_name, &tmp_str)) - goto err_parse_dt; - - if (!strcmp(tmp_str, "active")) - ptrips->trip_points[i].type = THERMAL_TRIP_ACTIVE; - else if (!strcmp(tmp_str, "passive")) - ptrips->trip_points[i].type = THERMAL_TRIP_PASSIVE; - else if (!strcmp(tmp_str, "hot")) - ptrips->trip_points[i].type = THERMAL_TRIP_HOT; - else if (!strcmp(tmp_str, "critical")) - ptrips->trip_points[i].type = THERMAL_TRIP_CRITICAL; - else - goto err_parse_dt; - - sprintf(prop_name, "trip%d-cdev-num", i); - if (of_property_read_u32(np, prop_name, &tmp_data)) - goto err_parse_dt; - - if (tmp_data > COOLING_DEV_MAX) - goto err_parse_dt; - - for (j = 0; j < tmp_data; j++) { - sprintf(prop_name, "trip%d-cdev-name%d", i, j); - if (of_property_read_string(np, prop_name, &tmp_str)) - goto err_parse_dt; - - if (strlen(tmp_str) >= THERMAL_NAME_LENGTH) - goto err_parse_dt; - - strcpy(ptrips->trip_points[i].cdev_name[j], tmp_str); - } - } - return ptrips; - -err_parse_dt: - dev_err(&pdev->dev, "Parsing device tree data error.\n"); - return NULL; -} -#else -static inline struct db8500_thsens_platform_data* - db8500_thermal_parse_dt(struct platform_device *pdev) -{ - return NULL; -} -#endif - static int db8500_thermal_probe(struct platform_device *pdev) { - struct db8500_thermal_zone *pzone = NULL; - struct db8500_thsens_platform_data *ptrips = NULL; - struct device_node *np = pdev->dev.of_node; + struct db8500_thermal_zone *th = NULL; + struct device *dev = &pdev->dev; int low_irq, high_irq, ret = 0; - unsigned long dft_low, dft_high; - if (np) - ptrips = db8500_thermal_parse_dt(pdev); - else - ptrips = dev_get_platdata(&pdev->dev); - - if (!ptrips) - return -EINVAL; - - pzone = devm_kzalloc(&pdev->dev, sizeof(*pzone), GFP_KERNEL); - if (!pzone) + th = devm_kzalloc(dev, sizeof(*th), GFP_KERNEL); + if (!th) return -ENOMEM; - mutex_init(&pzone->th_lock); - mutex_lock(&pzone->th_lock); - - pzone->mode = THERMAL_DEVICE_DISABLED; - pzone->trip_tab = ptrips; - - INIT_WORK(&pzone->therm_work, db8500_thermal_work); - low_irq = platform_get_irq_byname(pdev, "IRQ_HOTMON_LOW"); if (low_irq < 0) { - dev_err(&pdev->dev, "Get IRQ_HOTMON_LOW failed.\n"); - ret = low_irq; - goto out_unlock; + dev_err(dev, "Get IRQ_HOTMON_LOW failed\n"); + return low_irq; } - ret = devm_request_threaded_irq(&pdev->dev, low_irq, NULL, + ret = devm_request_threaded_irq(dev, low_irq, NULL, prcmu_low_irq_handler, IRQF_NO_SUSPEND | IRQF_ONESHOT, - "dbx500_temp_low", pzone); + "dbx500_temp_low", th); if (ret < 0) { - dev_err(&pdev->dev, "Failed to allocate temp low irq.\n"); - goto out_unlock; + dev_err(dev, "failed to allocate temp low irq\n"); + return ret; } high_irq = platform_get_irq_byname(pdev, "IRQ_HOTMON_HIGH"); if (high_irq < 0) { - dev_err(&pdev->dev, "Get IRQ_HOTMON_HIGH failed.\n"); - ret = high_irq; - goto out_unlock; + dev_err(dev, "Get IRQ_HOTMON_HIGH failed\n"); + return high_irq; } - ret = devm_request_threaded_irq(&pdev->dev, high_irq, NULL, + ret = devm_request_threaded_irq(dev, high_irq, NULL, prcmu_high_irq_handler, IRQF_NO_SUSPEND | IRQF_ONESHOT, - "dbx500_temp_high", pzone); + "dbx500_temp_high", th); if (ret < 0) { - dev_err(&pdev->dev, "Failed to allocate temp high irq.\n"); - goto out_unlock; + dev_err(dev, "failed to allocate temp high irq\n"); + return ret; } - pzone->therm_dev = thermal_zone_device_register("db8500_thermal_zone", - ptrips->num_trips, 0, pzone, &thdev_ops, NULL, 0, 0); - - if (IS_ERR(pzone->therm_dev)) { - dev_err(&pdev->dev, "Register thermal zone device failed.\n"); - ret = PTR_ERR(pzone->therm_dev); - goto out_unlock; + /* register of thermal sensor and get info from DT */ + th->tz = devm_thermal_zone_of_sensor_register(dev, 0, th, &thdev_ops); + if (IS_ERR(th->tz)) { + dev_err(dev, "register thermal zone sensor failed\n"); + return PTR_ERR(th->tz); } - dev_info(&pdev->dev, "Thermal zone device registered.\n"); - - dft_low = PRCMU_DEFAULT_LOW_TEMP; - dft_high = ptrips->trip_points[0].temp; - - db8500_thermal_update_config(pzone, 0, THERMAL_TREND_STABLE, - dft_low, dft_high); - - platform_set_drvdata(pdev, pzone); - pzone->mode = THERMAL_DEVICE_ENABLED; + dev_info(dev, "thermal zone sensor registered\n"); -out_unlock: - mutex_unlock(&pzone->th_lock); + /* Start measuring at the lowest point */ + db8500_thermal_update_config(th, 0, THERMAL_TREND_STABLE, + PRCMU_DEFAULT_LOW_TEMP, + db8500_thermal_points[0]); - return ret; -} - -static int db8500_thermal_remove(struct platform_device *pdev) -{ - struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev); - - thermal_zone_device_unregister(pzone->therm_dev); - cancel_work_sync(&pzone->therm_work); - mutex_destroy(&pzone->th_lock); + platform_set_drvdata(pdev, th); return 0; } @@ -475,9 +222,6 @@ static int db8500_thermal_remove(struct platform_device *pdev) static int db8500_thermal_suspend(struct platform_device *pdev, pm_message_t state) { - struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev); - - flush_work(&pzone->therm_work); prcmu_stop_temp_sense(); return 0; @@ -485,26 +229,21 @@ static int db8500_thermal_suspend(struct platform_device *pdev, static int db8500_thermal_resume(struct platform_device *pdev) { - struct db8500_thermal_zone *pzone = platform_get_drvdata(pdev); - struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; - unsigned long dft_low, dft_high; - - dft_low = PRCMU_DEFAULT_LOW_TEMP; - dft_high = ptrips->trip_points[0].temp; + struct db8500_thermal_zone *th = platform_get_drvdata(pdev); - db8500_thermal_update_config(pzone, 0, THERMAL_TREND_STABLE, - dft_low, dft_high); + /* Resume and start measuring at the lowest point */ + db8500_thermal_update_config(th, 0, THERMAL_TREND_STABLE, + PRCMU_DEFAULT_LOW_TEMP, + db8500_thermal_points[0]); return 0; } -#ifdef CONFIG_OF static const struct of_device_id db8500_thermal_match[] = { { .compatible = "stericsson,db8500-thermal" }, {}, }; MODULE_DEVICE_TABLE(of, db8500_thermal_match); -#endif static struct platform_driver db8500_thermal_driver = { .driver = { @@ -514,7 +253,6 @@ static struct platform_driver db8500_thermal_driver = { .probe = db8500_thermal_probe, .suspend = db8500_thermal_suspend, .resume = db8500_thermal_resume, - .remove = db8500_thermal_remove, }; module_platform_driver(db8500_thermal_driver); diff --git a/drivers/thermal/thermal_mmio.c b/drivers/thermal/thermal_mmio.c index de3cceea23bc..40524fa13533 100644 --- a/drivers/thermal/thermal_mmio.c +++ b/drivers/thermal/thermal_mmio.c @@ -53,13 +53,6 @@ static int thermal_mmio_probe(struct platform_device *pdev) return -ENOMEM; resource = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (IS_ERR(resource)) { - dev_err(&pdev->dev, - "fail to get platform memory resource (%ld)\n", - PTR_ERR(resource)); - return PTR_ERR(resource); - } - sensor->mmio_base = devm_ioremap_resource(&pdev->dev, resource); if (IS_ERR(sensor->mmio_base)) { dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n", diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 4e11de6cde81..5bae515c8e25 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -156,8 +156,10 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC) /* balloon_append: add the given page to the balloon. */ -static void __balloon_append(struct page *page) +static void balloon_append(struct page *page) { + __SetPageOffline(page); + /* Lowmem is re-populated first, so highmem pages go at list tail. */ if (PageHighMem(page)) { list_add_tail(&page->lru, &ballooned_pages); @@ -169,11 +171,6 @@ static void __balloon_append(struct page *page) wake_up(&balloon_wq); } -static void balloon_append(struct page *page) -{ - __balloon_append(page); -} - /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ static struct page *balloon_retrieve(bool require_lowmem) { @@ -192,6 +189,7 @@ static struct page *balloon_retrieve(bool require_lowmem) else balloon_stats.balloon_low--; + __ClearPageOffline(page); return page; } @@ -377,8 +375,7 @@ static void xen_online_page(struct page *page, unsigned int order) for (i = 0; i < size; i++) { p = pfn_to_page(start_pfn + i); __online_page_set_limits(p); - __SetPageOffline(p); - __balloon_append(p); + balloon_append(p); } mutex_unlock(&balloon_mutex); } @@ -444,7 +441,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages) xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]); /* Relinquish the page back to the allocator. */ - __ClearPageOffline(page); free_reserved_page(page); } @@ -471,7 +467,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) state = BP_EAGAIN; break; } - __SetPageOffline(page); adjust_managed_page_count(page, -1); xenmem_reservation_scrub_page(page); list_add(&page->lru, &pages); @@ -611,7 +606,6 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages) while (pgno < nr_pages) { page = balloon_retrieve(true); if (page) { - __ClearPageOffline(page); pages[pgno++] = page; #ifdef CONFIG_XEN_HAVE_PVMMU /* @@ -653,10 +647,8 @@ void free_xenballooned_pages(int nr_pages, struct page **pages) mutex_lock(&balloon_mutex); for (i = 0; i < nr_pages; i++) { - if (pages[i]) { - __SetPageOffline(pages[i]); + if (pages[i]) balloon_append(pages[i]); - } } balloon_stats.target_unpopulated -= nr_pages; @@ -674,7 +666,6 @@ static void __init balloon_add_region(unsigned long start_pfn, unsigned long pages) { unsigned long pfn, extra_pfn_end; - struct page *page; /* * If the amount of usable memory has been limited (e.g., with @@ -684,11 +675,10 @@ static void __init balloon_add_region(unsigned long start_pfn, extra_pfn_end = min(max_pfn, start_pfn + pages); for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) { - page = pfn_to_page(pfn); /* totalram_pages and totalhigh_pages do not include the boot-time balloon extension, so don't subtract from it. */ - __balloon_append(page); + balloon_append(pfn_to_page(pfn)); } balloon_stats.total_pages += extra_pfn_end - start_pfn; diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c index 89d60f8e3c18..d1ff2186ebb4 100644 --- a/drivers/xen/efi.c +++ b/drivers/xen/efi.c @@ -40,7 +40,7 @@ #define efi_data(op) (op.u.efi_runtime_call) -efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) +static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { struct xen_platform_op op = INIT_EFI_OP(get_time); @@ -61,9 +61,8 @@ efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_get_time); -efi_status_t xen_efi_set_time(efi_time_t *tm) +static efi_status_t xen_efi_set_time(efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(set_time); @@ -75,10 +74,10 @@ efi_status_t xen_efi_set_time(efi_time_t *tm) return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_set_time); -efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, - efi_time_t *tm) +static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, + efi_bool_t *pending, + efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(get_wakeup_time); @@ -98,9 +97,8 @@ efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_get_wakeup_time); -efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(set_wakeup_time); @@ -117,11 +115,10 @@ efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_set_wakeup_time); -efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, - u32 *attr, unsigned long *data_size, - void *data) +static efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *data_size, + void *data) { struct xen_platform_op op = INIT_EFI_OP(get_variable); @@ -141,11 +138,10 @@ efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_get_variable); -efi_status_t xen_efi_get_next_variable(unsigned long *name_size, - efi_char16_t *name, - efi_guid_t *vendor) +static efi_status_t xen_efi_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) { struct xen_platform_op op = INIT_EFI_OP(get_next_variable_name); @@ -165,11 +161,10 @@ efi_status_t xen_efi_get_next_variable(unsigned long *name_size, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_get_next_variable); -efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, - u32 attr, unsigned long data_size, - void *data) +static efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, + void *data) { struct xen_platform_op op = INIT_EFI_OP(set_variable); @@ -186,11 +181,10 @@ efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_set_variable); -efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, - u64 *remaining_space, - u64 *max_variable_size) +static efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) { struct xen_platform_op op = INIT_EFI_OP(query_variable_info); @@ -208,9 +202,8 @@ efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_query_variable_info); -efi_status_t xen_efi_get_next_high_mono_count(u32 *count) +static efi_status_t xen_efi_get_next_high_mono_count(u32 *count) { struct xen_platform_op op = INIT_EFI_OP(get_next_high_monotonic_count); @@ -221,10 +214,9 @@ efi_status_t xen_efi_get_next_high_mono_count(u32 *count) return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_get_next_high_mono_count); -efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, - unsigned long count, unsigned long sg_list) +static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, unsigned long sg_list) { struct xen_platform_op op = INIT_EFI_OP(update_capsule); @@ -241,11 +233,9 @@ efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_update_capsule); -efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, - unsigned long count, u64 *max_size, - int *reset_type) +static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, u64 *max_size, int *reset_type) { struct xen_platform_op op = INIT_EFI_OP(query_capsule_capabilities); @@ -264,10 +254,9 @@ efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, return efi_data(op).status; } -EXPORT_SYMBOL_GPL(xen_efi_query_capsule_caps); -void xen_efi_reset_system(int reset_type, efi_status_t status, - unsigned long data_size, efi_char16_t *data) +static void xen_efi_reset_system(int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) { switch (reset_type) { case EFI_RESET_COLD: @@ -281,4 +270,25 @@ void xen_efi_reset_system(int reset_type, efi_status_t status, BUG(); } } -EXPORT_SYMBOL_GPL(xen_efi_reset_system); + +/* + * Set XEN EFI runtime services function pointers. Other fields of struct efi, + * e.g. efi.systab, will be set like normal EFI. + */ +void __init xen_efi_runtime_setup(void) +{ + efi.get_time = xen_efi_get_time; + efi.set_time = xen_efi_set_time; + efi.get_wakeup_time = xen_efi_get_wakeup_time; + efi.set_wakeup_time = xen_efi_set_wakeup_time; + efi.get_variable = xen_efi_get_variable; + efi.get_next_variable = xen_efi_get_next_variable; + efi.set_variable = xen_efi_set_variable; + efi.set_variable_nonblocking = xen_efi_set_variable; + efi.query_variable_info = xen_efi_query_variable_info; + efi.query_variable_info_nonblocking = xen_efi_query_variable_info; + efi.update_capsule = xen_efi_update_capsule; + efi.query_capsule_caps = xen_efi_query_capsule_caps; + efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; + efi.reset_system = xen_efi_reset_system; +} diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 08adc590f631..597af455a522 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -55,6 +55,7 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/miscdevice.h> +#include <linux/workqueue.h> #include <xen/xenbus.h> #include <xen/xen.h> @@ -116,6 +117,8 @@ struct xenbus_file_priv { wait_queue_head_t read_waitq; struct kref kref; + + struct work_struct wq; }; /* Read out any raw xenbus messages queued up. */ @@ -300,14 +303,14 @@ static void watch_fired(struct xenbus_watch *watch, mutex_unlock(&adap->dev_data->reply_mutex); } -static void xenbus_file_free(struct kref *kref) +static void xenbus_worker(struct work_struct *wq) { struct xenbus_file_priv *u; struct xenbus_transaction_holder *trans, *tmp; struct watch_adapter *watch, *tmp_watch; struct read_buffer *rb, *tmp_rb; - u = container_of(kref, struct xenbus_file_priv, kref); + u = container_of(wq, struct xenbus_file_priv, wq); /* * No need for locking here because there are no other users, @@ -333,6 +336,18 @@ static void xenbus_file_free(struct kref *kref) kfree(u); } +static void xenbus_file_free(struct kref *kref) +{ + struct xenbus_file_priv *u; + + /* + * We might be called in xenbus_thread(). + * Use workqueue to avoid deadlock. + */ + u = container_of(kref, struct xenbus_file_priv, kref); + schedule_work(&u->wq); +} + static struct xenbus_transaction_holder *xenbus_get_transaction( struct xenbus_file_priv *u, uint32_t tx_id) { @@ -650,6 +665,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp) INIT_LIST_HEAD(&u->watches); INIT_LIST_HEAD(&u->read_buffers); init_waitqueue_head(&u->read_waitq); + INIT_WORK(&u->wq, xenbus_worker); mutex_init(&u->reply_mutex); mutex_init(&u->msgbuffer_mutex); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index bcd1bafb0278..4150280509ff 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -10,13 +10,6 @@ #include <linux/dns_resolver.h> #include "internal.h" -const struct file_operations afs_dynroot_file_operations = { - .open = dcache_dir_open, - .release = dcache_dir_close, - .iterate_shared = dcache_readdir, - .llseek = dcache_dir_lseek, -}; - /* * Probe to see if a cell may exist. This prevents positive dentries from * being created unnecessarily. diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 7b1c18c32f48..46d2d7cb461d 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -443,7 +443,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; if (root) { inode->i_op = &afs_dynroot_inode_operations; - inode->i_fop = &afs_dynroot_file_operations; + inode->i_fop = &simple_dir_operations; } else { inode->i_op = &afs_autocell_inode_operations; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 9cdfabaeaa0b..759e0578012c 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -910,7 +910,6 @@ extern int afs_silly_iput(struct dentry *, struct inode *); /* * dynroot.c */ -extern const struct file_operations afs_dynroot_file_operations; extern const struct inode_operations afs_dynroot_inode_operations; extern const struct dentry_operations afs_dynroot_dentry_operations; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7b32b6af322d..cceaf05aada2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3745,12 +3745,21 @@ err_unlock: static void set_btree_ioerr(struct page *page) { struct extent_buffer *eb = (struct extent_buffer *)page->private; + struct btrfs_fs_info *fs_info; SetPageError(page); if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) return; /* + * If we error out, we should add back the dirty_metadata_bytes + * to make it consistent. + */ + fs_info = eb->fs_info; + percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, + eb->len, fs_info->dirty_metadata_batch); + + /* * If writeback for a btree extent that doesn't belong to a log tree * failed, increment the counter transaction->eb_write_errors. * We do this because while the transaction is running and before it's @@ -3986,6 +3995,10 @@ retry: if (!ret) { free_extent_buffer(eb); continue; + } else if (ret < 0) { + done = 1; + free_extent_buffer(eb); + break; } ret = write_one_eb(eb, wbc, &epd); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 8d3bd799ac7d..c4bb69941c77 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3166,9 +3166,6 @@ out: btrfs_free_path(path); mutex_lock(&fs_info->qgroup_rescan_lock); - if (!btrfs_fs_closing(fs_info)) - fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; - if (err > 0 && fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; @@ -3184,16 +3181,30 @@ out: trans = btrfs_start_transaction(fs_info->quota_root, 1); if (IS_ERR(trans)) { err = PTR_ERR(trans); + trans = NULL; btrfs_err(fs_info, "fail to start transaction for status update: %d", err); - goto done; } - ret = update_qgroup_status_item(trans); - if (ret < 0) { - err = ret; - btrfs_err(fs_info, "fail to update qgroup status: %d", err); + + mutex_lock(&fs_info->qgroup_rescan_lock); + if (!btrfs_fs_closing(fs_info)) + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; + if (trans) { + ret = update_qgroup_status_item(trans); + if (ret < 0) { + err = ret; + btrfs_err(fs_info, "fail to update qgroup status: %d", + err); + } } + fs_info->qgroup_rescan_running = false; + complete_all(&fs_info->qgroup_rescan_completion); + mutex_unlock(&fs_info->qgroup_rescan_lock); + + if (!trans) + return; + btrfs_end_transaction(trans); if (btrfs_fs_closing(fs_info)) { @@ -3204,12 +3215,6 @@ out: } else { btrfs_err(fs_info, "qgroup scan failed with %d", err); } - -done: - mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_rescan_running = false; - mutex_unlock(&fs_info->qgroup_rescan_lock); - complete_all(&fs_info->qgroup_rescan_completion); } /* @@ -3437,6 +3442,9 @@ cleanup: while ((unode = ulist_next(&reserved->range_changed, &uiter))) clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL); + /* Also free data bytes of already reserved one */ + btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, + orig_reserved, BTRFS_QGROUP_RSV_DATA); extent_changeset_release(reserved); return ret; } @@ -3481,7 +3489,7 @@ static int qgroup_free_reserved_data(struct inode *inode, * EXTENT_QGROUP_RESERVED, we won't double free. * So not need to rush. */ - ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree, + ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, free_start, free_start + free_len - 1, EXTENT_QGROUP_RESERVED, &changeset); if (ret < 0) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2f0e25afa486..00504657b602 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1435,6 +1435,13 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, int clear_rsv = 0; int ret; + /* + * The subvolume has reloc tree but the swap is finished, no need to + * create/update the dead reloc tree + */ + if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)) + return 0; + if (root->reloc_root) { reloc_root = root->reloc_root; reloc_root->last_trans = trans->transid; @@ -2187,7 +2194,6 @@ static int clean_dirty_subvols(struct reloc_control *rc) /* Merged subvolume, cleanup its reloc root */ struct btrfs_root *reloc_root = root->reloc_root; - clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); list_del_init(&root->reloc_dirty_list); root->reloc_root = NULL; if (reloc_root) { @@ -2196,6 +2202,7 @@ static int clean_dirty_subvols(struct reloc_control *rc) if (ret2 < 0 && !ret) ret = ret2; } + clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); btrfs_put_fs_root(root); } else { /* Orphan reloc tree, just clean it up */ diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index b5e80563efaa..99fe9bf3fdac 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -52,7 +52,13 @@ static struct file_system_type test_type = { struct inode *btrfs_new_test_inode(void) { - return new_inode(test_mnt->mnt_sb); + struct inode *inode; + + inode = new_inode(test_mnt->mnt_sb); + if (inode) + inode_init_owner(inode, NULL, S_IFREG); + + return inode; } static int btrfs_init_test_fs(void) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a324480bc88b..cdd7af424033 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4063,7 +4063,13 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, } num_devices = btrfs_num_devices(fs_info); - allowed = 0; + + /* + * SINGLE profile on-disk has no profile bit, but in-memory we have a + * special bit for it, to make it easier to distinguish. Thus we need + * to set it manually, or balance would refuse the profile. + */ + allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) if (num_devices >= btrfs_raid_array[i].devs_min) allowed |= btrfs_raid_array[i].bg_flag; diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h index 6c3bd07868d7..0f0dc1c1fe41 100644 --- a/fs/cifs/cifs_ioctl.h +++ b/fs/cifs/cifs_ioctl.h @@ -57,9 +57,18 @@ struct smb_query_info { /* char buffer[]; */ } __packed; +struct smb3_key_debug_info { + __u64 Suid; + __u16 cipher_type; + __u8 auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */ + __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; + __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; +} __packed; + #define CIFS_IOCTL_MAGIC 0xCF #define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int) #define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4) #define CIFS_IOC_GET_MNT_INFO _IOR(CIFS_IOCTL_MAGIC, 5, struct smb_mnt_fs_info) #define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array) #define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info) +#define CIFS_DUMP_KEY _IOWR(CIFS_IOCTL_MAGIC, 8, struct smb3_key_debug_info) diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h index eb428349f29a..439b99cefeb0 100644 --- a/fs/cifs/cifsacl.h +++ b/fs/cifs/cifsacl.h @@ -90,8 +90,39 @@ struct cifs_acl { __le32 num_aces; } __attribute__((packed)); +/* ACE types - see MS-DTYP 2.4.4.1 */ +#define ACCESS_ALLOWED_ACE_TYPE 0x00 +#define ACCESS_DENIED_ACE_TYPE 0x01 +#define SYSTEM_AUDIT_ACE_TYPE 0x02 +#define SYSTEM_ALARM_ACE_TYPE 0x03 +#define ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04 +#define ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 +#define ACCESS_DENIED_OBJECT_ACE_TYPE 0x06 +#define SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 +#define SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 +#define ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09 +#define ACCESS_DENIED_CALLBACK_ACE_TYPE 0x0A +#define ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B +#define ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE 0x0C +#define SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D +#define SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E /* Reserved */ +#define SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F +#define SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10 /* reserved */ +#define SYSTEM_MANDATORY_LABEL_ACE_TYPE 0x11 +#define SYSTEM_RESOURCE_ATTRIBUTE_ACE_TYPE 0x12 +#define SYSTEM_SCOPED_POLICY_ID_ACE_TYPE 0x13 + +/* ACE flags */ +#define OBJECT_INHERIT_ACE 0x01 +#define CONTAINER_INHERIT_ACE 0x02 +#define NO_PROPAGATE_INHERIT_ACE 0x04 +#define INHERIT_ONLY_ACE 0x08 +#define INHERITED_ACE 0x10 +#define SUCCESSFUL_ACCESS_ACE_FLAG 0x40 +#define FAILED_ACCESS_ACE_FLAG 0x80 + struct cifs_ace { - __u8 type; + __u8 type; /* see above and MS-DTYP 2.4.4.1 */ __u8 flags; __le16 size; __le32 access_req; @@ -99,6 +130,54 @@ struct cifs_ace { } __attribute__((packed)); /* + * The current SMB3 form of security descriptor is similar to what was used for + * cifs (see above) but some fields are split, and fields in the struct below + * matches names of fields to the the spec, MS-DTYP (see sections 2.4.5 and + * 2.4.6). Note that "CamelCase" fields are used in this struct in order to + * match the MS-DTYP and MS-SMB2 specs which define the wire format. + */ +struct smb3_sd { + __u8 Revision; /* revision level, MUST be one */ + __u8 Sbz1; /* only meaningful if 'RM' flag set below */ + __le16 Control; + __le32 OffsetOwner; + __le32 OffsetGroup; + __le32 OffsetSacl; + __le32 OffsetDacl; +} __packed; + +/* Meaning of 'Control' field flags */ +#define ACL_CONTROL_SR 0x0001 /* Self relative */ +#define ACL_CONTROL_RM 0x0002 /* Resource manager control bits */ +#define ACL_CONTROL_PS 0x0004 /* SACL protected from inherits */ +#define ACL_CONTROL_PD 0x0008 /* DACL protected from inherits */ +#define ACL_CONTROL_SI 0x0010 /* SACL Auto-Inherited */ +#define ACL_CONTROL_DI 0x0020 /* DACL Auto-Inherited */ +#define ACL_CONTROL_SC 0x0040 /* SACL computed through inheritance */ +#define ACL_CONTROL_DC 0x0080 /* DACL computed through inheritence */ +#define ACL_CONTROL_SS 0x0100 /* Create server ACL */ +#define ACL_CONTROL_DT 0x0200 /* DACL provided by trusteed source */ +#define ACL_CONTROL_SD 0x0400 /* SACL defaulted */ +#define ACL_CONTROL_SP 0x0800 /* SACL is present on object */ +#define ACL_CONTROL_DD 0x1000 /* DACL defaulted */ +#define ACL_CONTROL_DP 0x2000 /* DACL is present on object */ +#define ACL_CONTROL_GD 0x4000 /* Group was defaulted */ +#define ACL_CONTROL_OD 0x8000 /* User was defaulted */ + +/* Meaning of AclRevision flags */ +#define ACL_REVISION 0x02 /* See section 2.4.4.1 of MS-DTYP */ +#define ACL_REVISION_DS 0x04 /* Additional AceTypes allowed */ + +struct smb3_acl { + u8 AclRevision; /* revision level */ + u8 Sbz1; /* MBZ */ + __le16 AclSize; + __le16 AceCount; + __le16 Sbz2; /* MBZ */ +} __packed; + + +/* * Minimum security identifier can be one for system defined Users * and Groups such as NULL SID and World or Built-in accounts such * as Administrator and Guest and consists of diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 54e204589cb9..2e960e1049db 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -331,8 +331,9 @@ struct smb_version_operations { umode_t mode, struct cifs_tcon *tcon, const char *full_path, struct cifs_sb_info *cifs_sb); - int (*mkdir)(const unsigned int, struct cifs_tcon *, const char *, - struct cifs_sb_info *); + int (*mkdir)(const unsigned int xid, struct inode *inode, umode_t mode, + struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *sb); /* set info on created directory */ void (*mkdir_setinfo)(struct inode *, const char *, struct cifs_sb_info *, struct cifs_tcon *, @@ -1209,6 +1210,7 @@ struct cifs_search_info { bool smallBuf:1; /* so we know which buf_release function to call */ }; +#define ACL_NO_MODE -1 struct cifs_open_parms { struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 99b1b1ef558c..e53e9f62b87b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -372,7 +372,8 @@ extern int CIFSSMBUnixSetPathInfo(const unsigned int xid, const struct nls_table *nls_codepage, int remap); -extern int CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, +extern int CIFSSMBMkDir(const unsigned int xid, struct inode *inode, + umode_t mode, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern int CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index dbee2132e419..4f554f019a98 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1078,7 +1078,8 @@ RmDirRetry: } int -CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, +CIFSSMBMkDir(const unsigned int xid, struct inode *inode, umode_t mode, + struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { int rc = 0; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 26cdfbf1e164..3bae2e53f0b8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1622,13 +1622,14 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) } /* BB add setting the equivalent of mode via CreateX w/ACLs */ - rc = server->ops->mkdir(xid, tcon, full_path, cifs_sb); + rc = server->ops->mkdir(xid, inode, mode, tcon, full_path, cifs_sb); if (rc) { cifs_dbg(FYI, "cifs_mkdir returned 0x%x\n", rc); d_drop(direntry); goto mkdir_out; } + /* TODO: skip this for smb2/smb3 */ rc = cifs_mkdir_qinfo(inode, direntry, mode, full_path, cifs_sb, tcon, xid); mkdir_out: diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 76ddd98b6298..1a01e108d75e 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -164,6 +164,7 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) { struct inode *inode = file_inode(filep); + struct smb3_key_debug_info pkey_inf; int rc = -ENOTTY; /* strange error - but the precedent */ unsigned int xid; struct cifsFileInfo *pSMBFile = filep->private_data; @@ -270,6 +271,34 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) else rc = -EOPNOTSUPP; break; + case CIFS_DUMP_KEY: + if (pSMBFile == NULL) + break; + if (!capable(CAP_SYS_ADMIN)) { + rc = -EACCES; + break; + } + + tcon = tlink_tcon(pSMBFile->tlink); + if (!smb3_encryption_required(tcon)) { + rc = -EOPNOTSUPP; + break; + } + pkey_inf.cipher_type = + le16_to_cpu(tcon->ses->server->cipher_type); + pkey_inf.Suid = tcon->ses->Suid; + memcpy(pkey_inf.auth_key, tcon->ses->auth_key.response, + 16 /* SMB2_NTLMV2_SESSKEY_SIZE */); + memcpy(pkey_inf.smb3decryptionkey, + tcon->ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE); + memcpy(pkey_inf.smb3encryptionkey, + tcon->ses->smb3encryptionkey, SMB3_SIGN_KEY_SIZE); + if (copy_to_user((void __user *)arg, &pkey_inf, + sizeof(struct smb3_key_debug_info))) + rc = -EFAULT; + else + rc = 0; + break; default: cifs_dbg(FYI, "unsupported ioctl\n"); break; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 4c764ff7edd2..85bd644f9773 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -698,7 +698,6 @@ sess_auth_lanman(struct sess_data *sess_data) char *bcc_ptr; struct cifs_ses *ses = sess_data->ses; char lnm_session_key[CIFS_AUTH_RESP_SIZE]; - __u32 capabilities; __u16 bytes_remaining; /* lanman 2 style sessionsetup */ @@ -709,7 +708,7 @@ sess_auth_lanman(struct sess_data *sess_data) pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; bcc_ptr = sess_data->iov[2].iov_base; - capabilities = cifs_ssetup_hdr(ses, pSMB); + (void)cifs_ssetup_hdr(ses, pSMB); pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index d2a3fb7e5c8d..4121ac1163ca 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -51,7 +51,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, __u32 desired_access, __u32 create_disposition, - __u32 create_options, void *ptr, int command, + __u32 create_options, umode_t mode, void *ptr, int command, struct cifsFileInfo *cfile) { int rc; @@ -103,6 +103,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; oparms.fid = &fid; oparms.reconnect = false; + oparms.mode = mode; memset(&open_iov, 0, sizeof(open_iov)); rqst[num_rqst].rq_iov = open_iov; @@ -478,7 +479,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, - smb2_data, SMB2_OP_QUERY_INFO, cfile); + ACL_NO_MODE, smb2_data, SMB2_OP_QUERY_INFO, cfile); if (rc == -EOPNOTSUPP) { *symlink = true; create_options |= OPEN_REPARSE_POINT; @@ -486,8 +487,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, /* Failed on a symbolic link - query a reparse point info */ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, - create_options, smb2_data, - SMB2_OP_QUERY_INFO, NULL); + create_options, ACL_NO_MODE, + smb2_data, SMB2_OP_QUERY_INFO, NULL); } if (rc) goto out; @@ -499,12 +500,14 @@ out: } int -smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, +smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode, + struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { return smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, - CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR, NULL); + CREATE_NOT_FILE, mode, NULL, SMB2_OP_MKDIR, + NULL); } void @@ -525,8 +528,8 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, cifs_get_writable_path(tcon, name, &cfile); tmprc = smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, - CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO, - cfile); + CREATE_NOT_FILE, ACL_NO_MODE, + &data, SMB2_OP_SET_INFO, cfile); if (tmprc == 0) cifs_i->cifsAttrs = dosattrs; } @@ -536,7 +539,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, - CREATE_NOT_FILE, + CREATE_NOT_FILE, ACL_NO_MODE, NULL, SMB2_OP_RMDIR, NULL); } @@ -546,7 +549,7 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, { return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT, - NULL, SMB2_OP_DELETE, NULL); + ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL); } static int @@ -564,7 +567,8 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, goto smb2_rename_path; } rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access, - FILE_OPEN, 0, smb2_to_name, command, cfile); + FILE_OPEN, 0, ACL_NO_MODE, smb2_to_name, + command, cfile); smb2_rename_path: kfree(smb2_to_name); return rc; @@ -601,8 +605,8 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, __le64 eof = cpu_to_le64(size); return smb2_compound_op(xid, tcon, cifs_sb, full_path, - FILE_WRITE_DATA, FILE_OPEN, 0, &eof, - SMB2_OP_SET_EOF, NULL); + FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE, + &eof, SMB2_OP_SET_EOF, NULL); } int @@ -623,8 +627,8 @@ smb2_set_file_info(struct inode *inode, const char *full_path, return PTR_ERR(tlink); rc = smb2_compound_op(xid, tlink_tcon(tlink), cifs_sb, full_path, - FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf, - SMB2_OP_SET_INFO, NULL); + FILE_WRITE_ATTRIBUTES, FILE_OPEN, + 0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, NULL); cifs_put_tlink(tlink); return rc; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index eaed18061314..4c0922596467 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -751,6 +751,8 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) goto oshr_exit; } + atomic_inc(&tcon->num_remote_opens); + o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base; oparms.fid->persistent_fid = o_rsp->PersistentFileId; oparms.fid->volatile_fid = o_rsp->VolatileFileId; @@ -1176,6 +1178,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, rc = compound_send_recv(xid, ses, flags, 3, rqst, resp_buftype, rsp_iov); + /* no need to bump num_remote_opens because handle immediately closed */ sea_exit: kfree(ea); @@ -1518,6 +1521,8 @@ smb2_ioctl_query_info(const unsigned int xid, resp_buftype, rsp_iov); if (rc) goto iqinf_exit; + + /* No need to bump num_remote_opens since handle immediately closed */ if (qi.flags & PASSTHRU_FSCTL) { pqi = (struct smb_query_info __user *)arg; io_rsp = (struct smb2_ioctl_rsp *)rsp_iov[1].iov_base; @@ -3328,6 +3333,11 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) return; + /* Check if the server granted an oplock rather than a lease */ + if (oplock & SMB2_OPLOCK_LEVEL_EXCLUSIVE) + return smb2_set_oplock_level(cinode, oplock, epoch, + purge_cache); + if (oplock & SMB2_LEASE_READ_CACHING_HE) { new_oplock |= CIFS_CACHE_READ_FLG; strcat(message, "R"); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 87066f1af12c..85f9d614d968 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -751,6 +751,8 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode) unsigned int num = *num_iovec; iov[num].iov_base = create_posix_buf(mode); + if (mode == -1) + cifs_dbg(VFS, "illegal mode\n"); /* BB REMOVEME */ if (iov[num].iov_base == NULL) return -ENOMEM; iov[num].iov_len = sizeof(struct create_posix); @@ -2352,6 +2354,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, rqst.rq_iov = iov; rqst.rq_nvec = n_iov; + /* no need to inc num_remote_opens because we close it just below */ trace_smb3_posix_mkdir_enter(xid, tcon->tid, ses->Suid, CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES); /* resource #4: response buffer */ @@ -2416,6 +2419,7 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock, /* File attributes ignored on open (used in create though) */ req->FileAttributes = cpu_to_le32(file_attributes); req->ShareAccess = FILE_SHARE_ALL_LE; + req->CreateDisposition = cpu_to_le32(oparms->disposition); req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK); req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req)); @@ -2517,6 +2521,23 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock, return rc; } + /* TODO: add handling for the mode on create */ + if (oparms->disposition == FILE_CREATE) + cifs_dbg(VFS, "mode is 0x%x\n", oparms->mode); /* BB REMOVEME */ + + if ((oparms->disposition == FILE_CREATE) && (oparms->mode != -1)) { + if (n_iov > 2) { + struct create_context *ccontext = + (struct create_context *)iov[n_iov-1].iov_base; + ccontext->Next = + cpu_to_le32(iov[n_iov-1].iov_len); + } + + /* rc = add_sd_context(iov, &n_iov, oparms->mode); */ + if (rc) + return rc; + } + if (n_iov > 2) { struct create_context *ccontext = (struct create_context *)iov[n_iov-1].iov_base; @@ -3180,7 +3201,7 @@ SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, * See MS-SMB2 2.2.35 and 2.2.36 */ -int +static int SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 completion_filter, bool watch_tree) diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 67a91b11fd59..da3a6d580808 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -84,7 +84,8 @@ extern int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, umode_t mode, struct cifs_tcon *tcon, const char *full_path, struct cifs_sb_info *cifs_sb); -extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, +extern int smb2_mkdir(const unsigned int xid, struct inode *inode, + umode_t mode, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, struct cifs_sb_info *cifs_sb, diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index 08628e6a42ac..1ff28529cf4b 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -144,6 +144,17 @@ #define IO_REPARSE_APPXSTREAM 0xC0000014 /* NFS symlinks, Win 8/SMB3 and later */ #define IO_REPARSE_TAG_NFS 0x80000014 +/* + * AzureFileSync - see + * https://docs.microsoft.com/en-us/azure/storage/files/storage-sync-cloud-tiering + */ +#define IO_REPARSE_TAG_AZ_FILE_SYNC 0x8000001e +/* WSL reparse tags */ +#define IO_REPARSE_TAG_LX_SYMLINK 0xA000001D +#define IO_REPARSE_TAG_AF_UNIX 0x80000023 +#define IO_REPARSE_TAG_LX_FIFO 0x80000024 +#define IO_REPARSE_TAG_LX_CHR 0x80000025 +#define IO_REPARSE_TAG_LX_BLK 0x80000026 /* fsctl flags */ /* If Flags is set to this value, the request is an FSCTL not ioctl request */ diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 9076150758d8..db4ba8f6077e 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -31,7 +31,7 @@ #include "cifs_fs_sb.h" #include "cifs_unicode.h" -#define MAX_EA_VALUE_SIZE 65535 +#define MAX_EA_VALUE_SIZE CIFSMaxBufSize #define CIFS_XATTR_CIFS_ACL "system.cifs_acl" #define CIFS_XATTR_ATTRIB "cifs.dosattrib" /* full name: user.cifs.dosattrib */ #define CIFS_XATTR_CREATETIME "cifs.creationtime" /* user.cifs.creationtime */ diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 8a9fcbd0e8ac..fc3a8d8064f8 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -34,11 +34,15 @@ static void erofs_readendio(struct bio *bio) struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr) { - struct inode *const bd_inode = sb->s_bdev->bd_inode; - struct address_space *const mapping = bd_inode->i_mapping; + struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; + struct page *page; - return read_cache_page_gfp(mapping, blkaddr, + page = read_cache_page_gfp(mapping, blkaddr, mapping_gfp_constraint(mapping, ~__GFP_FS)); + /* should already be PageUptodate */ + if (!IS_ERR(page)) + lock_page(page); + return page; } static int erofs_map_blocks_flatmode(struct inode *inode, diff --git a/fs/erofs/super.c b/fs/erofs/super.c index caf9a95173b0..0e369494f2f2 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -105,9 +105,9 @@ static int erofs_read_superblock(struct super_block *sb) int ret; page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL); - if (!page) { + if (IS_ERR(page)) { erofs_err(sb, "cannot read erofs superblock"); - return -EIO; + return PTR_ERR(page); } sbi = EROFS_SB(sb); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 96e34c90f814..fad80c97d247 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -575,7 +575,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, struct erofs_map_blocks *const map = &fe->map; struct z_erofs_collector *const clt = &fe->clt; const loff_t offset = page_offset(page); - bool tight = (clt->mode >= COLLECT_PRIMARY_HOOKED); + bool tight = true; enum z_erofs_cache_alloctype cache_strategy; enum z_erofs_page_type page_type; @@ -628,8 +628,16 @@ restart_now: preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy, pagepool); - tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED); hitted: + /* + * Ensure the current partial page belongs to this submit chain rather + * than other concurrent submit chains or the noio(bypass) chain since + * those chains are handled asynchronously thus the page cannot be used + * for inplace I/O or pagevec (should be processed in strict order.) + */ + tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED && + clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); + cur = end - min_t(unsigned int, offset + end - map->m_la, end); if (!(map->m_flags & EROFS_MAP_MAPPED)) { zero_user_segment(page, cur, end); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 123e3dee7733..516faa280ced 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4551,6 +4551,7 @@ static int __ext4_get_inode_loc(struct inode *inode, struct buffer_head *bh; struct super_block *sb = inode->i_sb; ext4_fsblk_t block; + struct blk_plug plug; int inodes_per_block, inode_offset; iloc->bh = NULL; @@ -4639,6 +4640,7 @@ make_io: * If we need to do any I/O, try to pre-readahead extra * blocks from the inode table. */ + blk_start_plug(&plug); if (EXT4_SB(sb)->s_inode_readahead_blks) { ext4_fsblk_t b, end, table; unsigned num; @@ -4669,6 +4671,7 @@ make_io: get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); + blk_finish_plug(&plug); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_BLOCK(inode, block, diff --git a/fs/fhandle.c b/fs/fhandle.c index 0ee727485615..01263ffbc4c0 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -246,7 +246,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, * sys_open_by_handle_at: Open the file handle * @mountdirfd: directory file descriptor * @handle: file handle to be opened - * @flag: open flags. + * @flags: open flags. * * @mountdirfd indicate the directory file descriptor * of the mount point. file handle is decoded relative diff --git a/fs/io_uring.c b/fs/io_uring.c index aa8ac557493c..8a0381f1a43b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1892,15 +1892,15 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) unsigned count, req_dist, tail_index; struct io_ring_ctx *ctx = req->ctx; struct list_head *entry; - struct timespec ts; + struct timespec64 ts; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags || sqe->len != 1) return -EINVAL; - if (copy_from_user(&ts, (void __user *) (unsigned long) sqe->addr, - sizeof(ts))) + + if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr))) return -EFAULT; /* @@ -1934,7 +1934,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); req->timeout.timer.function = io_timeout_fn; - hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts), + hrtimer_start(&req->timeout.timer, timespec64_to_ktime(ts), HRTIMER_MODE_REL); return 0; } diff --git a/fs/statfs.c b/fs/statfs.c index eea7af6f2f22..2616424012ea 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -318,19 +318,10 @@ COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) { struct compat_statfs64 buf; - if (sizeof(ubuf->f_bsize) == 4) { - if ((kbuf->f_type | kbuf->f_bsize | kbuf->f_namelen | - kbuf->f_frsize | kbuf->f_flags) & 0xffffffff00000000ULL) - return -EOVERFLOW; - /* f_files and f_ffree may be -1; it's okay - * to stuff that into 32 bits */ - if (kbuf->f_files != 0xffffffffffffffffULL - && (kbuf->f_files & 0xffffffff00000000ULL)) - return -EOVERFLOW; - if (kbuf->f_ffree != 0xffffffffffffffffULL - && (kbuf->f_ffree & 0xffffffff00000000ULL)) - return -EOVERFLOW; - } + + if ((kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) + return -EOVERFLOW; + memset(&buf, 0, sizeof(struct compat_statfs64)); buf.f_type = kbuf->f_type; buf.f_bsize = kbuf->f_bsize; diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cf074bce3eb3..c94a9ff9f082 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -4,6 +4,13 @@ #include <asm/types.h> #include <linux/bits.h> +/* Set bits in the first 'n' bytes when loaded from memory */ +#ifdef __LITTLE_ENDIAN +# define aligned_byte_mask(n) ((1UL << 8*(n))-1) +#else +# define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) +#endif + #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 61c9ffd89b05..93d5cf0bc716 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -108,7 +108,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG)) return true; - + /* + * For dax vmas, try to always use hugepage mappings. If the kernel does + * not support hugepages, fsdax mappings will fallback to PAGE_SIZE + * mappings, and device-dax namespaces, that try to guarantee a given + * mapping size, will fail to enable + */ if (vma_is_dax(vma)) return true; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fcb46b3374c6..719fc3e15ea4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1090,6 +1090,7 @@ enum kvm_stat_kind { struct kvm_stat_data { int offset; + int mode; struct kvm *kvm; }; @@ -1097,6 +1098,7 @@ struct kvm_stats_debugfs_item { const char *name; int offset; enum kvm_stat_kind kind; + int mode; }; extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; diff --git a/include/linux/memremap.h b/include/linux/memremap.h index bef51e35d8d2..6fefb09af7c3 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -17,6 +17,7 @@ struct device; */ struct vmem_altmap { const unsigned long base_pfn; + const unsigned long end_pfn; const unsigned long reserve; unsigned long free; unsigned long align; diff --git a/include/linux/platform_data/db8500_thermal.h b/include/linux/platform_data/db8500_thermal.h deleted file mode 100644 index 55e55750a165..000000000000 --- a/include/linux/platform_data/db8500_thermal.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * db8500_thermal.h - DB8500 Thermal Management Implementation - * - * Copyright (C) 2012 ST-Ericsson - * Copyright (C) 2012 Linaro Ltd. - * - * Author: Hongbo Zhang <hongbo.zhang@linaro.com> - */ - -#ifndef _DB8500_THERMAL_H_ -#define _DB8500_THERMAL_H_ - -#include <linux/thermal.h> - -#define COOLING_DEV_MAX 8 - -struct db8500_trip_point { - unsigned long temp; - enum thermal_trip_type type; - char cdev_name[COOLING_DEV_MAX][THERMAL_NAME_LENGTH]; -}; - -struct db8500_thsens_platform_data { - struct db8500_trip_point trip_points[THERMAL_MAX_TRIPS]; - int num_trips; -}; - -#endif /* _DB8500_THERMAL_H_ */ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 70bbdc38dc37..e47d0522a1f4 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -231,6 +231,76 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, #endif /* ARCH_HAS_NOCACHE_UACCESS */ +extern __must_check int check_zeroed_user(const void __user *from, size_t size); + +/** + * copy_struct_from_user: copy a struct from userspace + * @dst: Destination address, in kernel space. This buffer must be @ksize + * bytes long. + * @ksize: Size of @dst struct. + * @src: Source address, in userspace. + * @usize: (Alleged) size of @src struct. + * + * Copies a struct from userspace to kernel space, in a way that guarantees + * backwards-compatibility for struct syscall arguments (as long as future + * struct extensions are made such that all new fields are *appended* to the + * old struct, and zeroed-out new fields have the same meaning as the old + * struct). + * + * @ksize is just sizeof(*dst), and @usize should've been passed by userspace. + * The recommended usage is something like the following: + * + * SYSCALL_DEFINE2(foobar, const struct foo __user *, uarg, size_t, usize) + * { + * int err; + * struct foo karg = {}; + * + * if (usize > PAGE_SIZE) + * return -E2BIG; + * if (usize < FOO_SIZE_VER0) + * return -EINVAL; + * + * err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize); + * if (err) + * return err; + * + * // ... + * } + * + * There are three cases to consider: + * * If @usize == @ksize, then it's copied verbatim. + * * If @usize < @ksize, then the userspace has passed an old struct to a + * newer kernel. The rest of the trailing bytes in @dst (@ksize - @usize) + * are to be zero-filled. + * * If @usize > @ksize, then the userspace has passed a new struct to an + * older kernel. The trailing bytes unknown to the kernel (@usize - @ksize) + * are checked to ensure they are zeroed, otherwise -E2BIG is returned. + * + * Returns (in all cases, some data may have been copied): + * * -E2BIG: (@usize > @ksize) and there are non-zero trailing bytes in @src. + * * -EFAULT: access to userspace failed. + */ +static __always_inline __must_check int +copy_struct_from_user(void *dst, size_t ksize, const void __user *src, + size_t usize) +{ + size_t size = min(ksize, usize); + size_t rest = max(ksize, usize) - size; + + /* Deal with trailing bytes. */ + if (usize < ksize) { + memset(dst + size, 0, rest); + } else if (usize > ksize) { + int ret = check_zeroed_user(src + size, rest); + if (ret <= 0) + return ret ?: -E2BIG; + } + /* Copy the interoperable parts of the struct. */ + if (copy_from_user(dst, src, size)) + return -EFAULT; + return 0; +} + /* * probe_kernel_read(): safely attempt to read from a location * @dst: pointer to the buffer that shall take the data diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index eb57e3037deb..69e8bb8963db 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -35,8 +35,8 @@ DECLARE_EVENT_CLASS(kmem_alloc, __entry->gfp_flags = gfp_flags; ), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", - __entry->call_site, + TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", + (void *)__entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, @@ -131,7 +131,8 @@ DECLARE_EVENT_CLASS(kmem_free, __entry->ptr = ptr; ), - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) + TP_printk("call_site=%pS ptr=%p", + (void *)__entry->call_site, __entry->ptr) ); DEFINE_EVENT(kmem_free, kfree, diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c99b4f2482c6..4fe35d600ab8 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1003,6 +1003,8 @@ struct drm_amdgpu_info_device { __u64 high_va_max; /* gfx10 pa_sc_tile_steering_override */ __u32 pa_sc_tile_steering_override; + /* disabled TCCs */ + __u64 tcc_disabled_mask; }; struct drm_amdgpu_info_hw_ip { diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index 1c215ea1798e..e168dc59e9a0 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -45,6 +45,27 @@ struct nvme_passthru_cmd { __u32 result; }; +struct nvme_passthru_cmd64 { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u64 result; +}; + #define nvme_admin_cmd nvme_passthru_cmd #define NVME_IOCTL_ID _IO('N', 0x40) @@ -54,5 +75,7 @@ struct nvme_passthru_cmd { #define NVME_IOCTL_RESET _IO('N', 0x44) #define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) #define NVME_IOCTL_RESCAN _IO('N', 0x46) +#define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64) +#define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ diff --git a/include/uapi/linux/pg.h b/include/uapi/linux/pg.h index 364c350e85cd..62b6f69bd9fb 100644 --- a/include/uapi/linux/pg.h +++ b/include/uapi/linux/pg.h @@ -35,6 +35,9 @@ */ +#ifndef _UAPI_LINUX_PG_H +#define _UAPI_LINUX_PG_H + #define PG_MAGIC 'P' #define PG_RESET 'Z' #define PG_COMMAND 'C' @@ -61,4 +64,4 @@ struct pg_read_hdr { }; -/* end of pg.h */ +#endif /* _UAPI_LINUX_PG_H */ diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index b3105ac1381a..99335e1f4a27 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -33,8 +33,31 @@ #define CLONE_NEWNET 0x40000000 /* New network namespace */ #define CLONE_IO 0x80000000 /* Clone io context */ -/* - * Arguments for the clone3 syscall +#ifndef __ASSEMBLY__ +/** + * struct clone_args - arguments for the clone3 syscall + * @flags: Flags for the new process as listed above. + * All flags are valid except for CSIGNAL and + * CLONE_DETACHED. + * @pidfd: If CLONE_PIDFD is set, a pidfd will be + * returned in this argument. + * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the + * child process will be returned in the child's + * memory. + * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of + * the child process will be returned in the + * parent's memory. + * @exit_signal: The exit_signal the parent process will be + * sent when the child exits. + * @stack: Specify the location of the stack for the + * child process. + * @stack_size: The size of the stack for the child process. + * @tls: If CLONE_SETTLS is set, the tls descriptor + * is set to tls. + * + * The structure is versioned by size and thus extensible. + * New struct members must go at the end of the struct and + * must be properly 64bit aligned. */ struct clone_args { __aligned_u64 flags; @@ -46,6 +69,9 @@ struct clone_args { __aligned_u64 stack_size; __aligned_u64 tls; }; +#endif + +#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ /* * Scheduling policies diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 98b30c1613b2..d89969aa9942 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -212,30 +212,7 @@ int xen_xlate_map_ballooned_pages(xen_pfn_t **pfns, void **vaddr, bool xen_running_on_version_or_later(unsigned int major, unsigned int minor); -efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc); -efi_status_t xen_efi_set_time(efi_time_t *tm); -efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, - efi_time_t *tm); -efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm); -efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, - u32 *attr, unsigned long *data_size, - void *data); -efi_status_t xen_efi_get_next_variable(unsigned long *name_size, - efi_char16_t *name, efi_guid_t *vendor); -efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, - u32 attr, unsigned long data_size, - void *data); -efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, - u64 *remaining_space, - u64 *max_variable_size); -efi_status_t xen_efi_get_next_high_mono_count(u32 *count); -efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, - unsigned long count, unsigned long sg_list); -efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, - unsigned long count, u64 *max_size, - int *reset_type); -void xen_efi_reset_system(int reset_type, efi_status_t status, - unsigned long data_size, efi_char16_t *data); +void xen_efi_runtime_setup(void); #ifdef CONFIG_PREEMPT diff --git a/kernel/events/core.c b/kernel/events/core.c index 4655adbbae10..3f0cb82e4fbc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10586,55 +10586,26 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, u32 size; int ret; - if (!access_ok(uattr, PERF_ATTR_SIZE_VER0)) - return -EFAULT; - - /* - * zero the full structure, so that a short copy will be nice. - */ + /* Zero the full structure, so that a short copy will be nice. */ memset(attr, 0, sizeof(*attr)); ret = get_user(size, &uattr->size); if (ret) return ret; - if (size > PAGE_SIZE) /* silly large */ - goto err_size; - - if (!size) /* abi compat */ + /* ABI compatibility quirk: */ + if (!size) size = PERF_ATTR_SIZE_VER0; - - if (size < PERF_ATTR_SIZE_VER0) + if (size < PERF_ATTR_SIZE_VER0 || size > PAGE_SIZE) goto err_size; - /* - * If we're handed a bigger struct than we know of, - * ensure all the unknown bits are 0 - i.e. new - * user-space does not rely on any kernel feature - * extensions we dont know about yet. - */ - if (size > sizeof(*attr)) { - unsigned char __user *addr; - unsigned char __user *end; - unsigned char val; - - addr = (void __user *)uattr + sizeof(*attr); - end = (void __user *)uattr + size; - - for (; addr < end; addr++) { - ret = get_user(val, addr); - if (ret) - return ret; - if (val) - goto err_size; - } - size = sizeof(*attr); + ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); + if (ret) { + if (ret == -E2BIG) + goto err_size; + return ret; } - ret = copy_from_user(attr, uattr, size); - if (ret) - return -EFAULT; - attr->size = size; if (attr->__reserved_1) diff --git a/kernel/fork.c b/kernel/fork.c index f9572f416126..1f6c45f6a734 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2525,39 +2525,19 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, #ifdef __ARCH_WANT_SYS_CLONE3 noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, struct clone_args __user *uargs, - size_t size) + size_t usize) { + int err; struct clone_args args; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(usize > PAGE_SIZE)) return -E2BIG; - - if (unlikely(size < sizeof(struct clone_args))) + if (unlikely(usize < CLONE_ARGS_SIZE_VER0)) return -EINVAL; - if (unlikely(!access_ok(uargs, size))) - return -EFAULT; - - if (size > sizeof(struct clone_args)) { - unsigned char __user *addr; - unsigned char __user *end; - unsigned char val; - - addr = (void __user *)uargs + sizeof(struct clone_args); - end = (void __user *)uargs + size; - - for (; addr < end; addr++) { - if (get_user(val, addr)) - return -EFAULT; - if (val) - return -E2BIG; - } - - size = sizeof(struct clone_args); - } - - if (copy_from_user(&args, uargs, size)) - return -EFAULT; + err = copy_struct_from_user(&args, sizeof(args), uargs, usize); + if (err) + return err; /* * Verify that higher 32bits of exit_signal are unset and that @@ -2604,6 +2584,17 @@ static bool clone3_args_valid(const struct kernel_clone_args *kargs) return true; } +/** + * clone3 - create a new process with specific properties + * @uargs: argument structure + * @size: size of @uargs + * + * clone3() is the extensible successor to clone()/clone2(). + * It takes a struct as argument that is versioned by its size. + * + * Return: On success, a positive PID for the child process. + * On error, a negative errno number. + */ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size) { int err; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7880f4f64d0e..dd05a378631a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5106,9 +5106,6 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a u32 size; int ret; - if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0)) - return -EFAULT; - /* Zero the full structure, so that a short copy will be nice: */ memset(attr, 0, sizeof(*attr)); @@ -5116,45 +5113,19 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a if (ret) return ret; - /* Bail out on silly large: */ - if (size > PAGE_SIZE) - goto err_size; - /* ABI compatibility quirk: */ if (!size) size = SCHED_ATTR_SIZE_VER0; - - if (size < SCHED_ATTR_SIZE_VER0) + if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) goto err_size; - /* - * If we're handed a bigger struct than we know of, - * ensure all the unknown bits are 0 - i.e. new - * user-space does not rely on any kernel feature - * extensions we dont know about yet. - */ - if (size > sizeof(*attr)) { - unsigned char __user *addr; - unsigned char __user *end; - unsigned char val; - - addr = (void __user *)uattr + sizeof(*attr); - end = (void __user *)uattr + size; - - for (; addr < end; addr++) { - ret = get_user(val, addr); - if (ret) - return ret; - if (val) - goto err_size; - } - size = sizeof(*attr); + ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); + if (ret) { + if (ret == -E2BIG) + goto err_size; + return ret; } - ret = copy_from_user(attr, uattr, size); - if (ret) - return -EFAULT; - if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) && size < SCHED_ATTR_SIZE_VER1) return -EINVAL; @@ -5354,7 +5325,7 @@ sched_attr_copy_to_user(struct sched_attr __user *uattr, * sys_sched_getattr - similar to sched_getparam, but with sched_attr * @pid: the pid in question. * @uattr: structure containing the extended parameters. - * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility. + * @usize: sizeof(attr) for fwd/bwd comp. * @flags: for future extension. */ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index a39bed2c784f..168479a7d61b 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -174,7 +174,6 @@ static int membarrier_private_expedited(int flags) */ if (cpu == raw_smp_processor_id()) continue; - rcu_read_lock(); p = rcu_dereference(cpu_rq(cpu)->curr); if (p && p->mm == mm) __cpumask_set_cpu(cpu, tmpmask); diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index c1f5bb590b5e..b5a65e212df2 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -42,39 +42,39 @@ static int bc_shutdown(struct clock_event_device *evt) */ static int bc_set_next(ktime_t expires, struct clock_event_device *bc) { - int bc_moved; /* - * We try to cancel the timer first. If the callback is on - * flight on some other cpu then we let it handle it. If we - * were able to cancel the timer nothing can rearm it as we - * own broadcast_lock. + * This is called either from enter/exit idle code or from the + * broadcast handler. In all cases tick_broadcast_lock is held. * - * However we can also be called from the event handler of - * ce_broadcast_hrtimer itself when it expires. We cannot - * restart the timer because we are in the callback, but we - * can set the expiry time and let the callback return - * HRTIMER_RESTART. + * hrtimer_cancel() cannot be called here neither from the + * broadcast handler nor from the enter/exit idle code. The idle + * code can run into the problem described in bc_shutdown() and the + * broadcast handler cannot wait for itself to complete for obvious + * reasons. * - * Since we are in the idle loop at this point and because - * hrtimer_{start/cancel} functions call into tracing, - * calls to these functions must be bound within RCU_NONIDLE. + * Each caller tries to arm the hrtimer on its own CPU, but if the + * hrtimer callbback function is currently running, then + * hrtimer_start() cannot move it and the timer stays on the CPU on + * which it is assigned at the moment. + * + * As this can be called from idle code, the hrtimer_start() + * invocation has to be wrapped with RCU_NONIDLE() as + * hrtimer_start() can call into tracing. */ - RCU_NONIDLE( - { - bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0; - if (bc_moved) { - hrtimer_start(&bctimer, expires, - HRTIMER_MODE_ABS_PINNED_HARD); - } - } - ); - - if (bc_moved) { - /* Bind the "device" to the cpu */ - bc->bound_on = smp_processor_id(); - } else if (bc->bound_on == smp_processor_id()) { - hrtimer_set_expires(&bctimer, expires); - } + RCU_NONIDLE( { + hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED_HARD); + /* + * The core tick broadcast mode expects bc->bound_on to be set + * correctly to prevent a CPU which has the broadcast hrtimer + * armed from going deep idle. + * + * As tick_broadcast_lock is held, nothing can change the cpu + * base which was just established in hrtimer_start() above. So + * the below access is safe even without holding the hrtimer + * base lock. + */ + bc->bound_on = bctimer.base->cpu_base->cpu; + } ); return 0; } @@ -100,10 +100,6 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t) { ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer); - if (clockevent_state_oneshot(&ce_broadcast_hrtimer)) - if (ce_broadcast_hrtimer.next_event != KTIME_MAX) - return HRTIMER_RESTART; - return HRTIMER_NORESTART; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 26b0a08f3c7d..f801d154ff6a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -365,11 +365,11 @@ static inline struct trace_array *top_trace_array(void) __builtin_types_compatible_p(typeof(var), type *) #undef IF_ASSIGN -#define IF_ASSIGN(var, entry, etype, id) \ - if (FTRACE_CMP_TYPE(var, etype)) { \ - var = (typeof(var))(entry); \ - WARN_ON(id && (entry)->type != id); \ - break; \ +#define IF_ASSIGN(var, entry, etype, id) \ + if (FTRACE_CMP_TYPE(var, etype)) { \ + var = (typeof(var))(entry); \ + WARN_ON(id != 0 && (entry)->type != id); \ + break; \ } /* Will cause compile errors if type is not found. */ diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index c773b8fb270c..c9a74f82b14a 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -452,8 +452,10 @@ predicate_parse(const char *str, int nr_parens, int nr_preds, switch (*next) { case '(': /* #2 */ - if (top - op_stack > nr_parens) - return ERR_PTR(-EINVAL); + if (top - op_stack > nr_parens) { + ret = -EINVAL; + goto out_free; + } *(++top) = invert; continue; case '!': /* #3 */ diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index baf58a3612c0..905b10af5d5c 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -178,6 +178,16 @@ void __trace_probe_log_err(int offset, int err_type) if (!command) return; + if (trace_probe_log.index >= trace_probe_log.argc) { + /** + * Set the error position is next to the last arg + space. + * Note that len includes the terminal null and the cursor + * appaers at pos + 1. + */ + pos = len; + offset = 0; + } + /* And make a command string from argv array */ p = command; for (i = 0; i < trace_probe_log.argc; i++) { @@ -1084,6 +1094,12 @@ int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b) { int i; + /* In case of more arguments */ + if (a->nr_args < b->nr_args) + return a->nr_args + 1; + if (a->nr_args > b->nr_args) + return b->nr_args + 1; + for (i = 0; i < a->nr_args; i++) { if ((b->nr_args <= i) || ((a->args[i].type != b->args[i].type) || diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index 28ff554a1be8..6c0005d5dd5c 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -3,16 +3,10 @@ #include <linux/export.h> #include <linux/uaccess.h> #include <linux/mm.h> +#include <linux/bitops.h> #include <asm/word-at-a-time.h> -/* Set bits in the first 'n' bytes when loaded from memory */ -#ifdef __LITTLE_ENDIAN -# define aligned_byte_mask(n) ((1ul << 8*(n))-1) -#else -# define aligned_byte_mask(n) (~0xfful << (BITS_PER_LONG - 8 - 8*(n))) -#endif - /* * Do a strnlen, return length of string *with* final '\0'. * 'count' is the user-supplied count, while 'max' is the diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c index 67bcd5dfd847..e365ace06538 100644 --- a/lib/test_user_copy.c +++ b/lib/test_user_copy.c @@ -31,14 +31,133 @@ # define TEST_U64 #endif -#define test(condition, msg) \ -({ \ - int cond = (condition); \ - if (cond) \ - pr_warn("%s\n", msg); \ - cond; \ +#define test(condition, msg, ...) \ +({ \ + int cond = (condition); \ + if (cond) \ + pr_warn("[%d] " msg "\n", __LINE__, ##__VA_ARGS__); \ + cond; \ }) +static bool is_zeroed(void *from, size_t size) +{ + return memchr_inv(from, 0x0, size) == NULL; +} + +static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size) +{ + int ret = 0; + size_t start, end, i; + size_t zero_start = size / 4; + size_t zero_end = size - zero_start; + + /* + * We conduct a series of check_nonzero_user() tests on a block of memory + * with the following byte-pattern (trying every possible [start,end] + * pair): + * + * [ 00 ff 00 ff ... 00 00 00 00 ... ff 00 ff 00 ] + * + * And we verify that check_nonzero_user() acts identically to memchr_inv(). + */ + + memset(kmem, 0x0, size); + for (i = 1; i < zero_start; i += 2) + kmem[i] = 0xff; + for (i = zero_end; i < size; i += 2) + kmem[i] = 0xff; + + ret |= test(copy_to_user(umem, kmem, size), + "legitimate copy_to_user failed"); + + for (start = 0; start <= size; start++) { + for (end = start; end <= size; end++) { + size_t len = end - start; + int retval = check_zeroed_user(umem + start, len); + int expected = is_zeroed(kmem + start, len); + + ret |= test(retval != expected, + "check_nonzero_user(=%d) != memchr_inv(=%d) mismatch (start=%zu, end=%zu)", + retval, expected, start, end); + } + } + + return ret; +} + +static int test_copy_struct_from_user(char *kmem, char __user *umem, + size_t size) +{ + int ret = 0; + char *umem_src = NULL, *expected = NULL; + size_t ksize, usize; + + umem_src = kmalloc(size, GFP_KERNEL); + if ((ret |= test(umem_src == NULL, "kmalloc failed"))) + goto out_free; + + expected = kmalloc(size, GFP_KERNEL); + if ((ret |= test(expected == NULL, "kmalloc failed"))) + goto out_free; + + /* Fill umem with a fixed byte pattern. */ + memset(umem_src, 0x3e, size); + ret |= test(copy_to_user(umem, umem_src, size), + "legitimate copy_to_user failed"); + + /* Check basic case -- (usize == ksize). */ + ksize = size; + usize = size; + + memcpy(expected, umem_src, ksize); + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), + "copy_struct_from_user(usize == ksize) failed"); + ret |= test(memcmp(kmem, expected, ksize), + "copy_struct_from_user(usize == ksize) gives unexpected copy"); + + /* Old userspace case -- (usize < ksize). */ + ksize = size; + usize = size / 2; + + memcpy(expected, umem_src, usize); + memset(expected + usize, 0x0, ksize - usize); + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), + "copy_struct_from_user(usize < ksize) failed"); + ret |= test(memcmp(kmem, expected, ksize), + "copy_struct_from_user(usize < ksize) gives unexpected copy"); + + /* New userspace (-E2BIG) case -- (usize > ksize). */ + ksize = size / 2; + usize = size; + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize) != -E2BIG, + "copy_struct_from_user(usize > ksize) didn't give E2BIG"); + + /* New userspace (success) case -- (usize > ksize). */ + ksize = size / 2; + usize = size; + + memcpy(expected, umem_src, ksize); + ret |= test(clear_user(umem + ksize, usize - ksize), + "legitimate clear_user failed"); + + memset(kmem, 0x0, size); + ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), + "copy_struct_from_user(usize > ksize) failed"); + ret |= test(memcmp(kmem, expected, ksize), + "copy_struct_from_user(usize > ksize) gives unexpected copy"); + +out_free: + kfree(expected); + kfree(umem_src); + return ret; +} + static int __init test_user_copy_init(void) { int ret = 0; @@ -106,6 +225,11 @@ static int __init test_user_copy_init(void) #endif #undef test_legit + /* Test usage of check_nonzero_user(). */ + ret |= test_check_nonzero_user(kmem, usermem, 2 * PAGE_SIZE); + /* Test usage of copy_struct_from_user(). */ + ret |= test_copy_struct_from_user(kmem, usermem, 2 * PAGE_SIZE); + /* * Invalid usage: none of these copies should succeed. */ diff --git a/lib/usercopy.c b/lib/usercopy.c index c2bfbcaeb3dc..cbb4d9ec00f2 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/uaccess.h> +#include <linux/bitops.h> /* out-of-line parts */ @@ -31,3 +32,57 @@ unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) } EXPORT_SYMBOL(_copy_to_user); #endif + +/** + * check_zeroed_user: check if a userspace buffer only contains zero bytes + * @from: Source address, in userspace. + * @size: Size of buffer. + * + * This is effectively shorthand for "memchr_inv(from, 0, size) == NULL" for + * userspace addresses (and is more efficient because we don't care where the + * first non-zero byte is). + * + * Returns: + * * 0: There were non-zero bytes present in the buffer. + * * 1: The buffer was full of zero bytes. + * * -EFAULT: access to userspace failed. + */ +int check_zeroed_user(const void __user *from, size_t size) +{ + unsigned long val; + uintptr_t align = (uintptr_t) from % sizeof(unsigned long); + + if (unlikely(size == 0)) + return 1; + + from -= align; + size += align; + + if (!user_access_begin(from, size)) + return -EFAULT; + + unsafe_get_user(val, (unsigned long __user *) from, err_fault); + if (align) + val &= ~aligned_byte_mask(align); + + while (size > sizeof(unsigned long)) { + if (unlikely(val)) + goto done; + + from += sizeof(unsigned long); + size -= sizeof(unsigned long); + + unsafe_get_user(val, (unsigned long __user *) from, err_fault); + } + + if (size < sizeof(unsigned long)) + val &= aligned_byte_mask(size); + +done: + user_access_end(); + return (val == 0); +err_fault: + user_access_end(); + return -EFAULT; +} +EXPORT_SYMBOL(check_zeroed_user); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 448d686da8b1..0bf5640f1f07 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -4,6 +4,7 @@ */ #ifndef __NFIT_TEST_H__ #define __NFIT_TEST_H__ +#include <linux/acpi.h> #include <linux/list.h> #include <linux/uuid.h> #include <linux/ioport.h> @@ -202,9 +203,6 @@ struct nd_intel_lss { __u32 status; } __packed; -union acpi_object; -typedef void *acpi_handle; - typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t); typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 8a4025e912cb..ef1e9bafb098 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -95,7 +95,7 @@ echo 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE -check_error '^p:kprobes/testevent _do_fork' # SAME_PROBE +check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE fi exit 0 diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 62c591f87dab..c5ec868fa1e5 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -22,6 +22,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test @@ -48,7 +49,7 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I.. no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie) # On s390, build the testcases KVM-enabled pgste-option = $(call try-run, echo 'int main() { return 0; }' | \ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 0c17f2ee685e..ff234018219c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1083,6 +1083,9 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); #define VMX_BASIC_MEM_TYPE_WB 6LLU #define VMX_BASIC_INOUT 0x0040000000000000LLU +/* VMX_EPT_VPID_CAP bits */ +#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) + /* MSR_IA32_VMX_MISC bits */ #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 69b17055f63d..6ae5a47fe067 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -569,6 +569,10 @@ struct vmx_pages { void *enlightened_vmcs_hva; uint64_t enlightened_vmcs_gpa; void *enlightened_vmcs; + + void *eptp_hva; + uint64_t eptp_gpa; + void *eptp; }; struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); @@ -576,4 +580,14 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); bool load_vmcs(struct vmx_pages *vmx); +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot); +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + uint32_t eptp_memslot); +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot, uint32_t eptp_memslot); +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot); + #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 80a338b5403c..41cf45416060 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -705,7 +705,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, * on error (e.g. currently no memory region using memslot as a KVM * memory slot ID). */ -static struct userspace_mem_region * +struct userspace_mem_region * memslot2region(struct kvm_vm *vm, uint32_t memslot) { struct userspace_mem_region *region; diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index f36262e0f655..ac50c42750cf 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -68,4 +68,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent); void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent); +struct userspace_mem_region * +memslot2region(struct kvm_vm *vm, uint32_t memslot); + #endif /* SELFTEST_KVM_UTIL_INTERNAL_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index c53dbc6bc568..6698cb741e10 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1085,7 +1085,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) for (i = 0; i < nmsrs; i++) state->msrs.entries[i].index = list->indices[i]; r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); - TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)", + TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", r, r == nmsrs ? -1 : list->indices[r]); r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 9cef0455b819..fab8f6b0bf52 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -7,11 +7,39 @@ #include "test_util.h" #include "kvm_util.h" +#include "../kvm_util_internal.h" #include "processor.h" #include "vmx.h" +#define PAGE_SHIFT_4K 12 + +#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 + bool enable_evmcs; +struct eptPageTableEntry { + uint64_t readable:1; + uint64_t writable:1; + uint64_t executable:1; + uint64_t memory_type:3; + uint64_t ignore_pat:1; + uint64_t page_size:1; + uint64_t accessed:1; + uint64_t dirty:1; + uint64_t ignored_11_10:2; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t suppress_ve:1; +}; + +struct eptPageTablePointer { + uint64_t memory_type:3; + uint64_t page_walk_length:3; + uint64_t ad_enabled:1; + uint64_t reserved_11_07:5; + uint64_t address:40; + uint64_t reserved_63_52:12; +}; int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id) { uint16_t evmcs_ver; @@ -174,15 +202,35 @@ bool load_vmcs(struct vmx_pages *vmx) */ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) { + uint32_t sec_exec_ctl = 0; + vmwrite(VIRTUAL_PROCESSOR_ID, 0); vmwrite(POSTED_INTR_NV, 0); vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); - if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0)) + + if (vmx->eptp_gpa) { + uint64_t ept_paddr; + struct eptPageTablePointer eptp = { + .memory_type = VMX_BASIC_MEM_TYPE_WB, + .page_walk_length = 3, /* + 1 */ + .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS), + .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, + }; + + memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); + vmwrite(EPT_POINTER, ept_paddr); + sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; + } + + if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl)) vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); - else + else { vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS)); + GUEST_ASSERT(!sec_exec_ctl); + } + vmwrite(EXCEPTION_BITMAP, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ @@ -327,3 +375,152 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_host_state(); init_vmcs_guest_state(guest_rip, guest_rsp); } + +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot) +{ + uint16_t index[4]; + struct eptPageTableEntry *pml4e; + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((nested_paddr % vm->page_size) == 0, + "Nested physical address not on page boundary,\n" + " nested_paddr: 0x%lx vm->page_size: 0x%x", + nested_paddr, vm->page_size); + TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + index[0] = (nested_paddr >> 12) & 0x1ffu; + index[1] = (nested_paddr >> 21) & 0x1ffu; + index[2] = (nested_paddr >> 30) & 0x1ffu; + index[3] = (nested_paddr >> 39) & 0x1ffu; + + /* Allocate page directory pointer table if not present. */ + pml4e = vmx->eptp_hva; + if (!pml4e[index[3]].readable) { + pml4e[index[3]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pml4e[index[3]].writable = true; + pml4e[index[3]].readable = true; + pml4e[index[3]].executable = true; + } + + /* Allocate page directory table if not present. */ + struct eptPageTableEntry *pdpe; + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].readable) { + pdpe[index[2]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pdpe[index[2]].writable = true; + pdpe[index[2]].readable = true; + pdpe[index[2]].executable = true; + } + + /* Allocate page table if not present. */ + struct eptPageTableEntry *pde; + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].readable) { + pde[index[1]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pde[index[1]].writable = true; + pde[index[1]].readable = true; + pde[index[1]].executable = true; + } + + /* Fill in page table entry. */ + struct eptPageTableEntry *pte; + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + pte[index[0]].address = paddr >> vm->page_shift; + pte[index[0]].writable = true; + pte[index[0]].readable = true; + pte[index[0]].executable = true; + + /* + * For now mark these as accessed and dirty because the only + * testcase we have needs that. Can be reconsidered later. + */ + pte[index[0]].accessed = true; + pte[index[0]].dirty = true; +} + +/* + * Map a range of EPT guest physical addresses to the VM's physical address + * + * Input Args: + * vm - Virtual Machine + * nested_paddr - Nested guest physical address to map + * paddr - VM Physical Address + * size - The size of the range to map + * eptp_memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a nested guest translation for the + * page range starting at nested_paddr to the page range starting at paddr. + */ +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + uint32_t eptp_memslot) +{ + size_t page_size = vm->page_size; + size_t npages = size / page_size; + + TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); + TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + + while (npages--) { + nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot); + nested_paddr += page_size; + paddr += page_size; + } +} + +/* Prepare an identity extended page table that maps all the + * physical pages in VM. + */ +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot, uint32_t eptp_memslot) +{ + sparsebit_idx_t i, last; + struct userspace_mem_region *region = + memslot2region(vm, memslot); + + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; + last = i + (region->region.memory_size >> vm->page_shift); + for (;;) { + i = sparsebit_next_clear(region->unused_phy_pages, i); + if (i > last) + break; + + nested_map(vmx, vm, + (uint64_t)i << vm->page_shift, + (uint64_t)i << vm->page_shift, + 1 << vm->page_shift, + eptp_memslot); + } +} + +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); + vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c new file mode 100644 index 000000000000..0bca1cfe2c1e --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging test + * + * Copyright (C) 2018, Red Hat, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include <stdio.h> +#include <stdlib.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define VCPU_ID 1 + +/* The memory slot index to track dirty pages */ +#define TEST_MEM_SLOT_INDEX 1 +#define TEST_MEM_SIZE 3 + +/* L1 guest test virtual memory offset */ +#define GUEST_TEST_MEM 0xc0000000 + +/* L2 guest test virtual memory offset */ +#define NESTED_TEST_MEM1 0xc0001000 +#define NESTED_TEST_MEM2 0xc0002000 + +static void l2_guest_code(void) +{ + *(volatile uint64_t *)NESTED_TEST_MEM1; + *(volatile uint64_t *)NESTED_TEST_MEM1 = 1; + GUEST_SYNC(true); + GUEST_SYNC(false); + + *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + GUEST_SYNC(true); + *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + GUEST_SYNC(true); + GUEST_SYNC(false); + + /* Exit to L1 and never come back. */ + vmcall(); +} + +void l1_guest_code(struct vmx_pages *vmx) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + prepare_vmcs(vmx, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(false); + GUEST_ASSERT(!vmlaunch()); + GUEST_SYNC(false); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0; + struct vmx_pages *vmx; + unsigned long *bmap; + uint64_t *host_test_mem; + + struct kvm_vm *vm; + struct kvm_run *run; + struct ucall uc; + bool done = false; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, l1_guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + run = vcpu_state(vm, VCPU_ID); + + /* Add an extra memory slot for testing dirty logging */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + GUEST_TEST_MEM, + TEST_MEM_SLOT_INDEX, + TEST_MEM_SIZE, + KVM_MEM_LOG_DIRTY_PAGES); + + /* + * Add an identity map for GVA range [0xc0000000, 0xc0002000). This + * affects both L1 and L2. However... + */ + virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, + TEST_MEM_SIZE * 4096, 0); + + /* + * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to + * 0xc0000000. + * + * Note that prepare_eptp should be called only L1's GPA map is done, + * meaning after the last call to virt_map. + */ + prepare_eptp(vmx, vm, 0); + nested_map_memslot(vmx, vm, 0, 0); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0); + + bmap = bitmap_alloc(TEST_MEM_SIZE); + host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); + + while (!done) { + memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096); + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + /* + * The nested guest wrote at offset 0x1000 in the memslot, but the + * dirty bitmap must be filled in according to L1 GPA, not L2. + */ + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + if (uc.args[1]) { + TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n"); + TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n"); + } else { + TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n"); + } + + TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n"); + TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n"); + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + } + } +} diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index 464c9b76148f..7550f08822a3 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g -I../../../../usr/include/ -lpthread +CFLAGS += -g -I../../../../usr/include/ -pthread TEST_GEN_PROGS := pidfd_test pidfd_open_test pidfd_poll_test pidfd_wait diff --git a/virt/kvm/arm/vgic/trace.h b/virt/kvm/arm/vgic/trace.h index 55fed77a9f73..4fd4f6db181b 100644 --- a/virt/kvm/arm/vgic/trace.h +++ b/virt/kvm/arm/vgic/trace.h @@ -30,7 +30,7 @@ TRACE_EVENT(vgic_update_irq_pending, #endif /* _TRACE_VGIC_H */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm/vgic +#define TRACE_INCLUDE_PATH ../../virt/kvm/arm/vgic #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e6de3159e682..fd68fbe0a75d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -617,8 +617,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) stat_data->kvm = kvm; stat_data->offset = p->offset; + stat_data->mode = p->mode ? p->mode : 0644; kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; - debugfs_create_file(p->name, 0644, kvm->debugfs_dentry, + debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry, stat_data, stat_fops_per_vm[p->kind]); } return 0; @@ -3929,7 +3930,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) return -ENOENT; - if (simple_attr_open(inode, file, get, set, fmt)) { + if (simple_attr_open(inode, file, get, + stat_data->mode & S_IWUGO ? set : NULL, + fmt)) { kvm_put_kvm(stat_data->kvm); return -ENOMEM; } @@ -4177,7 +4180,8 @@ static void kvm_init_debug(void) kvm_debugfs_num_entries = 0; for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { - debugfs_create_file(p->name, 0644, kvm_debugfs_dir, + int mode = p->mode ? p->mode : 0644; + debugfs_create_file(p->name, mode, kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind]); } |