diff options
author | Takashi Iwai <tiwai@suse.de> | 2024-05-08 19:16:58 +0300 |
---|---|---|
committer | Takashi Iwai <tiwai@suse.de> | 2024-05-08 19:16:58 +0300 |
commit | 9b61b2069681b60d0d0bedbd0fe3c70123dddb19 (patch) | |
tree | 483407944dd3037584b8825e0d33feb7f4eddf5f | |
parent | b9112b17950c955071abfd4331d4daa162d6ec4d (diff) | |
parent | 2ff85dc64df5bc0ff12e2f4e23fae7bbadbf1d5d (diff) | |
download | linux-9b61b2069681b60d0d0bedbd0fe3c70123dddb19.tar.xz |
Merge branch 'topic/hda-config-pm-cleanup' into for-next
Pull HD-audio CONFIG_PM cleanup.
Signed-off-by: Takashi Iwai <tiwai@suse.de>
807 files changed, 10279 insertions, 4780 deletions
@@ -20,6 +20,7 @@ Adam Oldham <oldhamca@gmail.com> Adam Radford <aradford@gmail.com> Adriana Reus <adi.reus@gmail.com> <adriana.reus@intel.com> Adrian Bunk <bunk@stusta.de> +Ajay Kaher <ajay.kaher@broadcom.com> <akaher@vmware.com> Akhil P Oommen <quic_akhilpo@quicinc.com> <akhilpo@codeaurora.org> Alan Cox <alan@lxorguk.ukuu.org.uk> Alan Cox <root@hraefn.swansea.linux.org.uk> @@ -36,6 +37,7 @@ Alexei Avshalom Lazar <quic_ailizaro@quicinc.com> <ailizaro@codeaurora.org> Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com> Alexei Starovoitov <ast@kernel.org> <ast@fb.com> Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com> +Alexey Makhalov <alexey.amakhalov@broadcom.com> <amakhalov@vmware.com> Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com> Alex Shi <alexs@kernel.org> <alex.shi@intel.com> Alex Shi <alexs@kernel.org> <alex.shi@linaro.org> @@ -110,6 +112,7 @@ Brendan Higgins <brendan.higgins@linux.dev> <brendanhiggins@google.com> Brian Avery <b.avery@hp.com> Brian King <brking@us.ibm.com> Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com> +Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com> Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com> Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org> Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org> @@ -340,7 +343,8 @@ Lee Jones <lee@kernel.org> <joneslee@google.com> Lee Jones <lee@kernel.org> <lee.jones@canonical.com> Lee Jones <lee@kernel.org> <lee.jones@linaro.org> Lee Jones <lee@kernel.org> <lee@ubuntu.com> -Leonard Crestez <leonard.crestez@nxp.com> Leonard Crestez <cdleonard@gmail.com> +Leonard Crestez <cdleonard@gmail.com> <leonard.crestez@nxp.com> +Leonard Crestez <cdleonard@gmail.com> <leonard.crestez@intel.com> Leonardo Bras <leobras.c@gmail.com> <leonardo@linux.ibm.com> Leonard Göhrs <l.goehrs@pengutronix.de> Leonid I Ananiev <leonid.i.ananiev@intel.com> @@ -497,7 +501,8 @@ Prasad Sodagudi <quic_psodagud@quicinc.com> 
<psodagud@codeaurora.org> Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com> Qais Yousef <qyousef@layalina.io> <qais.yousef@imgtec.com> Qais Yousef <qyousef@layalina.io> <qais.yousef@arm.com> -Quentin Monnet <quentin@isovalent.com> <quentin.monnet@netronome.com> +Quentin Monnet <qmo@kernel.org> <quentin.monnet@netronome.com> +Quentin Monnet <qmo@kernel.org> <quentin@isovalent.com> Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com> Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl> Rajeev Nandan <quic_rajeevny@quicinc.com> <rajeevny@codeaurora.org> @@ -527,6 +532,7 @@ Rocky Liao <quic_rjliao@quicinc.com> <rjliao@codeaurora.org> Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com> Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com> Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru> +Ronak Doshi <ronak.doshi@broadcom.com> <doshir@vmware.com> Muchun Song <muchun.song@linux.dev> <songmuchun@bytedance.com> Muchun Song <muchun.song@linux.dev> <smuchun@gmail.com> Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com> @@ -649,6 +655,7 @@ Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com> Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com> Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.org> Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.com> +Vishnu Dasa <vishnu.dasa@broadcom.com> <vdasa@vmware.com> Vivek Aknurwar <quic_viveka@quicinc.com> <viveka@codeaurora.org> Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com> Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bb884c14b2f6..623fce7d5fcd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6599,7 +6599,7 @@ To turn off having tracepoints sent to printk, echo 0 > /proc/sys/kernel/tracepoint_printk Note, echoing 1 
into this file without the - tracepoint_printk kernel cmdline option has no effect. + tp_printk kernel cmdline option has no effect. The tp_printk_stop_on_boot (see below) can also be used to stop the printing of events to console at diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst index b42132969e31..13632671adae 100644 --- a/Documentation/admin-guide/mm/zswap.rst +++ b/Documentation/admin-guide/mm/zswap.rst @@ -155,7 +155,7 @@ Setting this parameter to 100 will disable the hysteresis. Some users cannot tolerate the swapping that comes with zswap store failures and zswap writebacks. Swapping can be disabled entirely (without disabling -zswap itself) on a cgroup-basis as follows: +zswap itself) on a cgroup-basis as follows:: echo 0 > /sys/fs/cgroup/<cgroup-name>/memory.zswap.writeback @@ -166,7 +166,7 @@ writeback (because the same pages might be rejected again and again). When there is a sizable amount of cold memory residing in the zswap pool, it can be advantageous to proactively write these cold pages to swap and reclaim the memory for other use cases. By default, the zswap shrinker is disabled. -User can enable it as follows: +User can enable it as follows:: echo Y > /sys/module/zswap/parameters/shrinker_enabled diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index 3712d81cb50c..6c245582d8fb 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -574,7 +574,7 @@ Memory b/w domain is L3 cache. MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;... Memory bandwidth Allocation specified in MiBps ---------------------------------------------- +---------------------------------------------- Memory bandwidth domain is L3 cache. 
:: diff --git a/Documentation/dev-tools/testing-overview.rst b/Documentation/dev-tools/testing-overview.rst index 0aaf6ea53608..1619e5e5cc9c 100644 --- a/Documentation/dev-tools/testing-overview.rst +++ b/Documentation/dev-tools/testing-overview.rst @@ -104,6 +104,8 @@ Some of these tools are listed below: KASAN and can be used in production. See Documentation/dev-tools/kfence.rst * lockdep is a locking correctness validator. See Documentation/locking/lockdep-design.rst +* Runtime Verification (RV) supports checking specific behaviours for a given + subsystem. See Documentation/trace/rv/runtime-verification.rst * There are several other pieces of debug instrumentation in the kernel, many of which can be found in lib/Kconfig.debug diff --git a/Documentation/devicetree/bindings/clock/keystone-gate.txt b/Documentation/devicetree/bindings/clock/keystone-gate.txt index c5aa187026e3..43f6fb6c9392 100644 --- a/Documentation/devicetree/bindings/clock/keystone-gate.txt +++ b/Documentation/devicetree/bindings/clock/keystone-gate.txt @@ -1,5 +1,3 @@ -Status: Unstable - ABI compatibility may be broken in the future - Binding for Keystone gate control driver which uses PSC controller IP. This binding uses the common clock binding[1]. diff --git a/Documentation/devicetree/bindings/clock/keystone-pll.txt b/Documentation/devicetree/bindings/clock/keystone-pll.txt index 9a3fbc665606..69b0eb7c03c9 100644 --- a/Documentation/devicetree/bindings/clock/keystone-pll.txt +++ b/Documentation/devicetree/bindings/clock/keystone-pll.txt @@ -1,5 +1,3 @@ -Status: Unstable - ABI compatibility may be broken in the future - Binding for keystone PLLs. The main PLL IP typically has a multiplier, a divider and a post divider. 
The additional PLL IPs like ARMPLL, DDRPLL and PAPLL are controlled by the memory mapped register where as the Main diff --git a/Documentation/devicetree/bindings/clock/ti/adpll.txt b/Documentation/devicetree/bindings/clock/ti/adpll.txt index 4c8a2ce2cd70..3122360adcf3 100644 --- a/Documentation/devicetree/bindings/clock/ti/adpll.txt +++ b/Documentation/devicetree/bindings/clock/ti/adpll.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments ADPLL clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped ADPLL with two to three selectable input clocks and three to four children. diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt index ade4dd4c30f0..bbd505c1199d 100644 --- a/Documentation/devicetree/bindings/clock/ti/apll.txt +++ b/Documentation/devicetree/bindings/clock/ti/apll.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments APLL clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped APLL with usually two selectable input clocks (reference clock and bypass clock), with analog phase locked diff --git a/Documentation/devicetree/bindings/clock/ti/autoidle.txt b/Documentation/devicetree/bindings/clock/ti/autoidle.txt index 7c735dde9fe9..05645a10a9e3 100644 --- a/Documentation/devicetree/bindings/clock/ti/autoidle.txt +++ b/Documentation/devicetree/bindings/clock/ti/autoidle.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments autoidle clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register mapped clock which can be put to idle automatically by hardware based on the usage and a configuration bit setting. 
Autoidle clock is never an individual diff --git a/Documentation/devicetree/bindings/clock/ti/clockdomain.txt b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt index 9c6199249ce5..edf0b5d42768 100644 --- a/Documentation/devicetree/bindings/clock/ti/clockdomain.txt +++ b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments clockdomain. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1] in consumer role. Every clock on TI SoC belongs to one clockdomain, but software only needs this information for specific clocks which require diff --git a/Documentation/devicetree/bindings/clock/ti/composite.txt b/Documentation/devicetree/bindings/clock/ti/composite.txt index 33ac7c9ad053..6f7e1331b546 100644 --- a/Documentation/devicetree/bindings/clock/ti/composite.txt +++ b/Documentation/devicetree/bindings/clock/ti/composite.txt @@ -1,7 +1,5 @@ Binding for TI composite clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped composite clock with multiple different sub-types; diff --git a/Documentation/devicetree/bindings/clock/ti/divider.txt b/Documentation/devicetree/bindings/clock/ti/divider.txt index 9b13b32974f9..4d7c76f0b356 100644 --- a/Documentation/devicetree/bindings/clock/ti/divider.txt +++ b/Documentation/devicetree/bindings/clock/ti/divider.txt @@ -1,7 +1,5 @@ Binding for TI divider clock -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped adjustable clock rate divider that does not gate and has only one input clock or parent. 
By default the value programmed into diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt index 37a7cb6ad07d..14a1b72c2e71 100644 --- a/Documentation/devicetree/bindings/clock/ti/dpll.txt +++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments DPLL clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped DPLL with usually two selectable input clocks (reference clock and bypass clock), with digital phase locked diff --git a/Documentation/devicetree/bindings/clock/ti/fapll.txt b/Documentation/devicetree/bindings/clock/ti/fapll.txt index c19b3f253b8c..88986ef39ddd 100644 --- a/Documentation/devicetree/bindings/clock/ti/fapll.txt +++ b/Documentation/devicetree/bindings/clock/ti/fapll.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments FAPLL clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped FAPLL with usually two selectable input clocks (reference clock and bypass clock), and one or more child diff --git a/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt b/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt index 518e3c142276..dc69477b6e98 100644 --- a/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt +++ b/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt @@ -1,7 +1,5 @@ Binding for TI fixed factor rate clock sources. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1], and also uses the autoidle support from TI autoidle clock [2]. 
diff --git a/Documentation/devicetree/bindings/clock/ti/gate.txt b/Documentation/devicetree/bindings/clock/ti/gate.txt index 4982615c01b9..a8e0335b006a 100644 --- a/Documentation/devicetree/bindings/clock/ti/gate.txt +++ b/Documentation/devicetree/bindings/clock/ti/gate.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments gate clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. This clock is quite much similar to the basic gate-clock [2], however, it supports a number of additional features. If no register diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt index d3eb5ca92a7f..85fb1f2d2d28 100644 --- a/Documentation/devicetree/bindings/clock/ti/interface.txt +++ b/Documentation/devicetree/bindings/clock/ti/interface.txt @@ -1,7 +1,5 @@ Binding for Texas Instruments interface clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. This clock is quite much similar to the basic gate-clock [2], however, it supports a number of additional features, including diff --git a/Documentation/devicetree/bindings/clock/ti/mux.txt b/Documentation/devicetree/bindings/clock/ti/mux.txt index b33f641f1043..cd56d3c1c09f 100644 --- a/Documentation/devicetree/bindings/clock/ti/mux.txt +++ b/Documentation/devicetree/bindings/clock/ti/mux.txt @@ -1,7 +1,5 @@ Binding for TI mux clock. -Binding status: Unstable - ABI compatibility may be broken in the future - This binding uses the common clock binding[1]. It assumes a register-mapped multiplexer with multiple input clock signals or parents, one of which can be selected as output. 
This clock does not diff --git a/Documentation/devicetree/bindings/dts-coding-style.rst b/Documentation/devicetree/bindings/dts-coding-style.rst index a9bdd2b59dca..8a68331075a0 100644 --- a/Documentation/devicetree/bindings/dts-coding-style.rst +++ b/Documentation/devicetree/bindings/dts-coding-style.rst @@ -144,6 +144,8 @@ Example:: #dma-cells = <1>; clocks = <&clock_controller 0>, <&clock_controller 1>; clock-names = "bus", "host"; + #address-cells = <1>; + #size-cells = <1>; vendor,custom-property = <2>; status = "disabled"; diff --git a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml index 528ef3572b62..055a3351880b 100644 --- a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml +++ b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml @@ -94,6 +94,10 @@ properties: local-bd-address: true + qcom,local-bd-address-broken: + type: boolean + description: + boot firmware is incorrectly passing the address in big-endian order required: - compatible diff --git a/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt b/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt index 25f8658e216f..48a49c516b62 100644 --- a/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt +++ b/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt @@ -1,9 +1,6 @@ TI Davinci DSP devices ======================= -Binding status: Unstable - Subject to changes for DT representation of clocks - and resets - The TI Davinci family of SoCs usually contains a TI DSP Core sub-system that is used to offload some of the processor-intensive tasks or algorithms, for achieving various system level goals. 
diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml index 397f75909b20..ce1a6505eb51 100644 --- a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml @@ -51,7 +51,7 @@ properties: ranges: true patternProperties: - "^clock-controller@[0-9a-z]+$": + "^clock-controller@[0-9a-f]+$": $ref: /schemas/clock/fsl,flexspi-clock.yaml# required: diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml index 8d088b5fe823..a6a511b00a12 100644 --- a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml @@ -41,7 +41,7 @@ properties: ranges: true patternProperties: - "^interrupt-controller@[a-z0-9]+$": + "^interrupt-controller@[a-f0-9]+$": $ref: /schemas/interrupt-controller/fsl,ls-extirq.yaml# required: diff --git a/Documentation/devicetree/bindings/sound/rt5645.txt b/Documentation/devicetree/bindings/sound/rt5645.txt index 41a62fd2ae1f..c1fa379f5f3e 100644 --- a/Documentation/devicetree/bindings/sound/rt5645.txt +++ b/Documentation/devicetree/bindings/sound/rt5645.txt @@ -20,6 +20,11 @@ Optional properties: a GPIO spec for the external headphone detect pin. If jd-mode = 0, we will get the JD status by getting the value of hp-detect-gpios. +- cbj-sleeve-gpios: + a GPIO spec to control the external combo jack circuit to tie the sleeve/ring2 + contacts to the ground or floating. It could avoid some electric noise from the + active speaker jacks. + - realtek,in2-differential Boolean. Indicate MIC2 input are differential, rather than single-ended. 
@@ -68,6 +73,7 @@ codec: rt5650@1a { compatible = "realtek,rt5650"; reg = <0x1a>; hp-detect-gpios = <&gpio 19 0>; + cbj-sleeve-gpios = <&gpio 20 0>; interrupt-parent = <&gpio>; interrupts = <7 IRQ_TYPE_EDGE_FALLING>; realtek,dmic-en = "true"; diff --git a/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml b/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml index 7a4a6ab85970..ab8f28993139 100644 --- a/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml +++ b/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml @@ -60,7 +60,7 @@ properties: be implemented in an always-on power domain." patternProperties: - '^frame@[0-9a-z]*$': + '^frame@[0-9a-f]+$': type: object additionalProperties: false description: A timer node has up to 8 frame sub-nodes, each with the following properties. diff --git a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml index 10c146424baa..cd3680dc002f 100644 --- a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml +++ b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml @@ -27,10 +27,13 @@ properties: - qcom,msm8996-ufshc - qcom,msm8998-ufshc - qcom,sa8775p-ufshc + - qcom,sc7180-ufshc - qcom,sc7280-ufshc + - qcom,sc8180x-ufshc - qcom,sc8280xp-ufshc - qcom,sdm845-ufshc - qcom,sm6115-ufshc + - qcom,sm6125-ufshc - qcom,sm6350-ufshc - qcom,sm8150-ufshc - qcom,sm8250-ufshc @@ -42,11 +45,11 @@ properties: - const: jedec,ufs-2.0 clocks: - minItems: 8 + minItems: 7 maxItems: 11 clock-names: - minItems: 8 + minItems: 7 maxItems: 11 dma-coherent: true @@ -117,9 +120,35 @@ allOf: compatible: contains: enum: + - qcom,sc7180-ufshc + then: + properties: + clocks: + minItems: 7 + maxItems: 7 + clock-names: + items: + - const: core_clk + - const: bus_aggr_clk + - const: iface_clk + - const: core_clk_unipro + - const: ref_clk + - const: tx_lane0_sync_clk + - const: rx_lane0_sync_clk + reg: + maxItems: 1 + reg-names: + maxItems: 1 + + - if: + 
properties: + compatible: + contains: + enum: - qcom,msm8998-ufshc - qcom,sa8775p-ufshc - qcom,sc7280-ufshc + - qcom,sc8180x-ufshc - qcom,sc8280xp-ufshc - qcom,sm8250-ufshc - qcom,sm8350-ufshc @@ -215,6 +244,7 @@ allOf: contains: enum: - qcom,sm6115-ufshc + - qcom,sm6125-ufshc then: properties: clocks: @@ -248,7 +278,7 @@ allOf: reg: maxItems: 1 clocks: - minItems: 8 + minItems: 7 maxItems: 8 else: properties: @@ -256,7 +286,7 @@ allOf: minItems: 1 maxItems: 2 clocks: - minItems: 8 + minItems: 7 maxItems: 11 unevaluatedProperties: false diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index b1d97fafddcf..bb5c44f8bd1c 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -178,7 +178,7 @@ yet. Bug reports are always welcome at the issue tracker below! - ``LLVM=1`` * - s390 - Maintained - - ``CC=clang`` + - ``LLVM=1`` (LLVM >= 18.1.0), ``CC=clang`` (LLVM < 18.1.0) * - um (User Mode) - Maintained - ``LLVM=1`` diff --git a/Documentation/networking/devlink/devlink-eswitch-attr.rst b/Documentation/networking/devlink/devlink-eswitch-attr.rst new file mode 100644 index 000000000000..08bb39ab1528 --- /dev/null +++ b/Documentation/networking/devlink/devlink-eswitch-attr.rst @@ -0,0 +1,76 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================== +Devlink E-Switch Attribute +========================== + +Devlink E-Switch supports two modes of operation: legacy and switchdev. +Legacy mode operates based on traditional MAC/VLAN steering rules. Switching +decisions are made based on MAC addresses, VLANs, etc. There is limited ability +to offload switching rules to hardware. + +On the other hand, switchdev mode allows for more advanced offloading +capabilities of the E-Switch to hardware. In switchdev mode, more switching +rules and logic can be offloaded to the hardware switch ASIC. It enables +representor netdevices that represent the slow path of virtual functions (VFs) +or scalable-functions (SFs) of the device. 
See more information about +:ref:`Documentation/networking/switchdev.rst <switchdev>` and +:ref:`Documentation/networking/representors.rst <representors>`. + +In addition, the devlink E-Switch also comes with other attributes listed +in the following section. + +Attributes Description +====================== + +The following is a list of E-Switch attributes. + +.. list-table:: E-Switch attributes + :widths: 8 5 45 + + * - Name + - Type + - Description + * - ``mode`` + - enum + - The mode of the device. The mode can be one of the following: + + * ``legacy`` operates based on traditional MAC/VLAN steering + rules. + * ``switchdev`` allows for more advanced offloading capabilities of + the E-Switch to hardware. + * - ``inline-mode`` + - enum + - Some HWs need the VF driver to put part of the packet + headers on the TX descriptor so the e-switch can do proper + matching and steering. Support for both switchdev mode and legacy mode. + + * ``none`` none. + * ``link`` L2 mode. + * ``network`` L3 mode. + * ``transport`` L4 mode. + * - ``encap-mode`` + - enum + - The encapsulation mode of the device. Support for both switchdev mode + and legacy mode. The mode can be one of the following: + + * ``none`` Disable encapsulation support. + * ``basic`` Enable encapsulation support. + +Example Usage +============= + +.. 
code:: shell + + # enable switchdev mode + $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev + + # set inline-mode and encap-mode + $ devlink dev eswitch set pci/0000:08:00.0 inline-mode none encap-mode basic + + # display devlink device eswitch attributes + $ devlink dev eswitch show pci/0000:08:00.0 + pci/0000:08:00.0: mode switchdev inline-mode none encap-mode basic + + # enable encap-mode with legacy mode + $ devlink dev eswitch set pci/0000:08:00.0 mode legacy inline-mode none encap-mode basic diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index e14d7a701b72..948c8c44e233 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -67,6 +67,7 @@ general. devlink-selftests devlink-trap devlink-linecard + devlink-eswitch-attr Driver-specific documentation ----------------------------- diff --git a/Documentation/networking/representors.rst b/Documentation/networking/representors.rst index decb39c19b9e..5e23386f6968 100644 --- a/Documentation/networking/representors.rst +++ b/Documentation/networking/representors.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 +.. _representors: ============================= Network Function Representors diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst index 995780088eb2..84335d119ff1 100644 --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst @@ -46,21 +46,16 @@ SEV hardware uses ASIDs to associate a memory encryption key with a VM. Hence, the ASID for the SEV-enabled guests must be from 1 to a maximum value defined in the CPUID 0x8000001f[ecx] field. -SEV Key Management -================== +The KVM_MEMORY_ENCRYPT_OP ioctl +=============================== -The SEV guest key management is handled by a separate processor called the AMD -Secure Processor (AMD-SP). 
Firmware running inside the AMD-SP provides a secure -key management interface to perform common hypervisor activities such as -encrypting bootstrap code, snapshot, migrating and debugging the guest. For more -information, see the SEV Key Management spec [api-spec]_ - -The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP. If the argument -to KVM_MEMORY_ENCRYPT_OP is NULL, the ioctl returns 0 if SEV is enabled -and ``ENOTTY`` if it is disabled (on some older versions of Linux, -the ioctl runs normally even with a NULL argument, and therefore will -likely return ``EFAULT``). If non-NULL, the argument to KVM_MEMORY_ENCRYPT_OP -must be a struct kvm_sev_cmd:: +The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP, which operates on +the VM file descriptor. If the argument to KVM_MEMORY_ENCRYPT_OP is NULL, +the ioctl returns 0 if SEV is enabled and ``ENOTTY`` if it is disabled +(on some older versions of Linux, the ioctl tries to run normally even +with a NULL argument, and therefore will likely return ``EFAULT`` instead +of zero if SEV is enabled). If non-NULL, the argument to +KVM_MEMORY_ENCRYPT_OP must be a struct kvm_sev_cmd:: struct kvm_sev_cmd { __u32 id; @@ -87,10 +82,6 @@ guests, such as launching, running, snapshotting, migrating and decommissioning. The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform context. In a typical workflow, this command should be the first command issued. -The firmware can be initialized either by using its own non-volatile storage or -the OS can manage the NV storage for the firmware using the module parameter -``init_ex_path``. If the file specified by ``init_ex_path`` does not exist or -is invalid, the OS will create or override the file with output from PSP. Returns: 0 on success, -negative on error @@ -434,6 +425,21 @@ issued by the hypervisor to make the guest ready for execution. 
Returns: 0 on success, -negative on error +Firmware Management +=================== + +The SEV guest key management is handled by a separate processor called the AMD +Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure +key management interface to perform common hypervisor activities such as +encrypting bootstrap code, snapshot, migrating and debugging the guest. For more +information, see the SEV Key Management spec [api-spec]_ + +The AMD-SP firmware can be initialized either by using its own non-volatile +storage or the OS can manage the NV storage for the firmware using +parameter ``init_ex_path`` of the ``ccp`` module. If the file specified +by ``init_ex_path`` does not exist or is invalid, the OS will create or +override the file with PSP non-volatile storage. + References ========== diff --git a/Documentation/virt/kvm/x86/msr.rst b/Documentation/virt/kvm/x86/msr.rst index 9315fc385fb0..3aecf2a70e7b 100644 --- a/Documentation/virt/kvm/x86/msr.rst +++ b/Documentation/virt/kvm/x86/msr.rst @@ -193,8 +193,8 @@ data: Asynchronous page fault (APF) control MSR. Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area - which must be in guest RAM and must be zeroed. This memory is expected - to hold a copy of the following structure:: + which must be in guest RAM. This memory is expected to hold the + following structure:: struct kvm_vcpu_pv_apf_data { /* Used for 'page not present' events delivered via #PF */ @@ -204,7 +204,6 @@ data: __u32 token; __u8 pad[56]; - __u32 enabled; }; Bits 5-4 of the MSR are reserved and should be zero. Bit 0 is set to 1 @@ -232,14 +231,14 @@ data: as regular page fault, guest must reset 'flags' to '0' before it does something that can generate normal page fault. - Bytes 5-7 of 64 byte memory location ('token') will be written to by the + Bytes 4-7 of 64 byte memory location ('token') will be written to by the hypervisor at the time of APF 'page ready' event injection. 
The content - of these bytes is a token which was previously delivered as 'page not - present' event. The event indicates the page in now available. Guest is - supposed to write '0' to 'token' when it is done handling 'page ready' - event and to write 1' to MSR_KVM_ASYNC_PF_ACK after clearing the location; - writing to the MSR forces KVM to re-scan its queue and deliver the next - pending notification. + of these bytes is a token which was previously delivered in CR2 as + 'page not present' event. The event indicates the page is now available. + Guest is supposed to write '0' to 'token' when it is done handling + 'page ready' event and to write '1' to MSR_KVM_ASYNC_PF_ACK after + clearing the location; writing to the MSR forces KVM to re-scan its + queue and deliver the next pending notification. Note, MSR_KVM_ASYNC_PF_INT MSR specifying the interrupt vector for 'page ready' APF delivery needs to be written to before enabling APF mechanism diff --git a/MAINTAINERS b/MAINTAINERS index aa3b947fb080..aea47e04c3a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3942,8 +3942,7 @@ F: kernel/bpf/ringbuf.c BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF) M: KP Singh <kpsingh@kernel.org> -R: Florent Revest <revest@chromium.org> -R: Brendan Jackman <jackmanb@chromium.org> +R: Matt Bobrowski <mattbobrowski@google.com> L: bpf@vger.kernel.org S: Maintained F: Documentation/bpf/prog_lsm.rst @@ -3968,7 +3967,7 @@ F: kernel/bpf/bpf_lru* F: kernel/bpf/cgroup.c BPF [TOOLING] (bpftool) -M: Quentin Monnet <quentin@isovalent.com> +M: Quentin Monnet <qmo@kernel.org> L: bpf@vger.kernel.org S: Maintained F: kernel/bpf/disasm.* @@ -6157,7 +6156,6 @@ DEVICE-MAPPER (LVM) M: Alasdair Kergon <agk@redhat.com> M: Mike Snitzer <snitzer@kernel.org> M: Mikulas Patocka <mpatocka@redhat.com> -M: dm-devel@lists.linux.dev L: dm-devel@lists.linux.dev S: Maintained Q: http://patchwork.kernel.org/project/dm-devel/list/ @@ -6173,7 +6171,6 @@ F: include/uapi/linux/dm-*.h DEVICE-MAPPER VDO 
TARGET M: Matthew Sakai <msakai@redhat.com> -M: dm-devel@lists.linux.dev L: dm-devel@lists.linux.dev S: Maintained F: Documentation/admin-guide/device-mapper/vdo*.rst @@ -7941,6 +7938,7 @@ M: Gao Xiang <xiang@kernel.org> M: Chao Yu <chao@kernel.org> R: Yue Hu <huyue2@coolpad.com> R: Jeffle Xu <jefflexu@linux.alibaba.com> +R: Sandeep Dhavale <dhavale@google.com> L: linux-erofs@lists.ozlabs.org S: Maintained W: https://erofs.docs.kernel.org @@ -9653,7 +9651,9 @@ L: linux-input@vger.kernel.org S: Maintained F: drivers/hid/hid-logitech-hidpp.c -HIGH-RESOLUTION TIMERS, CLOCKEVENTS +HIGH-RESOLUTION TIMERS, TIMER WHEEL, CLOCKEVENTS +M: Anna-Maria Behnsen <anna-maria@linutronix.de> +M: Frederic Weisbecker <frederic@kernel.org> M: Thomas Gleixner <tglx@linutronix.de> L: linux-kernel@vger.kernel.org S: Maintained @@ -9661,9 +9661,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core F: Documentation/timers/ F: include/linux/clockchips.h F: include/linux/hrtimer.h +F: include/linux/timer.h F: kernel/time/clockevents.c F: kernel/time/hrtimer.c -F: kernel/time/timer_*.c +F: kernel/time/timer.c +F: kernel/time/timer_list.c +F: kernel/time/timer_migration.* +F: tools/testing/selftests/timers/ HIGH-SPEED SCC DRIVER FOR AX.25 L: linux-hams@vger.kernel.org @@ -13134,6 +13138,7 @@ F: drivers/net/ethernet/marvell/mvpp2/ MARVELL MWIFIEX WIRELESS DRIVER M: Brian Norris <briannorris@chromium.org> +R: Francesco Dolcini <francesco@dolcini.it> L: linux-wireless@vger.kernel.org S: Odd Fixes F: drivers/net/wireless/marvell/mwifiex/ @@ -14014,6 +14019,7 @@ F: drivers/net/ethernet/mellanox/mlx4/en_* MELLANOX ETHERNET DRIVER (mlx5e) M: Saeed Mahameed <saeedm@nvidia.com> +M: Tariq Toukan <tariqt@nvidia.com> L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com @@ -14081,6 +14087,7 @@ F: include/uapi/rdma/mlx4-abi.h MELLANOX MLX5 core VPI driver M: Saeed Mahameed <saeedm@nvidia.com> M: Leon Romanovsky <leonro@nvidia.com> +M: Tariq Toukan <tariqt@nvidia.com> 
L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org S: Supported @@ -15627,9 +15634,10 @@ F: drivers/misc/nsm.c F: include/uapi/linux/nsm.h NOHZ, DYNTICKS SUPPORT +M: Anna-Maria Behnsen <anna-maria@linutronix.de> M: Frederic Weisbecker <frederic@kernel.org> -M: Thomas Gleixner <tglx@linutronix.de> M: Ingo Molnar <mingo@kernel.org> +M: Thomas Gleixner <tglx@linutronix.de> L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/nohz @@ -16725,9 +16733,9 @@ F: include/uapi/linux/ppdev.h PARAVIRT_OPS INTERFACE M: Juergen Gross <jgross@suse.com> -R: Ajay Kaher <akaher@vmware.com> -R: Alexey Makhalov <amakhalov@vmware.com> -R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> +R: Ajay Kaher <ajay.kaher@broadcom.com> +R: Alexey Makhalov <alexey.amakhalov@broadcom.com> +R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported @@ -17590,15 +17598,20 @@ F: drivers/pnp/ F: include/linux/pnp.h POSIX CLOCKS and TIMERS +M: Anna-Maria Behnsen <anna-maria@linutronix.de> +M: Frederic Weisbecker <frederic@kernel.org> M: Thomas Gleixner <tglx@linutronix.de> L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core F: fs/timerfd.c F: include/linux/time_namespace.h -F: include/linux/timer* +F: include/linux/timerfd.h +F: include/uapi/linux/time.h +F: include/uapi/linux/timerfd.h F: include/trace/events/timer* -F: kernel/time/*timer* +F: kernel/time/itimer.c +F: kernel/time/posix-* F: kernel/time/namespace.c POWER MANAGEMENT CORE @@ -18645,18 +18658,21 @@ REALTEK WIRELESS DRIVER (rtlwifi family) M: Ping-Ke Shih <pkshih@realtek.com> L: linux-wireless@vger.kernel.org S: Maintained +T: git https://github.com/pkshih/rtw.git F: drivers/net/wireless/realtek/rtlwifi/ REALTEK WIRELESS DRIVER (rtw88) M: Ping-Ke Shih <pkshih@realtek.com> L: linux-wireless@vger.kernel.org S: 
Maintained +T: git https://github.com/pkshih/rtw.git F: drivers/net/wireless/realtek/rtw88/ REALTEK WIRELESS DRIVER (rtw89) M: Ping-Ke Shih <pkshih@realtek.com> L: linux-wireless@vger.kernel.org S: Maintained +T: git https://github.com/pkshih/rtw.git F: drivers/net/wireless/realtek/rtw89/ REDPINE WIRELESS DRIVER @@ -18727,13 +18743,24 @@ S: Supported F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml F: drivers/i2c/busses/i2c-emev2.c -RENESAS ETHERNET DRIVERS +RENESAS ETHERNET AVB DRIVER R: Sergey Shtylyov <s.shtylyov@omp.ru> L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org -F: Documentation/devicetree/bindings/net/renesas,*.yaml -F: drivers/net/ethernet/renesas/ -F: include/linux/sh_eth.h +F: Documentation/devicetree/bindings/net/renesas,etheravb.yaml +F: drivers/net/ethernet/renesas/Kconfig +F: drivers/net/ethernet/renesas/Makefile +F: drivers/net/ethernet/renesas/ravb* + +RENESAS ETHERNET SWITCH DRIVER +R: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com> +L: netdev@vger.kernel.org +L: linux-renesas-soc@vger.kernel.org +F: Documentation/devicetree/bindings/net/renesas,*ether-switch.yaml +F: drivers/net/ethernet/renesas/Kconfig +F: drivers/net/ethernet/renesas/Makefile +F: drivers/net/ethernet/renesas/rcar_gen4* +F: drivers/net/ethernet/renesas/rswitch* RENESAS IDT821034 ASoC CODEC M: Herve Codina <herve.codina@bootlin.com> @@ -18843,6 +18870,16 @@ S: Supported F: Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml F: drivers/i2c/busses/i2c-rzv2m.c +RENESAS SUPERH ETHERNET DRIVER +R: Sergey Shtylyov <s.shtylyov@omp.ru> +L: netdev@vger.kernel.org +L: linux-renesas-soc@vger.kernel.org +F: Documentation/devicetree/bindings/net/renesas,ether.yaml +F: drivers/net/ethernet/renesas/Kconfig +F: drivers/net/ethernet/renesas/Makefile +F: drivers/net/ethernet/renesas/sh_eth* +F: include/linux/sh_eth.h + RENESAS USB PHY DRIVER M: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com> L: linux-renesas-soc@vger.kernel.org @@ -19179,12 +19216,14 
@@ M: Hin-Tak Leung <hintak.leung@gmail.com> M: Larry Finger <Larry.Finger@lwfinger.net> L: linux-wireless@vger.kernel.org S: Maintained +T: git https://github.com/pkshih/rtw.git F: drivers/net/wireless/realtek/rtl818x/rtl8187/ RTL8XXXU WIRELESS DRIVER (rtl8xxxu) M: Jes Sorensen <Jes.Sorensen@gmail.com> L: linux-wireless@vger.kernel.org S: Maintained +T: git https://github.com/pkshih/rtw.git F: drivers/net/wireless/realtek/rtl8xxxu/ RTRS TRANSPORT DRIVERS @@ -22254,13 +22293,20 @@ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core F: include/linux/clocksource.h F: include/linux/time.h +F: include/linux/timekeeper_internal.h +F: include/linux/timekeeping.h F: include/linux/timex.h F: include/uapi/linux/time.h F: include/uapi/linux/timex.h F: kernel/time/alarmtimer.c -F: kernel/time/clocksource.c -F: kernel/time/ntp.c -F: kernel/time/time*.c +F: kernel/time/clocksource* +F: kernel/time/ntp* +F: kernel/time/time.c +F: kernel/time/timeconst.bc +F: kernel/time/timeconv.c +F: kernel/time/timecounter.c +F: kernel/time/timekeeping* +F: kernel/time/time_test.c F: tools/testing/selftests/timers/ TIPC NETWORK LAYER @@ -22384,6 +22430,7 @@ S: Maintained W: https://kernsec.org/wiki/index.php/Linux_Kernel_Integrity Q: https://patchwork.kernel.org/project/linux-integrity/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git +F: Documentation/devicetree/bindings/tpm/ F: drivers/char/tpm/ TPS546D24 DRIVER @@ -23608,9 +23655,9 @@ S: Supported F: drivers/misc/vmw_balloon.c VMWARE HYPERVISOR INTERFACE -M: Ajay Kaher <akaher@vmware.com> -M: Alexey Makhalov <amakhalov@vmware.com> -R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> +M: Ajay Kaher <ajay.kaher@broadcom.com> +M: Alexey Makhalov <alexey.amakhalov@broadcom.com> +R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported @@ -23619,34 +23666,34 @@ F: 
arch/x86/include/asm/vmware.h F: arch/x86/kernel/cpu/vmware.c VMWARE PVRDMA DRIVER -M: Bryan Tan <bryantan@vmware.com> -M: Vishnu Dasa <vdasa@vmware.com> -R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> +M: Bryan Tan <bryan-bt.tan@broadcom.com> +M: Vishnu Dasa <vishnu.dasa@broadcom.com> +R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/vmw_pvrdma/ VMWARE PVSCSI DRIVER -M: Vishal Bhakta <vbhakta@vmware.com> -R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> +M: Vishal Bhakta <vishal.bhakta@broadcom.com> +R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/vmw_pvscsi.c F: drivers/scsi/vmw_pvscsi.h VMWARE VIRTUAL PTP CLOCK DRIVER -M: Jeff Sipek <jsipek@vmware.com> -R: Ajay Kaher <akaher@vmware.com> -R: Alexey Makhalov <amakhalov@vmware.com> -R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> +M: Nick Shi <nick.shi@broadcom.com> +R: Ajay Kaher <ajay.kaher@broadcom.com> +R: Alexey Makhalov <alexey.amakhalov@broadcom.com> +R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> L: netdev@vger.kernel.org S: Supported F: drivers/ptp/ptp_vmw.c VMWARE VMCI DRIVER -M: Bryan Tan <bryantan@vmware.com> -M: Vishnu Dasa <vdasa@vmware.com> -R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> +M: Bryan Tan <bryan-bt.tan@broadcom.com> +M: Vishnu Dasa <vishnu.dasa@broadcom.com> +R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> L: linux-kernel@vger.kernel.org S: Supported F: drivers/misc/vmw_vmci/ @@ -23661,16 +23708,16 @@ F: drivers/input/mouse/vmmouse.c F: drivers/input/mouse/vmmouse.h VMWARE VMXNET3 ETHERNET DRIVER -M: Ronak Doshi <doshir@vmware.com> -R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> +M: Ronak Doshi <ronak.doshi@broadcom.com> +R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> 
L: netdev@vger.kernel.org S: Supported F: drivers/net/vmxnet3/ VMWARE VSOCK VMCI TRANSPORT DRIVER -M: Bryan Tan <bryantan@vmware.com> -M: Vishnu Dasa <vdasa@vmware.com> -R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> +M: Bryan Tan <bryan-bt.tan@broadcom.com> +M: Vishnu Dasa <vishnu.dasa@broadcom.com> +R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> L: linux-kernel@vger.kernel.org S: Supported F: net/vmw_vsock/vmci_transport* @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc3 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h new file mode 100644 index 000000000000..2189e507c8e0 --- /dev/null +++ b/arch/arm/include/asm/mman.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include <asm/system_info.h> +#include <uapi/asm/mman.h> + +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return cpu_architecture() >= CPU_ARCH_ARMv6; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported + +#endif /* __ASM_MMAN_H__ */ diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index f3a6da8b2890..5260c63db007 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -944,6 +944,8 @@ ap_spi_fp: &spi10 { vddrf-supply = <&pp1300_l2c>; vddch0-supply = <&pp3300_l10c>; max-speed = <3200000>; + + qcom,local-bd-address-broken; }; }; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ce08b744aaab..06234c3a15f3 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -291,6 +291,21 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) blr x2 0: mov_q x0, HCR_HOST_NVHE_FLAGS + + /* + * Compliant CPUs advertise their VHE-onlyness with + * ID_AA64MMFR4_EL1.E2H0 < 0. 
HCR_EL2.E2H can be + * RES1 in that case. Publish the E2H bit early so that + * it can be picked up by the init_el2_state macro. + * + * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but + * don't advertise it (they predate this relaxation). + */ + mrs_s x1, SYS_ID_AA64MMFR4_EL1 + tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f + + orr x0, x0, #HCR_E2H +1: msr hcr_el2, x0 isb @@ -303,22 +318,10 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) mov_q x1, INIT_SCTLR_EL1_MMU_OFF - /* - * Compliant CPUs advertise their VHE-onlyness with - * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be - * RES1 in that case. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but - * don't advertise it (they predate this relaxation). - */ - mrs_s x0, SYS_ID_AA64MMFR4_EL1 - ubfx x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH - tbnz x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f - mrs x0, hcr_el2 and x0, x0, #HCR_E2H cbz x0, 2f -1: + /* Set a sane SCTLR_EL1, the VHE way */ pre_disable_mmu_workaround msr_s SYS_SCTLR_EL12, x1 diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 162b030ab9da..0d022599eb61 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -761,7 +761,6 @@ static void sve_init_header_from_task(struct user_sve_header *header, { unsigned int vq; bool active; - bool fpsimd_only; enum vec_type task_type; memset(header, 0, sizeof(*header)); @@ -777,12 +776,10 @@ static void sve_init_header_from_task(struct user_sve_header *header, case ARM64_VEC_SVE: if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) header->flags |= SVE_PT_VL_INHERIT; - fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE); break; case ARM64_VEC_SME: if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) header->flags |= SVE_PT_VL_INHERIT; - fpsimd_only = false; break; default: WARN_ON_ONCE(1); @@ -790,7 +787,7 @@ static void sve_init_header_from_task(struct user_sve_header *header, } if 
(active) { - if (fpsimd_only) { + if (target->thread.fp_type == FP_STATE_FPSIMD) { header->flags |= SVE_PT_REGS_FPSIMD; } else { header->flags |= SVE_PT_REGS_SVE; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3dee5490eea9..c4a0a35e02c7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2597,14 +2597,11 @@ static __init int kvm_arm_init(void) if (err) goto out_hyp; - if (is_protected_kvm_enabled()) { - kvm_info("Protected nVHE mode initialized successfully\n"); - } else if (in_hyp_mode) { - kvm_info("VHE mode initialized successfully\n"); - } else { - char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n'; - kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode); - } + kvm_info("%s%sVHE mode initialized successfully\n", + in_hyp_mode ? "" : (is_protected_kvm_enabled() ? + "Protected " : "Hyp "), + in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ? + "h" : "n")); /* * FIXME: Do something reasonable if kvm_init() fails after pKVM diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index a60fb13e2192..2fc68da4036d 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -154,7 +154,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, /* Switch to requested VMID */ __tlb_switch_to_guest(mmu, &cxt, false); - __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, + TLBI_TTL_UNKNOWN); dsb(ish); __tlbi(vmalle1is); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 3fae5830f8d2..5a59ef88b646 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -528,7 +528,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); + __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN); } else { if (ctx->end - ctx->addr < granule) return 
-EINVAL; @@ -843,12 +843,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, * Perform the appropriate TLB invalidation based on the * evicted pte value (if any). */ - if (kvm_pte_table(ctx->old, ctx->level)) - kvm_tlb_flush_vmid_range(mmu, ctx->addr, - kvm_granule_size(ctx->level)); - else if (kvm_pte_valid(ctx->old)) + if (kvm_pte_table(ctx->old, ctx->level)) { + u64 size = kvm_granule_size(ctx->level); + u64 addr = ALIGN_DOWN(ctx->addr, size); + + kvm_tlb_flush_vmid_range(mmu, addr, size); + } else if (kvm_pte_valid(ctx->old)) { kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + } } if (stage2_pte_is_counted(ctx->old)) @@ -896,9 +899,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, if (kvm_pte_valid(ctx->old)) { kvm_clear_pte(ctx->ptep); - if (!stage2_unmap_defer_tlb_flush(pgt)) - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, - ctx->addr, ctx->level); + if (kvm_pte_table(ctx->old, ctx->level)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, + TLBI_TTL_UNKNOWN); + } else if (!stage2_unmap_defer_tlb_flush(pgt)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, + ctx->level); + } } mm_ops->put_page(ctx->ptep); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index b32e2940df7d..1a60b95381e8 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -171,7 +171,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, /* Switch to requested VMID */ __tlb_switch_to_guest(mmu, &cxt); - __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, + TLBI_TTL_UNKNOWN); dsb(ish); __tlbi(vmalle1is); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 18680771cdb0..dc04bc767865 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1637,7 +1637,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = 
kvm_vcpu_trap_is_iabt(vcpu); - if (esr_fsc_is_permission_fault(esr)) { + if (esr_fsc_is_translation_fault(esr)) { /* Beyond sanitised PARange (which is the IPA limit) */ if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { kvm_inject_size_fault(vcpu); diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c5b461dda438..122021f9bdfc 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -943,7 +943,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, emit(A64_UXTH(is64, dst, dst), ctx); break; case 32: - emit(A64_REV32(is64, dst, dst), ctx); + emit(A64_REV32(0, dst, dst), ctx); /* upper 32 bits already cleared */ break; case 64: @@ -1256,7 +1256,7 @@ emit_cond_jmp: } else { emit_a64_mov_i(1, tmp, off, ctx); if (sign_extend) - emit(A64_LDRSW(dst, src_adj, off_adj), ctx); + emit(A64_LDRSW(dst, src, tmp), ctx); else emit(A64_LDR32(dst, src, tmp), ctx); } diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 1140051a0c45..1150b77fa281 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -63,6 +63,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG ELF_DETAILS + .hexagon.attributes 0 : { *(.hexagon.attributes) } DISCARDS } diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 06ef440d16ce..516dc7022bd7 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -619,15 +619,6 @@ config MACH_EYEQ5 bool -config FIT_IMAGE_FDT_EPM5 - bool "Include FDT for Mobileye EyeQ5 development platforms" - depends on MACH_EYEQ5 - default n - help - Enable this to include the FDT for the EyeQ5 development platforms - from Mobileye in the FIT kernel image. - This requires u-boot on the platform. 
- config MACH_NINTENDO64 bool "Nintendo 64 console" select CEVT_R4K @@ -1011,6 +1002,15 @@ config CAVIUM_OCTEON_SOC endchoice +config FIT_IMAGE_FDT_EPM5 + bool "Include FDT for Mobileye EyeQ5 development platforms" + depends on MACH_EYEQ5 + default n + help + Enable this to include the FDT for the EyeQ5 development platforms + from Mobileye in the FIT kernel image. + This requires u-boot on the platform. + source "arch/mips/alchemy/Kconfig" source "arch/mips/ath25/Kconfig" source "arch/mips/ath79/Kconfig" diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c index 8d98af5c7201..9a8393e6b4a8 100644 --- a/arch/nios2/kernel/prom.c +++ b/arch/nios2/kernel/prom.c @@ -21,7 +21,8 @@ void __init early_init_devtree(void *params) { - __be32 *dtb = (u32 *)__dtb_start; + __be32 __maybe_unused *dtb = (u32 *)__dtb_start; + #if defined(CONFIG_NIOS2_DTB_AT_PHYS_ADDR) if (be32_to_cpup((__be32 *)CONFIG_NIOS2_DTB_PHYS_ADDR) == OF_DT_HEADER) { @@ -30,8 +31,11 @@ void __init early_init_devtree(void *params) return; } #endif + +#ifdef CONFIG_NIOS2_DTB_SOURCE_BOOL if (be32_to_cpu((__be32) *dtb) == OF_DT_HEADER) params = (void *)__dtb_start; +#endif early_init_dt_scan(params); } diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h new file mode 100644 index 000000000000..47c5a1991d10 --- /dev/null +++ b/arch/parisc/include/asm/mman.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include <uapi/asm/mman.h> + +/* PARISC cannot allow mdwe as it needs writable stacks */ +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return false; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported + +#endif /* __ASM_MMAN_H__ */ diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index f0a4cf01e85c..78302f6c2580 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ 
b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -4,7 +4,6 @@ #ifndef __ASSEMBLY__ -#include <asm/page.h> #include <asm/vdso/timebase.h> #include <asm/barrier.h> #include <asm/unistd.h> @@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void); static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { - return (void *)vd + PAGE_SIZE; + return (void *)vd + (1U << CONFIG_PAGE_SHIFT); } #endif diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 252d63942f34..5b3115a19852 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -151,7 +151,7 @@ endif endif vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg -vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg:../compat_vdso/compat_vdso.so +vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 97fcde30e247..9f8ea0e33eb1 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -593,6 +593,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return ptep_test_and_clear_young(vma, address, ptep); } +#define pgprot_nx pgprot_nx +static inline pgprot_t pgprot_nx(pgprot_t _prot) +{ + return __pgprot(pgprot_val(_prot) & ~_PAGE_EXEC); +} + #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t _prot) { diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h index 980094c2e976..ac80216549ff 100644 --- a/arch/riscv/include/asm/syscall_wrapper.h +++ b/arch/riscv/include/asm/syscall_wrapper.h @@ -36,7 +36,8 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); ulong) \ __attribute__((alias(__stringify(___se_##prefix##name)))); \ __diag_pop(); \ - static long noinline 
___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + __used; \ static long ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) #define SC_RISCV_REGS_TO_ARGS(x, ...) \ diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index ec0cab9fbddd..72ec1d9bd3f3 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ if (unlikely(__kr_err)) \ @@ -328,7 +328,7 @@ do { \ #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ if (unlikely(__kr_err)) \ diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index 10aaa83db89e..95050ebe9ad0 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -34,7 +34,7 @@ #define AT_L3_CACHEGEOMETRY 47 /* entries in ARCH_DLINFO */ -#define AT_VECTOR_SIZE_ARCH 9 +#define AT_VECTOR_SIZE_ARCH 10 #define AT_MINSIGSTKSZ 51 #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 62fa393b2eb2..3df4cb788c1f 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -74,5 +74,5 @@ quiet_cmd_compat_vdsold = VDSOLD $@ rm $@.tmp # actual build commands -quiet_cmd_compat_vdsoas = VDSOAS $@ +quiet_cmd_compat_vdsoas = VDSOAS $@ cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $< diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 37e87fdcf6a0..30e12b310cab 100644 --- a/arch/riscv/kernel/patch.c +++ 
b/arch/riscv/kernel/patch.c @@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) */ lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -92,6 +94,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return 0; } NOKPROBE_SYMBOL(__patch_insn_set); @@ -122,6 +126,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) if (!riscv_patch_in_stop_machine) lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -134,6 +140,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return ret; } NOKPROBE_SYMBOL(__patch_insn_write); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 92922dbd5b5c..e4bc61c4e58a 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -27,8 +27,6 @@ #include <asm/vector.h> #include <asm/cpufeature.h> -register unsigned long gp_in_global __asm__("gp"); - #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include <linux/stackprotector.h> unsigned long __stack_chk_guard __read_mostly; @@ -37,7 +35,7 @@ EXPORT_SYMBOL(__stack_chk_guard); extern asmlinkage void ret_from_fork(void); -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { cpu_do_idle(); } @@ -207,7 +205,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 501e66debf69..5a2edd7f027e 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -119,6 +119,13 
@@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) struct __sc_riscv_v_state __user *state = sc_vec; void __user *datap; + /* + * Mark the vstate as clean prior performing the actual copy, + * to avoid getting the vstate incorrectly clobbered by the + * discarded vector state. + */ + riscv_v_vstate_set_restore(current, regs); + /* Copy everything of __sc_riscv_v_state except datap. */ err = __copy_from_user(&current->thread.vstate, &state->v_state, offsetof(struct __riscv_v_ext_state, datap)); @@ -133,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) * Copy the whole vector content from user space datap. Use * copy_from_user to prevent information leak. */ - err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); - if (unlikely(err)) - return err; - - riscv_v_vstate_set_restore(current, regs); - - return err; + return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); } #else #define save_v_state(task, regs) (0) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 868d6280cf66..05a16b1f0aee 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -122,7 +122,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); pr_cont("\n"); __show_regs(regs); - dump_instr(KERN_EMERG, regs); + dump_instr(KERN_INFO, regs); } force_sig_fault(signo, code, (void __user *)addr); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 9b517fe1b8a8..272c431ac5b9 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -37,6 +37,7 @@ endif # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) +CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n diff --git a/arch/riscv/kvm/aia_aplic.c
b/arch/riscv/kvm/aia_aplic.c index 39e72aa016a4..b467ba5ed910 100644 --- a/arch/riscv/kvm/aia_aplic.c +++ b/arch/riscv/kvm/aia_aplic.c @@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending) raw_spin_lock_irqsave(&irqd->lock, flags); sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK; - if (!pending && - ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) || - (sm == APLIC_SOURCECFG_SM_LEVEL_LOW))) + if (sm == APLIC_SOURCECFG_SM_INACTIVE) goto skip_write_pending; + if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH || + sm == APLIC_SOURCECFG_SM_LEVEL_LOW) { + if (!pending) + goto skip_write_pending; + if ((irqd->state & APLIC_IRQ_STATE_INPUT) && + sm == APLIC_SOURCECFG_SM_LEVEL_LOW) + goto skip_write_pending; + if (!(irqd->state & APLIC_IRQ_STATE_INPUT) && + sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) + goto skip_write_pending; + } + if (pending) irqd->state |= APLIC_IRQ_STATE_PENDING; else @@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled) static bool aplic_read_input(struct aplic *aplic, u32 irq) { - bool ret; - unsigned long flags; + u32 sourcecfg, sm, raw_input, irq_inverted; struct aplic_irq *irqd; + unsigned long flags; + bool ret = false; if (!irq || aplic->nr_irqs <= irq) return false; irqd = &aplic->irqs[irq]; raw_spin_lock_irqsave(&irqd->lock, flags); - ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false; + + sourcecfg = irqd->sourcecfg; + if (sourcecfg & APLIC_SOURCECFG_D) + goto skip; + + sm = sourcecfg & APLIC_SOURCECFG_SM_MASK; + if (sm == APLIC_SOURCECFG_SM_INACTIVE) + goto skip; + + raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0; + irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW || + sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 
1 : 0; + ret = !!(raw_input ^ irq_inverted); + +skip: raw_spin_unlock_irqrestore(&irqd->lock, flags); return ret; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index f4a6124d25c9..994adc26db4b 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -986,7 +986,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu, static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu) { - return copy_isa_ext_reg_indices(vcpu, NULL);; + return copy_isa_ext_reg_indices(vcpu, NULL); } static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 893566e004b7..07d743f87b3f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -99,7 +99,7 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, +static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid, unsigned long start, unsigned long size, unsigned long stride) { @@ -200,7 +200,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID, + __flush_tlb_range(cpu_online_mask, FLUSH_TLB_NO_ASID, start, end - start, PAGE_SIZE); } diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index aac190085472..1adf2f39ce59 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1463,6 +1463,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, if (ret < 0) return ret; + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + const struct btf_func_model *fm; + int idx; + + fm = bpf_jit_find_kfunc_model(ctx->prog, insn); + if (!fm) + return -EINVAL; + + for (idx = 0; 
idx < fm->nr_args; idx++) { + u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx); + + if (fm->arg_size[idx] == sizeof(int)) + emit_sextw(reg, reg, ctx); + } + } + ret = emit_call(addr, fixed_addr, ctx); if (ret) return ret; diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 7138d189cc42..0c4cad7d5a5b 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -15,31 +15,31 @@ #include <asm/barrier.h> #include <asm/cmpxchg.h> -static inline int arch_atomic_read(const atomic_t *v) +static __always_inline int arch_atomic_read(const atomic_t *v) { return __atomic_read(v); } #define arch_atomic_read arch_atomic_read -static inline void arch_atomic_set(atomic_t *v, int i) +static __always_inline void arch_atomic_set(atomic_t *v, int i) { __atomic_set(v, i); } #define arch_atomic_set arch_atomic_set -static inline int arch_atomic_add_return(int i, atomic_t *v) +static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return __atomic_add_barrier(i, &v->counter) + i; } #define arch_atomic_add_return arch_atomic_add_return -static inline int arch_atomic_fetch_add(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return __atomic_add_barrier(i, &v->counter); } #define arch_atomic_fetch_add arch_atomic_fetch_add -static inline void arch_atomic_add(int i, atomic_t *v) +static __always_inline void arch_atomic_add(int i, atomic_t *v) { __atomic_add(i, &v->counter); } @@ -50,11 +50,11 @@ static inline void arch_atomic_add(int i, atomic_t *v) #define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v) #define ATOMIC_OPS(op) \ -static inline void arch_atomic_##op(int i, atomic_t *v) \ +static __always_inline void arch_atomic_##op(int i, atomic_t *v) \ { \ __atomic_##op(i, &v->counter); \ } \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +static __always_inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ { \ return __atomic_##op##_barrier(i, 
&v->counter); \ } @@ -74,7 +74,7 @@ ATOMIC_OPS(xor) #define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) -static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return __atomic_cmpxchg(&v->counter, old, new); } @@ -82,31 +82,31 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) #define ATOMIC64_INIT(i) { (i) } -static inline s64 arch_atomic64_read(const atomic64_t *v) +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { return __atomic64_read(v); } #define arch_atomic64_read arch_atomic64_read -static inline void arch_atomic64_set(atomic64_t *v, s64 i) +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { __atomic64_set(v, i); } #define arch_atomic64_set arch_atomic64_set -static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return __atomic64_add_barrier(i, (long *)&v->counter) + i; } #define arch_atomic64_add_return arch_atomic64_add_return -static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return __atomic64_add_barrier(i, (long *)&v->counter); } #define arch_atomic64_fetch_add arch_atomic64_fetch_add -static inline void arch_atomic64_add(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { __atomic64_add(i, (long *)&v->counter); } @@ -114,20 +114,20 @@ static inline void arch_atomic64_add(s64 i, atomic64_t *v) #define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new)) -static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return __atomic64_cmpxchg((long *)&v->counter, old, new); } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -#define ATOMIC64_OPS(op) \ -static inline 
void arch_atomic64_##op(s64 i, atomic64_t *v) \ -{ \ - __atomic64_##op(i, (long *)&v->counter); \ -} \ -static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ -{ \ - return __atomic64_##op##_barrier(i, (long *)&v->counter); \ +#define ATOMIC64_OPS(op) \ +static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ +{ \ + __atomic64_##op(i, (long *)&v->counter); \ +} \ +static __always_inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ +{ \ + return __atomic64_##op##_barrier(i, (long *)&v->counter); \ } ATOMIC64_OPS(and) diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 50510e08b893..7fa5f96a553a 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -8,7 +8,7 @@ #ifndef __ARCH_S390_ATOMIC_OPS__ #define __ARCH_S390_ATOMIC_OPS__ -static inline int __atomic_read(const atomic_t *v) +static __always_inline int __atomic_read(const atomic_t *v) { int c; @@ -18,14 +18,14 @@ static inline int __atomic_read(const atomic_t *v) return c; } -static inline void __atomic_set(atomic_t *v, int i) +static __always_inline void __atomic_set(atomic_t *v, int i) { asm volatile( " st %1,%0\n" : "=R" (v->counter) : "d" (i)); } -static inline s64 __atomic64_read(const atomic64_t *v) +static __always_inline s64 __atomic64_read(const atomic64_t *v) { s64 c; @@ -35,7 +35,7 @@ static inline s64 __atomic64_read(const atomic64_t *v) return c; } -static inline void __atomic64_set(atomic64_t *v, s64 i) +static __always_inline void __atomic64_set(atomic64_t *v, s64 i) { asm volatile( " stg %1,%0\n" @@ -45,7 +45,7 @@ static inline void __atomic64_set(atomic64_t *v, s64 i) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES #define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \ -static inline op_type op_name(op_type val, op_type *ptr) \ +static __always_inline op_type op_name(op_type val, op_type *ptr) \ { \ op_type old; \ \ @@ -96,7 +96,7 @@ __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi") 
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ #define __ATOMIC_OP(op_name, op_string) \ -static inline int op_name(int val, int *ptr) \ +static __always_inline int op_name(int val, int *ptr) \ { \ int old, new; \ \ @@ -122,7 +122,7 @@ __ATOMIC_OPS(__atomic_xor, "xr") #undef __ATOMIC_OPS #define __ATOMIC64_OP(op_name, op_string) \ -static inline long op_name(long val, long *ptr) \ +static __always_inline long op_name(long val, long *ptr) \ { \ long old, new; \ \ @@ -154,7 +154,7 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ -static inline int __atomic_cmpxchg(int *ptr, int old, int new) +static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new) { asm volatile( " cs %[old],%[new],%[ptr]" @@ -164,7 +164,7 @@ static inline int __atomic_cmpxchg(int *ptr, int old, int new) return old; } -static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) +static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) { int old_expected = old; @@ -176,7 +176,7 @@ static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) return old == old_expected; } -static inline long __atomic64_cmpxchg(long *ptr, long old, long new) +static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new) { asm volatile( " csg %[old],%[new],%[ptr]" @@ -186,7 +186,7 @@ static inline long __atomic64_cmpxchg(long *ptr, long old, long new) return old; } -static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) +static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) { long old_expected = old; diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index bf15da0fedbc..0e3da500e98c 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -12,12 +12,12 @@ #define PREEMPT_NEED_RESCHED 0x80000000 #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) -static inline int preempt_count(void) +static __always_inline int 
preempt_count(void) { return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED; } -static inline void preempt_count_set(int pc) +static __always_inline void preempt_count_set(int pc) { int old, new; @@ -29,22 +29,22 @@ static inline void preempt_count_set(int pc) old, new) != old); } -static inline void set_preempt_need_resched(void) +static __always_inline void set_preempt_need_resched(void) { __atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); } -static inline void clear_preempt_need_resched(void) +static __always_inline void clear_preempt_need_resched(void) { __atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); } -static inline bool test_preempt_need_resched(void) +static __always_inline bool test_preempt_need_resched(void) { return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED); } -static inline void __preempt_count_add(int val) +static __always_inline void __preempt_count_add(int val) { /* * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES @@ -59,17 +59,17 @@ static inline void __preempt_count_add(int val) __atomic_add(val, &S390_lowcore.preempt_count); } -static inline void __preempt_count_sub(int val) +static __always_inline void __preempt_count_sub(int val) { __preempt_count_add(-val); } -static inline bool __preempt_count_dec_and_test(void) +static __always_inline bool __preempt_count_dec_and_test(void) { return __atomic_add(-1, &S390_lowcore.preempt_count) == 1; } -static inline bool should_resched(int preempt_offset) +static __always_inline bool should_resched(int preempt_offset) { return unlikely(READ_ONCE(S390_lowcore.preempt_count) == preempt_offset); @@ -79,45 +79,45 @@ static inline bool should_resched(int preempt_offset) #define PREEMPT_ENABLED (0) -static inline int preempt_count(void) +static __always_inline int preempt_count(void) { return READ_ONCE(S390_lowcore.preempt_count); } -static inline void preempt_count_set(int pc) +static __always_inline void preempt_count_set(int 
pc) { S390_lowcore.preempt_count = pc; } -static inline void set_preempt_need_resched(void) +static __always_inline void set_preempt_need_resched(void) { } -static inline void clear_preempt_need_resched(void) +static __always_inline void clear_preempt_need_resched(void) { } -static inline bool test_preempt_need_resched(void) +static __always_inline bool test_preempt_need_resched(void) { return false; } -static inline void __preempt_count_add(int val) +static __always_inline void __preempt_count_add(int val) { S390_lowcore.preempt_count += val; } -static inline void __preempt_count_sub(int val) +static __always_inline void __preempt_count_sub(int val) { S390_lowcore.preempt_count -= val; } -static inline bool __preempt_count_dec_and_test(void) +static __always_inline bool __preempt_count_dec_and_test(void) { return !--S390_lowcore.preempt_count && tif_need_resched(); } -static inline bool should_resched(int preempt_offset) +static __always_inline bool should_resched(int preempt_offset) { return unlikely(preempt_count() == preempt_offset && tif_need_resched()); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 787394978bc0..3dc85638bc63 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -635,6 +635,7 @@ SYM_DATA_START_LOCAL(daton_psw) SYM_DATA_END(daton_psw) .section .rodata, "a" + .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame SYM_DATA_START(sys_call_table) #include "asm/syscall_table.h" diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 823d652e3917..4ad472d130a3 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -90,7 +90,6 @@ static void paicrypt_event_destroy(struct perf_event *event) event->cpu); struct paicrypt_map *cpump = mp->mapptr; - cpump->event = NULL; static_branch_dec(&pai_key); mutex_lock(&pai_reserve_mutex); debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d" @@ -356,10 +355,15 @@ static int 
paicrypt_add(struct perf_event *event, int flags) static void paicrypt_stop(struct perf_event *event, int flags) { - if (!event->attr.sample_period) /* Counting */ + struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); + struct paicrypt_map *cpump = mp->mapptr; + + if (!event->attr.sample_period) { /* Counting */ paicrypt_read(event); - else /* Sampling */ + } else { /* Sampling */ perf_sched_cb_dec(event->pmu); + cpump->event = NULL; + } event->hw.state = PERF_HES_STOPPED; } diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 616a25606cd6..a6da7e0cc7a6 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -122,7 +122,6 @@ static void paiext_event_destroy(struct perf_event *event) free_page(PAI_SAVE_AREA(event)); mutex_lock(&paiext_reserve_mutex); - cpump->event = NULL; if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ paiext_free(mp); paiext_root_free(); @@ -362,10 +361,15 @@ static int paiext_add(struct perf_event *event, int flags) static void paiext_stop(struct perf_event *event, int flags) { - if (!event->attr.sample_period) /* Counting */ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + + if (!event->attr.sample_period) { /* Counting */ paiext_read(event); - else /* Sampling */ + } else { /* Sampling */ perf_sched_cb_dec(event->pmu); + cpump->event = NULL; + } event->hw.state = PERF_HES_STOPPED; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index c421dd44ffbe..0c66b32e0f9f 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -75,7 +75,7 @@ static enum fault_type get_fault_type(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_PGSTE)) return KERNEL_FAULT; gmap = (struct gmap *)S390_lowcore.gmap; - if (regs->cr1 == gmap->asce) + if (gmap && gmap->asce == regs->cr1) return GMAP_FAULT; return KERNEL_FAULT; } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 
b418333bb086..5af0402e94b8 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size) * PLT for hotpatchable calls. The calling convention is the same as for the * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered. */ -extern const char bpf_plt[]; -extern const char bpf_plt_ret[]; -extern const char bpf_plt_target[]; -extern const char bpf_plt_end[]; -#define BPF_PLT_SIZE 32 +struct bpf_plt { + char code[16]; + void *ret; + void *target; +} __packed; +extern const struct bpf_plt bpf_plt; asm( ".pushsection .rodata\n" " .balign 8\n" @@ -531,15 +532,14 @@ asm( " .balign 8\n" "bpf_plt_ret: .quad 0\n" "bpf_plt_target: .quad 0\n" - "bpf_plt_end:\n" " .popsection\n" ); -static void bpf_jit_plt(void *plt, void *ret, void *target) +static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) { - memcpy(plt, bpf_plt, BPF_PLT_SIZE); - *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret; - *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret; + memcpy(plt, &bpf_plt, sizeof(*plt)); + plt->ret = ret; + plt->target = target; } /* @@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) jit->prg = ALIGN(jit->prg, 8); jit->prologue_plt = jit->prg; if (jit->prg_buf) - bpf_jit_plt(jit->prg_buf + jit->prg, + bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg), jit->prg_buf + jit->prologue_plt_ret, NULL); - jit->prg += BPF_PLT_SIZE; + jit->prg += sizeof(struct bpf_plt); } static int get_probe_mem_regno(const u8 *insn) @@ -2040,9 +2040,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_jit jit; int pass; - if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE)) - return orig_fp; - if (!fp->jit_requested) return orig_fp; @@ -2148,14 +2145,11 @@ bool bpf_jit_supports_far_kfunc_call(void) int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { + struct 
bpf_plt expected_plt, current_plt, new_plt, *plt; struct { u16 opc; s32 disp; } __packed insn; - char expected_plt[BPF_PLT_SIZE]; - char current_plt[BPF_PLT_SIZE]; - char new_plt[BPF_PLT_SIZE]; - char *plt; char *ret; int err; @@ -2174,18 +2168,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, */ } else { /* Verify the PLT. */ - plt = (char *)ip + (insn.disp << 1); - err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE); + plt = ip + (insn.disp << 1); + err = copy_from_kernel_nofault(&current_plt, plt, + sizeof(current_plt)); if (err < 0) return err; ret = (char *)ip + 6; - bpf_jit_plt(expected_plt, ret, old_addr); - if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE)) + bpf_jit_plt(&expected_plt, ret, old_addr); + if (memcmp(&current_plt, &expected_plt, sizeof(current_plt))) return -EINVAL; /* Adjust the call address. */ - bpf_jit_plt(new_plt, ret, new_addr); - s390_kernel_write(plt + (bpf_plt_target - bpf_plt), - new_plt + (bpf_plt_target - bpf_plt), + bpf_jit_plt(&new_plt, ret, new_addr); + s390_kernel_write(&plt->target, &new_plt.target, sizeof(void *)); } diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 6a1f36df6a18..cf0ad89f5639 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -28,7 +28,7 @@ obj-y += net/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ -obj-y += virt/svm/ +obj-y += virt/ # for cleaning subdir- += boot tools diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 39886bab943a..4fff6ed46e90 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2439,6 +2439,8 @@ config USE_X86_SEG_SUPPORT # with named address spaces - see GCC PR sanitizer/111736. # depends on !KASAN + # -fsanitize=thread (KCSAN) is also incompatible. 
+ depends on !KCSAN config CC_HAS_SLS def_bool $(cc-option,-mharden-sls=all) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 662d9d4033e6..5ab93fcdd691 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -251,8 +251,6 @@ archheaders: libs-y += arch/x86/lib/ -core-y += arch/x86/virt/ - # drivers-y are linked after core-y drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/x86/pci/ diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 719e939050cb..876fc6d46a13 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -15,10 +15,12 @@ */ #include <linux/linkage.h> +#include <asm/asm-offsets.h> #include <asm/msr.h> #include <asm/page_types.h> #include <asm/processor-flags.h> #include <asm/segment.h> +#include <asm/setup.h> .code64 .text @@ -149,6 +151,7 @@ SYM_FUNC_END(__efi64_thunk) SYM_FUNC_START(efi32_stub_entry) call 1f 1: popl %ecx + leal (efi32_boot_args - 1b)(%ecx), %ebx /* Clear BSS */ xorl %eax, %eax @@ -163,6 +166,7 @@ SYM_FUNC_START(efi32_stub_entry) popl %ecx popl %edx popl %esi + movl %esi, 8(%ebx) jmp efi32_entry SYM_FUNC_END(efi32_stub_entry) #endif @@ -239,8 +243,6 @@ SYM_FUNC_END(efi_enter32) * * Arguments: %ecx image handle * %edx EFI system table pointer - * %esi struct bootparams pointer (or NULL when not using - * the EFI handover protocol) * * Since this is the point of no return for ordinary execution, no registers * are considered live except for the function parameters. [Note that the EFI @@ -266,9 +268,18 @@ SYM_FUNC_START_LOCAL(efi32_entry) leal (efi32_boot_args - 1b)(%ebx), %ebx movl %ecx, 0(%ebx) movl %edx, 4(%ebx) - movl %esi, 8(%ebx) movb $0x0, 12(%ebx) // efi_is64 + /* + * Allocate some memory for a temporary struct boot_params, which only + * needs the minimal pieces that startup_32() relies on. 
+ */ + subl $PARAM_SIZE, %esp + movl %esp, %esi + movl $PAGE_SIZE, BP_kernel_alignment(%esi) + movl $_end - 1b, BP_init_size(%esi) + subl $startup_32 - 1b, BP_init_size(%esi) + /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax @@ -294,8 +305,7 @@ SYM_FUNC_START(efi32_pe_entry) movl 8(%ebp), %ecx // image_handle movl 12(%ebp), %edx // sys_table - xorl %esi, %esi - jmp efi32_entry // pass %ecx, %edx, %esi + jmp efi32_entry // pass %ecx, %edx // no other registers remain live 2: popl %edi // restore callee-save registers diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index d07be9d05cd0..b31ef2424d19 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -3,19 +3,28 @@ * Confidential Computing Platform Capability checks * * Copyright (C) 2021 Advanced Micro Devices, Inc. + * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * * Author: Tom Lendacky <thomas.lendacky@amd.com> */ #include <linux/export.h> #include <linux/cc_platform.h> +#include <linux/string.h> +#include <linux/random.h> +#include <asm/archrandom.h> #include <asm/coco.h> #include <asm/processor.h> enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; u64 cc_mask __ro_after_init; +static struct cc_attr_flags { + __u64 host_sev_snp : 1, + __resv : 63; +} cc_flags; + static bool noinstr intel_cc_platform_has(enum cc_attr attr) { switch (attr) { @@ -89,6 +98,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_GUEST_SEV_SNP: return sev_status & MSR_AMD64_SEV_SNP_ENABLED; + case CC_ATTR_HOST_SEV_SNP: + return cc_flags.host_sev_snp; + default: return false; } @@ -148,3 +160,84 @@ u64 cc_mkdec(u64 val) } } EXPORT_SYMBOL_GPL(cc_mkdec); + +static void amd_cc_platform_clear(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 0; + break; + default: + break; + } +} + +void cc_platform_clear(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + 
amd_cc_platform_clear(attr); + break; + default: + break; + } +} + +static void amd_cc_platform_set(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 1; + break; + default: + break; + } +} + +void cc_platform_set(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + amd_cc_platform_set(attr); + break; + default: + break; + } +} + +__init void cc_random_init(void) +{ + /* + * The seed is 32 bytes (in units of longs), which is 256 bits, which + * is the security level that the RNG is targeting. + */ + unsigned long rng_seed[32 / sizeof(long)]; + size_t i, longs; + + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return; + + /* + * Since the CoCo threat model includes the host, the only reliable + * source of entropy that can be neither observed nor manipulated is + * RDRAND. Usually, RDRAND failure is considered tolerable, but since + * CoCo guests have no other unobservable source of entropy, it's + * important to at least ensure the RNG gets some initial random seeds. + */ + for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) { + longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i); + + /* + * A zero return value means that the guest doesn't have RDRAND + * or the CPU is physically broken, and in both cases that + * means most crypto inside of the CoCo instance will be + * broken, defeating the purpose of CoCo in the first place. So + * just panic here because it's absolutely unsafe to continue + * executing. 
+ */ + if (longs == 0) + panic("RDRAND is defective."); + } + add_device_randomness(rng_seed, sizeof(rng_seed)); + memzero_explicit(rng_seed, sizeof(rng_seed)); +} diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index fd63051bbbbb..3d64bcc403cf 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n +OBJECT_FILES_NON_STANDARD_vdso-image-x32.o := n OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index aec16e581f5b..985ef3b47919 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = /* * AMD Performance Monitor Family 17h and later: */ -static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = +static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, @@ -262,10 +262,39 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, }; +static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, +}; + +static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + 
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, + [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x100000120, +}; + static u64 amd_pmu_event_map(int hw_event) { - if (boot_cpu_data.x86 >= 0x17) - return amd_f17h_perfmon_event_map[hw_event]; + if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a) + return amd_zen4_perfmon_event_map[hw_event]; + + if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19) + return amd_zen2_perfmon_event_map[hw_event]; + + if (cpu_feature_enabled(X86_FEATURE_ZEN1)) + return amd_zen1_perfmon_event_map[hw_event]; return amd_perfmon_event_map[hw_event]; } @@ -904,8 +933,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!status) goto done; - /* Read branch records before unfreezing */ - if (status & GLOBAL_STATUS_LBRS_FROZEN) { + /* Read branch records */ + if (x86_pmu.lbr_nr) { amd_pmu_lbr_read(); status &= ~GLOBAL_STATUS_LBRS_FROZEN; } diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 4a1e600314d5..5149830c7c4f 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void) wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); } - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); } @@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void) return; rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + + if 
(cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } } __init int amd_pmu_lbr_init(void) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 2641ba620f12..e010bfed8417 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu = event->pmu; /* - * Make sure we get updated with the first PEBS - * event. It will trigger also during removal, but - * that does not hurt: + * Make sure we get updated with the first PEBS event. + * During removal, ->pebs_data_cfg is still valid for + * the last PEBS event. Don't clear it. */ - if (cpuc->n_pebs == 1) + if ((cpuc->n_pebs == 1) && add) cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW; if (needed_cb != pebs_needs_sched_cb(cpuc)) { diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index fcd20c6dc7f9..67b68d0d17d1 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void); extern void callthunks_patch_module_calls(struct callthunk_sites *sites, struct module *mod); extern void *callthunks_translate_call_dest(void *dest); -extern int x86_call_depth_emit_accounting(u8 **pprog, void *func); +extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip); #else static __always_inline void callthunks_patch_builtin_calls(void) {} static __always_inline void @@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest) return dest; } static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, - void *func) + void *func, void *ip) { return 0; } diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 076bf8dee702..25466c4d2134 100644 --- 
a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -14,6 +14,7 @@ #include <asm/asm.h> #include <asm/fred.h> #include <asm/gsseg.h> +#include <asm/nospec-branch.h> #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index fb7388bbc212..c086699b0d0c 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask) u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); +void cc_random_init(void); #else #define cc_vendor (CC_VENDOR_NONE) @@ -34,6 +35,7 @@ static inline u64 cc_mkdec(u64 val) { return val; } +static inline void cc_random_init(void) { } #endif #endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a1273698fc43..686e92d2663e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -33,6 +33,8 @@ enum cpuid_leafs CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, + CPUID_LNX_5, + NR_CPUID_WORDS, }; #define X86_CAP_FMT_NUM "%d:%d" @@ -91,8 +93,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -116,8 +119,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ DISABLED_MASK_CHECK || \ - 
BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f0337f7bcf16..a38f8f9ba657 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 21 /* N 32-bit words worth of info */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* @@ -460,6 +460,14 @@ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ /* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0x80000022, etc. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ + +/* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) diff --git a/arch/x86/include/asm/crash_reserve.h b/arch/x86/include/asm/crash_reserve.h index 152239f95541..7835b2cdff04 100644 --- a/arch/x86/include/asm/crash_reserve.h +++ b/arch/x86/include/asm/crash_reserve.h @@ -39,4 +39,6 @@ static inline unsigned long crash_low_size_default(void) #endif } +#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY + #endif /* _X86_CRASH_RESERVE_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index da4054fbf533..c492bdc97b05 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -155,6 +155,7 @@ #define DISABLED_MASK18 (DISABLE_IBT) #define DISABLED_MASK19 (DISABLE_SEV_SNP) #define DISABLED_MASK20 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define DISABLED_MASK21 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/nospec-branch.h 
b/arch/x86/include/asm/nospec-branch.h index fc3a8a3c7ffe..170c89ed22fc 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -262,11 +262,20 @@ .Lskip_rsb_\@: .endm +/* + * The CALL to srso_alias_untrain_ret() must be patched in directly at + * the spot where untraining must be done, ie., srso_alias_untrain_ret() + * must be the target of a CALL instruction instead of indirectly + * jumping to a wrapper which then calls it. Therefore, this macro is + * called outside of __UNTRAIN_RET below, for the time being, before the + * kernel can support nested alternatives with arbitrary nesting. + */ +.macro CALL_UNTRAIN_RET #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) -#define CALL_UNTRAIN_RET "call entry_untrain_ret" -#else -#define CALL_UNTRAIN_RET "" + ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif +.endm /* * Mitigate RETBleed for AMD/Hygon Zen uarch. 
Requires KERNEL CR3 because the @@ -282,8 +291,8 @@ .macro __UNTRAIN_RET ibpb_feature, call_depth_insns #if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) VALIDATE_UNRET_END - ALTERNATIVE_3 "", \ - CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ + CALL_UNTRAIN_RET + ALTERNATIVE_2 "", \ "call entry_ibpb", \ibpb_feature, \ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH #endif @@ -342,6 +351,8 @@ extern void retbleed_return_thunk(void); static inline void retbleed_return_thunk(void) {} #endif +extern void srso_alias_untrain_ret(void); + #ifdef CONFIG_MITIGATION_SRSO extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 7ba1726b71c7..e9187ddd3d1f 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -99,6 +99,7 @@ #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define REQUIRED_MASK21 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 9477b4053bce..7f57382afee4 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -218,17 +218,16 @@ void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages); void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages); -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __noreturn snp_abort(void); +void snp_dmi_setup(void); int snp_issue_guest_request(u64 
exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); -void kdump_sev_callback(void); void sev_show_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } @@ -244,12 +243,12 @@ static inline void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } static inline void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } -static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } +static inline void snp_dmi_setup(void) { } static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { return -ENOTTY; @@ -258,7 +257,6 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } -static inline void kdump_sev_callback(void) { } static inline void sev_show_status(void) { } #endif @@ -270,6 +268,7 @@ int psmash(u64 pfn); int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable); int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); +void kdump_sev_callback(void); #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int 
snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } @@ -282,6 +281,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as } static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; } static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} +static inline void kdump_sev_callback(void) { } #endif #endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b89b40f250e6..6149eabe200f 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -30,12 +30,13 @@ struct x86_init_mpparse { * @reserve_resources: reserve the standard resources for the * platform * @memory_setup: platform specific memory setup - * + * @dmi_setup: platform specific DMI setup */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); + void (*dmi_setup)(void); }; /** diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index ad29984d5e39..ef11aa4cab42 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -694,6 +694,7 @@ enum sev_cmd_id { struct kvm_sev_cmd { __u32 id; + __u32 pad0; __u64 data; __u32 error; __u32 sev_fd; @@ -704,28 +705,35 @@ struct kvm_sev_launch_start { __u32 policy; __u64 dh_uaddr; __u32 dh_len; + __u32 pad0; __u64 session_uaddr; __u32 session_len; + __u32 pad1; }; struct kvm_sev_launch_update_data { __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_launch_secret { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; struct kvm_sev_launch_measure { __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_guest_status { @@ -738,33 +746,43 @@ struct kvm_sev_dbg { __u64 src_uaddr; __u64 dst_uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_attestation_report { __u8 mnonce[16]; __u64 uaddr; __u32 len; + __u32 pad0; }; 
struct kvm_sev_send_start { __u32 policy; + __u32 pad0; __u64 pdh_cert_uaddr; __u32 pdh_cert_len; + __u32 pad1; __u64 plat_certs_uaddr; __u32 plat_certs_len; + __u32 pad2; __u64 amd_certs_uaddr; __u32 amd_certs_len; + __u32 pad3; __u64 session_uaddr; __u32 session_len; + __u32 pad4; }; struct kvm_sev_send_update_data { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; struct kvm_sev_receive_start { @@ -772,17 +790,22 @@ struct kvm_sev_receive_start { __u32 policy; __u64 pdh_uaddr; __u32 pdh_len; + __u32 pad0; __u64 session_uaddr; __u32 session_len; + __u32 pad1; }; struct kvm_sev_receive_update_data { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 6bc3456a8ebf..a1efa7907a0b 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { __u32 token; __u8 pad[56]; - __u32 enabled; }; #define KVM_PV_EOI_BIT 0 diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 30335182b6b0..e92ff0c11db8 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -314,7 +314,7 @@ static bool is_callthunk(void *addr) return !bcmp(pad, insn_buff, tmpl_size); } -int x86_call_depth_emit_accounting(u8 **pprog, void *func) +int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip) { unsigned int tmpl_size = SKL_TMPL_SIZE; u8 insn_buff[MAX_PATCH_LEN]; @@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func) return 0; memcpy(insn_buff, skl_call_thunk_template, tmpl_size); - apply_relocation(insn_buff, tmpl_size, *pprog, + apply_relocation(insn_buff, tmpl_size, ip, skl_call_thunk_template, 
tmpl_size); memcpy(*pprog, insn_buff, tmpl_size); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6d8677e80ddb..9bf17c9c29da 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -345,6 +345,28 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } +static void bsp_determine_snp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM + cc_vendor = CC_VENDOR_AMD; + + if (cpu_has(c, X86_FEATURE_SEV_SNP)) { + /* + * RMP table entry format is not architectural and is defined by the + * per-processor PPR. Restrict SNP support on the known CPU models + * for which the RMP table entry format is currently defined for. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + c->x86 >= 0x19 && snp_probe_rmptable_info()) { + cc_platform_set(CC_ATTR_HOST_SEV_SNP); + } else { + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + } + } +#endif +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -452,21 +474,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) break; } - if (cpu_has(c, X86_FEATURE_SEV_SNP)) { - /* - * RMP table entry format is not architectural and it can vary by processor - * and is defined by the per-processor PPR. Restrict SNP support on the - * known CPU model and family for which the RMP table entry format is - * currently defined for. 
- */ - if (!boot_cpu_has(X86_FEATURE_ZEN3) && - !boot_cpu_has(X86_FEATURE_ZEN4) && - !boot_cpu_has(X86_FEATURE_ZEN5)) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - else if (!snp_probe_rmptable_info()) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - } - + bsp_determine_snp(c); return; warn: diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b5cc557cfc37..84d41be6d06b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2500,12 +2500,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, return -EINVAL; b = &per_cpu(mce_banks_array, s->id)[bank]; - if (!b->init) return -ENODEV; b->ctl = new; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); return size; } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 422a4ddc2ab7..7b29ebda024f 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -108,7 +108,7 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return; rdmsr(MSR_AMD64_SYSCFG, lo, hi); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index c99f26ebe7a6..1a8687f8073a 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -78,7 +78,8 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu) else cpu = cpumask_any_but(mask, exclude_cpu); - if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) + /* Only continue if tick_nohz_full_mask has been initialized. */ + if (!tick_nohz_full_enabled()) return cpu; /* If the CPU picked isn't marked nohz_full nothing more needs doing. 
*/ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 0dad49a09b7a..a515328d9d7d 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index e963344b0449..53935b4d62e3 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -2,6 +2,7 @@ /* * EISA specific code */ +#include <linux/cc_platform.h> #include <linux/ioport.h> #include <linux/eisa.h> #include <linux/io.h> @@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void) { void __iomem *p; - if (xen_pv_domain() && !xen_initial_domain()) + if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return 0; p = ioremap(0x0FFFD9, 4); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4cadfd606e8e..7f0732bc0ccd 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -65,6 +65,7 @@ static int __init parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); +static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled); static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible; static int has_steal_clock = 0; @@ -244,7 +245,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void) { u32 flags = 0; - if (__this_cpu_read(apf_reason.enabled)) { + if (__this_cpu_read(async_pf_enabled)) { flags = __this_cpu_read(apf_reason.flags); __this_cpu_write(apf_reason.flags, 0); } @@ -295,7 +296,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) inc_irq_stat(irq_hv_callback_count); - if 
(__this_cpu_read(apf_reason.enabled)) { + if (__this_cpu_read(async_pf_enabled)) { token = __this_cpu_read(apf_reason.token); kvm_async_pf_task_wake(token); __this_cpu_write(apf_reason.token, 0); @@ -362,7 +363,7 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR); wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); - __this_cpu_write(apf_reason.enabled, 1); + __this_cpu_write(async_pf_enabled, true); pr_debug("setup async PF for cpu %d\n", smp_processor_id()); } @@ -383,11 +384,11 @@ static void kvm_guest_cpu_init(void) static void kvm_pv_disable_apf(void) { - if (!__this_cpu_read(apf_reason.enabled)) + if (!__this_cpu_read(async_pf_enabled)) return; wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); - __this_cpu_write(apf_reason.enabled, 0); + __this_cpu_write(async_pf_enabled, false); pr_debug("disable async PF for cpu %d\n", smp_processor_id()); } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 9a5b372c706f..ed163c8c8604 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -580,7 +580,7 @@ EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx); static char *nmi_check_stall_msg[] = { /* */ -/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */ +/* +--------- nmi_seq & 0x1: CPU is currently in NMI handler. */ /* | +------ cpu_is_offline(cpu) */ /* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */ /* | | | NMI handler has been invoked. 
*/ @@ -628,22 +628,26 @@ void nmi_backtrace_stall_check(const struct cpumask *btp) nmi_seq = READ_ONCE(nsp->idt_nmi_seq); if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) { msgp = "CPU entered NMI handler function, but has not exited"; - } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) { - msgp = "CPU is handling NMIs"; - } else { - idx = ((nsp->idt_seq_snap & 0x1) << 2) | + } else if (nsp->idt_nmi_seq_snap == nmi_seq || + nsp->idt_nmi_seq_snap + 1 == nmi_seq) { + idx = ((nmi_seq & 0x1) << 2) | (cpu_is_offline(cpu) << 1) | (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls)); msgp = nmi_check_stall_msg[idx]; if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1)) modp = ", but OK because ignore_nmis was set"; - if (nmi_seq & 0x1) - msghp = " (CPU currently in NMI handler function)"; - else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq) + if (nsp->idt_nmi_seq_snap + 1 == nmi_seq) msghp = " (CPU exited one NMI handler function)"; + else if (nmi_seq & 0x1) + msghp = " (CPU currently in NMI handler function)"; + else + msghp = " (CPU was never in an NMI handler function)"; + } else { + msgp = "CPU is handling NMIs"; } - pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n", - __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies)); + pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp); + pr_alert("%s: last activity: %lu jiffies ago.\n", + __func__, j - READ_ONCE(nsp->recv_jiffies)); } } diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 319fef37d9dc..cc2c34ba7228 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -203,16 +203,6 @@ void __init probe_roms(void) unsigned char c; int i; - /* - * The ROM memory range is not part of the e820 table and is therefore not - * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted - * memory, and SNP requires encrypted memory to be validated before access. 
- * Do that here. - */ - snp_prep_memory(video_rom_resource.start, - ((system_rom_resource.end + 1) - video_rom_resource.start), - SNP_PAGE_STATE_PRIVATE); - /* video rom */ upper = adapter_rom_resources[0].start; for (start = video_rom_resource.start; start < upper; start += 2048) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ef206500ed6f..e125e059e2c4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -9,7 +9,6 @@ #include <linux/console.h> #include <linux/crash_dump.h> #include <linux/dma-map-ops.h> -#include <linux/dmi.h> #include <linux/efi.h> #include <linux/ima.h> #include <linux/init_ohci1394_dma.h> @@ -36,6 +35,7 @@ #include <asm/bios_ebda.h> #include <asm/bugs.h> #include <asm/cacheinfo.h> +#include <asm/coco.h> #include <asm/cpu.h> #include <asm/efi.h> #include <asm/gart.h> @@ -902,7 +902,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); reserve_ibft_region(); - dmi_setup(); + x86_init.resources.dmi_setup(); /* * VMware detection requires dmi to be available, so this @@ -992,6 +992,7 @@ void __init setup_arch(char **cmdline_p) * memory size. 
*/ mem_encrypt_setup_arch(); + cc_random_init(); efi_fake_memmap(); efi_find_mirror(); diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index b59b09c2f284..38ad066179d8 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -23,6 +23,7 @@ #include <linux/platform_device.h> #include <linux/io.h> #include <linux/psp-sev.h> +#include <linux/dmi.h> #include <uapi/linux/sev-guest.h> #include <asm/init.h> @@ -795,21 +796,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); } -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) -{ - unsigned long vaddr, npages; - - vaddr = (unsigned long)__va(paddr); - npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; - - if (op == SNP_PAGE_STATE_PRIVATE) - early_snp_set_memory_private(vaddr, paddr, npages); - else if (op == SNP_PAGE_STATE_SHARED) - early_snp_set_memory_shared(vaddr, paddr, npages); - else - WARN(1, "invalid memory op %d\n", op); -} - static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, unsigned long vaddr_end, int op) { @@ -2136,6 +2122,17 @@ void __head __noreturn snp_abort(void) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } +/* + * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are + * enabled, as the alternative (fallback) logic for DMI probing in the legacy + * ROM region can cause a crash since this region is not pre-validated. + */ +void __init snp_dmi_setup(void) +{ + if (efi_enabled(EFI_CONFIG_TABLES)) + dmi_setup(); +} + static void dump_cpuid_table(void) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -2287,16 +2284,6 @@ static int __init snp_init_platform_device(void) } device_initcall(snp_init_platform_device); -void kdump_sev_callback(void) -{ - /* - * Do wbinvd() on remote CPUs when SNP is enabled in order to - * safely do SNP_SHUTDOWN on the local CPU. 
- */ - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) - wbinvd(); -} - void sev_show_status(void) { int i; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index a42830dc151b..d5dc5a92635a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -3,6 +3,7 @@ * * For licencing details see kernel-base/COPYING */ +#include <linux/dmi.h> #include <linux/init.h> #include <linux/ioport.h> #include <linux/export.h> @@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = { .probe_roms = probe_roms, .reserve_resources = reserve_standard_io_resources, .memory_setup = e820__memory_setup_default, + .dmi_setup = dmi_setup, }, .mpparse = { diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 3aaf7e86a859..0ebdd088f28b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -122,6 +122,7 @@ config KVM_AMD_SEV default y depends on KVM_AMD && X86_64 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) + select ARCH_HAS_CC_PLATFORM help Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index adba49afb5fe..bfc0bfcb2bc6 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -189,15 +189,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 return 0; } -static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, - const char *sig) +static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries, + int nent, const char *sig) { struct kvm_hypervisor_cpuid cpuid = {}; struct kvm_cpuid_entry2 *entry; u32 base; for_each_possible_hypervisor_cpuid_base(base) { - entry = kvm_find_cpuid_entry(vcpu, base); + entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT); if (entry) { u32 signature[3]; @@ -217,22 +217,29 @@ static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcp return cpuid; } -static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu, - struct kvm_cpuid_entry2 *entries, int nent) +static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, + const char *sig) { - u32 base = vcpu->arch.kvm_cpuid.base; - - if (!base) - return NULL; + return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries, + vcpu->arch.cpuid_nent, sig); +} - return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES, +static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries, + int nent, u32 kvm_cpuid_base) +{ + return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES, KVM_CPUID_INDEX_NOT_SIGNIFICANT); } static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu) { - return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent); + u32 base = vcpu->arch.kvm_cpuid.base; + + if (!base) + return NULL; + + return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries, + vcpu->arch.cpuid_nent, base); } void 
kvm_update_pv_runtime(struct kvm_vcpu *vcpu) @@ -266,6 +273,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e int nent) { struct kvm_cpuid_entry2 *best; + struct kvm_hypervisor_cpuid kvm_cpuid; best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); if (best) { @@ -292,10 +300,12 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e cpuid_entry_has(best, X86_FEATURE_XSAVEC))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent); - if (kvm_hlt_in_guest(vcpu->kvm) && best && - (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) - best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE); + if (kvm_cpuid.base) { + best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base); + if (kvm_hlt_in_guest(vcpu->kvm) && best) + best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + } if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index aadefcaa9561..58ac8d69c94b 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = { */ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) { + BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS); BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_5); BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index ae0ac12382b9..61a7531d41b0 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -84,9 +84,10 @@ struct 
enc_region { }; /* Called with the sev_bitmap_lock held, or on shutdown */ -static int sev_flush_asids(int min_asid, int max_asid) +static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) { - int ret, asid, error = 0; + int ret, error = 0; + unsigned int asid; /* Check if there are any ASIDs to reclaim before performing a flush */ asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); @@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm) } /* Must be called with the sev_bitmap_lock held */ -static bool __sev_recycle_asids(int min_asid, int max_asid) +static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid) { if (sev_flush_asids(min_asid, max_asid)) return false; @@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) static int sev_asid_new(struct kvm_sev_info *sev) { - int asid, min_asid, max_asid, ret; + /* + * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. + * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. + * Note: min ASID can end up larger than the max if basic SEV support is + * effectively disabled by disallowing use of ASIDs for SEV guests. + */ + unsigned int min_asid = sev->es_active ? 1 : min_sev_asid; + unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; + unsigned int asid; bool retry = true; + int ret; + + if (min_asid > max_asid) + return -ENOTTY; WARN_ON(sev->misc_cg); sev->misc_cg = get_current_misc_cg(); @@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev) mutex_lock(&sev_bitmap_lock); - /* - * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. - * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. - */ - min_asid = sev->es_active ? 1 : min_sev_asid; - max_asid = sev->es_active ? 
min_sev_asid - 1 : max_sev_asid; again: asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); if (asid > max_asid) { @@ -179,7 +186,8 @@ again: mutex_unlock(&sev_bitmap_lock); - return asid; + sev->asid = asid; + return 0; e_uncharge: sev_misc_cg_uncharge(sev); put_misc_cg(sev->misc_cg); @@ -187,7 +195,7 @@ e_uncharge: return ret; } -static int sev_get_asid(struct kvm *kvm) +static unsigned int sev_get_asid(struct kvm *kvm) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; @@ -247,21 +255,19 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_platform_init_args init_args = {0}; - int asid, ret; + int ret; if (kvm->created_vcpus) return -EINVAL; - ret = -EBUSY; if (unlikely(sev->active)) - return ret; + return -EINVAL; sev->active = true; sev->es_active = argp->id == KVM_SEV_ES_INIT; - asid = sev_asid_new(sev); - if (asid < 0) + ret = sev_asid_new(sev); + if (ret) goto e_no_asid; - sev->asid = asid; init_args.probe = false; ret = sev_platform_init(&init_args); @@ -287,8 +293,8 @@ e_no_asid: static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { + unsigned int asid = sev_get_asid(kvm); struct sev_data_activate activate; - int asid = sev_get_asid(kvm); int ret; /* activate ASID on the given handle */ @@ -2240,8 +2246,10 @@ void __init sev_hardware_setup(void) goto out; } - sev_asid_count = max_sev_asid - min_sev_asid + 1; - WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + if (min_sev_asid <= max_sev_asid) { + sev_asid_count = max_sev_asid - min_sev_asid + 1; + WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + } sev_supported = true; /* SEV-ES support requested? */ @@ -2272,7 +2280,9 @@ void __init sev_hardware_setup(void) out: if (boot_cpu_has(X86_FEATURE_SEV)) pr_info("SEV %s (ASIDs %u - %u)\n", - sev_supported ? "enabled" : "disabled", + sev_supported ? min_sev_asid <= max_sev_asid ? 
"enabled" : + "unusable" : + "disabled", min_sev_asid, max_sev_asid); if (boot_cpu_has(X86_FEATURE_SEV_ES)) pr_info("SEV-ES %s (ASIDs %u - %u)\n", @@ -2320,7 +2330,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) */ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) { - int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; + unsigned int asid = sev_get_asid(vcpu->kvm); /* * Note! The address must be a kernel address, as regular page walk @@ -2638,7 +2648,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) void pre_sev_run(struct vcpu_svm *svm, int cpu) { struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); - int asid = sev_get_asid(svm->vcpu.kvm); + unsigned int asid = sev_get_asid(svm->vcpu.kvm); /* Assign the asid allocated with this SEV guest */ svm->asid = asid; @@ -3174,7 +3184,7 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) unsigned long pfn; struct page *p; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); /* diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 88659de4d2a7..c6b4b1728006 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -735,13 +735,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit, * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_invlpga, - TP_PROTO(__u64 rip, int asid, u64 address), + TP_PROTO(__u64 rip, unsigned int asid, u64 address), TP_ARGS(rip, asid, address), TP_STRUCT__entry( - __field( __u64, rip ) - __field( int, asid ) - __field( __u64, address ) + __field( __u64, rip ) + __field( unsigned int, asid ) + __field( __u64, address ) ), TP_fast_assign( @@ -750,7 +750,7 @@ TRACE_EVENT(kvm_invlpga, __entry->address = address; ), - TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", + TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx", __entry->rip, __entry->asid, __entry->address) ); diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 
721b528da9ac..e674ccf720b9 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -163,6 +163,7 @@ SYM_CODE_START_NOALIGN(srso_alias_untrain_ret) lfence jmp srso_alias_return_thunk SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) .popsection .pushsection .text..__x86.rethunk_safe @@ -224,10 +225,16 @@ SYM_CODE_START(srso_return_thunk) SYM_CODE_END(srso_return_thunk) #define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" -#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret" #else /* !CONFIG_MITIGATION_SRSO */ +/* Dummy for the alternative in CALL_UNTRAIN_RET. */ +SYM_CODE_START(srso_alias_untrain_ret) + ANNOTATE_UNRET_SAFE + ANNOTATE_NOENDBR + ret + int3 +SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) #define JMP_SRSO_UNTRAIN_RET "ud2" -#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2" #endif /* CONFIG_MITIGATION_SRSO */ #ifdef CONFIG_MITIGATION_UNRET_ENTRY @@ -319,9 +326,7 @@ SYM_FUNC_END(retbleed_untrain_ret) #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) SYM_FUNC_START(entry_untrain_ret) - ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \ - JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO, \ - JMP_SRSO_ALIAS_UNTRAIN_RET, X86_FEATURE_SRSO_ALIAS + ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index a204a332c71f..968d7005f4a7 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; - bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - /* if this is already a gbpage, this portion is already mapped */ - if (pud_leaf(*pud)) - continue; - - /* Is using a gbpage allowed? 
*/ - use_gbpage = info->direct_gbpages; - - /* Don't use gbpage if it maps more than the requested region. */ - /* at the begining: */ - use_gbpage &= ((addr & ~PUD_MASK) == 0); - /* ... or at the end: */ - use_gbpage &= ((next & ~PUD_MASK) == 0); - - /* Never overwrite existing mappings */ - use_gbpage &= !pud_present(*pud); - - if (use_gbpage) { + if (info->direct_gbpages) { pud_t pudval; + if (pud_present(*pud)) + continue; + + addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 70b91de2e053..422602f6039b 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -492,6 +492,24 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ENABLED) ia32_disable(); + + /* + * Override init functions that scan the ROM region in SEV-SNP guests, + * as this memory is not pre-validated and would thus cause a crash. + */ + if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.pci.init_irq = x86_init_noop; + x86_init.resources.probe_roms = x86_init_noop; + + /* + * DMI setup behavior for SEV-SNP guests depends on + * efi_enabled(EFI_CONFIG_TABLES), which hasn't been + * parsed yet. snp_dmi_setup() will run after that + * parsing has happened. 
+ */ + x86_init.resources.dmi_setup = snp_dmi_setup; + } } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 104544359d69..025fd7ea5d69 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -24,6 +24,7 @@ #include <linux/memblock.h> #include <linux/init.h> +#include <asm/pgtable_areas.h> #include "numa_internal.h" diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 0d72183b5dd0..36b603d0cdde 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -947,6 +947,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } +static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) +{ + unsigned long prot; + + VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); + + /* + * We need the starting PFN and cachemode used for track_pfn_remap() + * that covered the whole VMA. For most mappings, we can obtain that + * information from the page tables. For COW mappings, we might now + * suddenly have anon folios mapped and follow_phys() will fail. + * + * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to + * detect the PFN. If we need the cachemode as well, we're out of luck + * for now and have to fail fork(). + */ + if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { + if (pgprot) + *pgprot = __pgprot(prot); + return 0; + } + if (is_cow_mapping(vma->vm_flags)) { + if (pgprot) + return -EINVAL; + *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + return 0; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). 
@@ -957,20 +989,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; - unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (vma->vm_flags & VM_PAT) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, &pgprot)) return -EINVAL; - } - pgprot = __pgprot(prot); + /* reserve the whole chunk covered by vma. */ return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } @@ -1045,7 +1070,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, bool mm_wr_locked) { resource_size_t paddr; - unsigned long prot; if (vma && !(vma->vm_flags & VM_PAT)) return; @@ -1053,11 +1077,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, /* free the chunk starting from pfn or the whole chunk */ paddr = (resource_size_t)pfn << PAGE_SHIFT; if (!paddr && !size) { - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, NULL)) return; - } - size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a7ba8e178645..df5fac428408 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -480,7 +480,7 @@ static int emit_call(u8 **pprog, void *func, void *ip) static int emit_rsb_call(u8 **pprog, void *func, void *ip) { OPTIMIZER_HIDE_VAR(func); - x86_call_depth_emit_accounting(pprog, func); + ip += x86_call_depth_emit_accounting(pprog, func, ip); return emit_patch(pprog, func, ip, 0xE8); } @@ -1972,20 +1972,17 @@ populate_extable: /* call */ case BPF_JMP | BPF_CALL: { - int offs; + u8 *ip = image + addrs[i - 1]; func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { 
RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth); - if (!imm32) - return -EINVAL; - offs = 7 + x86_call_depth_emit_accounting(&prog, func); - } else { - if (!imm32) - return -EINVAL; - offs = x86_call_depth_emit_accounting(&prog, func); + ip += 7; } - if (emit_call(&prog, func, image + addrs[i - 1] + offs)) + if (!imm32) + return -EINVAL; + ip += x86_call_depth_emit_accounting(&prog, func, ip); + if (emit_call(&prog, func, ip)) return -EINVAL; break; } @@ -2835,7 +2832,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im * Direct-call fentry stub, as such it needs accounting for the * __fentry__ call. */ - x86_call_depth_emit_accounting(&prog, NULL); + x86_call_depth_emit_accounting(&prog, NULL, image); } EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ diff --git a/arch/x86/virt/Makefile b/arch/x86/virt/Makefile index 1e36502cd738..ea343fc392dc 100644 --- a/arch/x86/virt/Makefile +++ b/arch/x86/virt/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += vmx/ +obj-y += svm/ vmx/ diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index cffe1157a90a..ab0e8448bb6e 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -77,7 +77,7 @@ static int __mfd_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -98,7 +98,7 @@ static int __snp_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -174,11 +174,11 @@ static int __init snp_rmptable_init(void) u64 rmptable_size; u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; if (!amd_iommu_snp_en) - return 0; + goto nosnp; if (!probed_rmp_size) goto nosnp; @@ -225,7 +225,7 @@ skip_enable: return 0; nosnp: - 
setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); return -ENOSYS; } @@ -246,7 +246,7 @@ static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level) { struct rmpentry *large_entry, *entry; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return ERR_PTR(-ENODEV); entry = get_rmpentry(pfn); @@ -363,7 +363,7 @@ int psmash(u64 pfn) unsigned long paddr = pfn << PAGE_SHIFT; int ret; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; if (!pfn_valid(pfn)) @@ -472,7 +472,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state) unsigned long paddr = pfn << PAGE_SHIFT; int ret, level; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; level = RMP_TO_PG_LEVEL(state->pagesize); @@ -558,3 +558,13 @@ void snp_leak_pages(u64 pfn, unsigned int npages) spin_unlock(&snp_leaked_pages_list_lock); } EXPORT_SYMBOL_GPL(snp_leak_pages); + +void kdump_sev_callback(void) +{ + /* + * Do wbinvd() on remote CPUs when SNP is enabled in order to + * safely do SNP_SHUTDOWN on the local CPU. 
+ */ + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + wbinvd(); +} diff --git a/block/bdev.c b/block/bdev.c index 7a5f611c3d2e..b8e32d933a63 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -583,9 +583,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder, mutex_unlock(&bdev->bd_holder_lock); bd_clear_claiming(whole, holder); mutex_unlock(&bdev_lock); - - if (hops && hops->get_holder) - hops->get_holder(holder); } /** @@ -608,7 +605,6 @@ EXPORT_SYMBOL(bd_abort_claiming); static void bd_end_claim(struct block_device *bdev, void *holder) { struct block_device *whole = bdev_whole(bdev); - const struct blk_holder_ops *hops = bdev->bd_holder_ops; bool unblock = false; /* @@ -631,9 +627,6 @@ static void bd_end_claim(struct block_device *bdev, void *holder) whole->bd_holder = NULL; mutex_unlock(&bdev_lock); - if (hops && hops->put_holder) - hops->put_holder(holder); - /* * If this was the last claim, remove holder link and unblock evpoll if * it was a write holder. @@ -776,17 +769,17 @@ void blkdev_put_no_open(struct block_device *bdev) static bool bdev_writes_blocked(struct block_device *bdev) { - return bdev->bd_writers == -1; + return bdev->bd_writers < 0; } static void bdev_block_writes(struct block_device *bdev) { - bdev->bd_writers = -1; + bdev->bd_writers--; } static void bdev_unblock_writes(struct block_device *bdev) { - bdev->bd_writers = 0; + bdev->bd_writers++; } static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode) @@ -813,6 +806,11 @@ static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode) bdev->bd_writers++; } +static inline bool bdev_unclaimed(const struct file *bdev_file) +{ + return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host); +} + static void bdev_yield_write_access(struct file *bdev_file) { struct block_device *bdev; @@ -820,14 +818,15 @@ static void bdev_yield_write_access(struct file *bdev_file) if (bdev_allow_write_mounted) return; + if (bdev_unclaimed(bdev_file)) + return; 
+ bdev = file_bdev(bdev_file); - /* Yield exclusive or shared write access. */ - if (bdev_file->f_mode & FMODE_WRITE) { - if (bdev_writes_blocked(bdev)) - bdev_unblock_writes(bdev); - else - bdev->bd_writers--; - } + + if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED) + bdev_unblock_writes(bdev); + else if (bdev_file->f_mode & FMODE_WRITE) + bdev->bd_writers--; } /** @@ -907,6 +906,8 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; if (bdev_nowait(bdev)) bdev_file->f_mode |= FMODE_NOWAIT; + if (mode & BLK_OPEN_RESTRICT_WRITES) + bdev_file->f_mode |= FMODE_WRITE_RESTRICTED; bdev_file->f_mapping = bdev->bd_inode->i_mapping; bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); bdev_file->private_data = holder; @@ -1012,6 +1013,20 @@ struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, } EXPORT_SYMBOL(bdev_file_open_by_path); +static inline void bd_yield_claim(struct file *bdev_file) +{ + struct block_device *bdev = file_bdev(bdev_file); + void *holder = bdev_file->private_data; + + lockdep_assert_held(&bdev->bd_disk->open_mutex); + + if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder))) + return; + + if (!bdev_unclaimed(bdev_file)) + bd_end_claim(bdev, holder); +} + void bdev_release(struct file *bdev_file) { struct block_device *bdev = file_bdev(bdev_file); @@ -1036,7 +1051,7 @@ void bdev_release(struct file *bdev_file) bdev_yield_write_access(bdev_file); if (holder) - bd_end_claim(bdev, holder); + bd_yield_claim(bdev_file); /* * Trigger event checking and tell drivers to flush MEDIA_CHANGE @@ -1057,6 +1072,39 @@ put_no_open: } /** + * bdev_fput - yield claim to the block device and put the file + * @bdev_file: open block device + * + * Yield claim on the block device and put the file. Ensure that the + * block device can be reclaimed before the file is closed which is a + * deferred operation. 
+ */ +void bdev_fput(struct file *bdev_file) +{ + if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops)) + return; + + if (bdev_file->private_data) { + struct block_device *bdev = file_bdev(bdev_file); + struct gendisk *disk = bdev->bd_disk; + + mutex_lock(&disk->open_mutex); + bdev_yield_write_access(bdev_file); + bd_yield_claim(bdev_file); + /* + * Tell release we already gave up our hold on the + * device and if write restrictions are available that + * we already gave up write access to the device. + */ + bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host); + mutex_unlock(&disk->open_mutex); + } + + fput(bdev_file); +} +EXPORT_SYMBOL(bdev_fput); + +/** * lookup_bdev() - Look up a struct block_device by name. * @pathname: Name of the block device in the filesystem. * @dev: Pointer to the block device's dev_t, if found. diff --git a/block/blk-merge.c b/block/blk-merge.c index 2a06fd33039d..4e3483a16b75 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -726,7 +726,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, * which can be mixed are set in each bio and mark @rq as mixed * merged. */ -void blk_rq_set_mixed_merge(struct request *rq) +static void blk_rq_set_mixed_merge(struct request *rq) { blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK; struct bio *bio; diff --git a/block/blk-mq.c b/block/blk-mq.c index 555ada922cf0..32afb87efbd0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -770,16 +770,11 @@ static void req_bio_endio(struct request *rq, struct bio *bio, /* * Partial zone append completions cannot be supported as the * BIO fragments may end up not being written sequentially. - * For such case, force the completed nbytes to be equal to - * the BIO size so that bio_advance() sets the BIO remaining - * size to 0 and we end up calling bio_endio() before returning. 
*/ - if (bio->bi_iter.bi_size != nbytes) { + if (bio->bi_iter.bi_size != nbytes) bio->bi_status = BLK_STS_IOERR; - nbytes = bio->bi_iter.bi_size; - } else { + else bio->bi_iter.bi_sector = rq->__sector; - } } bio_advance(bio, nbytes); diff --git a/block/blk-settings.c b/block/blk-settings.c index 3c7d8d638ab5..cdbaef159c4b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -146,8 +146,7 @@ static int blk_validate_limits(struct queue_limits *lim) max_hw_sectors = min_not_zero(lim->max_hw_sectors, lim->max_dev_sectors); if (lim->max_user_sectors) { - if (lim->max_user_sectors > max_hw_sectors || - lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE) + if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE) return -EINVAL; lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors); } else { diff --git a/block/blk.h b/block/blk.h index 5cac4e29ae17..d9f584984bc4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -339,7 +339,6 @@ int ll_back_merge_fn(struct request *req, struct bio *bio, bool blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next); unsigned int blk_recalc_rq_segments(struct request *rq); -void blk_rq_set_mixed_merge(struct request *rq); bool blk_rq_merge_ok(struct request *rq, struct bio *bio); enum elv_merge blk_try_merge(struct request *rq, struct bio *bio); diff --git a/block/ioctl.c b/block/ioctl.c index 0c76137adcaa..a9028a2c2db5 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -96,7 +96,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, unsigned long arg) { uint64_t range[2]; - uint64_t start, len; + uint64_t start, len, end; struct inode *inode = bdev->bd_inode; int err; @@ -117,7 +117,8 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, if (len & 511) return -EINVAL; - if (start + len > bdev_nr_bytes(bdev)) + if (check_add_overflow(start, len, &end) || + end > bdev_nr_bytes(bdev)) return -EINVAL; filemap_invalidate_lock(inode->i_mapping); diff --git 
a/crypto/asymmetric_keys/mscode_parser.c b/crypto/asymmetric_keys/mscode_parser.c index 05402ef8964e..8aecbe4637f3 100644 --- a/crypto/asymmetric_keys/mscode_parser.c +++ b/crypto/asymmetric_keys/mscode_parser.c @@ -75,6 +75,9 @@ int mscode_note_digest_algo(void *context, size_t hdrlen, oid = look_up_OID(value, vlen); switch (oid) { + case OID_sha1: + ctx->digest_algo = "sha1"; + break; case OID_sha256: ctx->digest_algo = "sha256"; break; diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c index 5b08c50722d0..231ad7b3789d 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.c +++ b/crypto/asymmetric_keys/pkcs7_parser.c @@ -227,6 +227,9 @@ int pkcs7_sig_note_digest_algo(void *context, size_t hdrlen, struct pkcs7_parse_context *ctx = context; switch (ctx->last_oid) { + case OID_sha1: + ctx->sinfo->sig->hash_algo = "sha1"; + break; case OID_sha256: ctx->sinfo->sig->hash_algo = "sha256"; break; @@ -278,6 +281,7 @@ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen, ctx->sinfo->sig->pkey_algo = "rsa"; ctx->sinfo->sig->encoding = "pkcs1"; break; + case OID_id_ecdsa_with_sha1: case OID_id_ecdsa_with_sha224: case OID_id_ecdsa_with_sha256: case OID_id_ecdsa_with_sha384: diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index e5f22691febd..e314fd57e6f8 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -115,7 +115,8 @@ software_key_determine_akcipher(const struct public_key *pkey, */ if (!hash_algo) return -EINVAL; - if (strcmp(hash_algo, "sha224") != 0 && + if (strcmp(hash_algo, "sha1") != 0 && + strcmp(hash_algo, "sha224") != 0 && strcmp(hash_algo, "sha256") != 0 && strcmp(hash_algo, "sha384") != 0 && strcmp(hash_algo, "sha512") != 0 && diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c index 398983be77e8..2deff81f8af5 100644 --- a/crypto/asymmetric_keys/signature.c +++ b/crypto/asymmetric_keys/signature.c @@ -115,7 
+115,7 @@ EXPORT_SYMBOL_GPL(decrypt_blob); * Sign the specified data blob using the private key specified by params->key. * The signature is wrapped in an encoding if params->encoding is specified * (eg. "pkcs1"). If the encoding needs to know the digest type, this can be - * passed through params->hash_algo (eg. "sha512"). + * passed through params->hash_algo (eg. "sha1"). * * Returns the length of the data placed in the signature buffer or an error. */ diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 487204d39426..bb0bffa271b5 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -198,6 +198,10 @@ int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag, default: return -ENOPKG; /* Unsupported combination */ + case OID_sha1WithRSAEncryption: + ctx->cert->sig->hash_algo = "sha1"; + goto rsa_pkcs1; + case OID_sha256WithRSAEncryption: ctx->cert->sig->hash_algo = "sha256"; goto rsa_pkcs1; @@ -214,6 +218,10 @@ int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag, ctx->cert->sig->hash_algo = "sha224"; goto rsa_pkcs1; + case OID_id_ecdsa_with_sha1: + ctx->cert->sig->hash_algo = "sha1"; + goto ecdsa; + case OID_id_rsassa_pkcs1_v1_5_with_sha3_256: ctx->cert->sig->hash_algo = "sha3-256"; goto rsa_pkcs1; diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 986f331a5fc2..12e1c892f366 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -653,6 +653,30 @@ static const struct akcipher_testvec rsa_tv_template[] = { static const struct akcipher_testvec ecdsa_nist_p192_tv_template[] = { { .key = + "\x04\xf7\x46\xf8\x2f\x15\xf6\x22\x8e\xd7\x57\x4f\xcc\xe7\xbb\xc1" + "\xd4\x09\x73\xcf\xea\xd0\x15\x07\x3d\xa5\x8a\x8a\x95\x43\xe4\x68" + "\xea\xc6\x25\xc1\xc1\x01\x25\x4c\x7e\xc3\x3c\xa6\x04\x0a\xe7\x08" + "\x98", + .key_len = 49, + .params = + "\x30\x13\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x08\x2a\x86\x48" + "\xce\x3d\x03\x01\x01", + 
.param_len = 21, + .m = + "\xcd\xb9\xd2\x1c\xb7\x6f\xcd\x44\xb3\xfd\x63\xea\xa3\x66\x7f\xae" + "\x63\x85\xe7\x82", + .m_size = 20, + .algo = OID_id_ecdsa_with_sha1, + .c = + "\x30\x35\x02\x19\x00\xba\xe5\x93\x83\x6e\xb6\x3b\x63\xa0\x27\x91" + "\xc6\xf6\x7f\xc3\x09\xad\x59\xad\x88\x27\xd6\x92\x6b\x02\x18\x10" + "\x68\x01\x9d\xba\xce\x83\x08\xef\x95\x52\x7b\xa0\x0f\xe4\x18\x86" + "\x80\x6f\xa5\x79\x77\xda\xd0", + .c_size = 55, + .public_key_vec = true, + .siggen_sigver_test = true, + }, { + .key = "\x04\xb6\x4b\xb1\xd1\xac\xba\x24\x8f\x65\xb2\x60\x00\x90\xbf\xbd" "\x78\x05\x73\xe9\x79\x1d\x6f\x7c\x0b\xd2\xc3\x93\xa7\x28\xe1\x75" "\xf7\xd5\x95\x1d\x28\x10\xc0\x75\x50\x5c\x1a\x4f\x3f\x8f\xa5\xee" @@ -756,6 +780,32 @@ static const struct akcipher_testvec ecdsa_nist_p192_tv_template[] = { static const struct akcipher_testvec ecdsa_nist_p256_tv_template[] = { { .key = + "\x04\xb9\x7b\xbb\xd7\x17\x64\xd2\x7e\xfc\x81\x5d\x87\x06\x83\x41" + "\x22\xd6\x9a\xaa\x87\x17\xec\x4f\x63\x55\x2f\x94\xba\xdd\x83\xe9" + "\x34\x4b\xf3\xe9\x91\x13\x50\xb6\xcb\xca\x62\x08\xe7\x3b\x09\xdc" + "\xc3\x63\x4b\x2d\xb9\x73\x53\xe4\x45\xe6\x7c\xad\xe7\x6b\xb0\xe8" + "\xaf", + .key_len = 65, + .params = + "\x30\x13\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x08\x2a\x86\x48" + "\xce\x3d\x03\x01\x07", + .param_len = 21, + .m = + "\xc2\x2b\x5f\x91\x78\x34\x26\x09\x42\x8d\x6f\x51\xb2\xc5\xaf\x4c" + "\x0b\xde\x6a\x42", + .m_size = 20, + .algo = OID_id_ecdsa_with_sha1, + .c = + "\x30\x46\x02\x21\x00\xf9\x25\xce\x9f\x3a\xa6\x35\x81\xcf\xd4\xe7" + "\xb7\xf0\x82\x56\x41\xf7\xd4\xad\x8d\x94\x5a\x69\x89\xee\xca\x6a" + "\x52\x0e\x48\x4d\xcc\x02\x21\x00\xd7\xe4\xef\x52\x66\xd3\x5b\x9d" + "\x8a\xfa\x54\x93\x29\xa7\x70\x86\xf1\x03\x03\xf3\x3b\xe2\x73\xf7" + "\xfb\x9d\x8b\xde\xd4\x8d\x6f\xad", + .c_size = 72, + .public_key_vec = true, + .siggen_sigver_test = true, + }, { + .key = "\x04\x8b\x6d\xc0\x33\x8e\x2d\x8b\x67\xf5\xeb\xc4\x7f\xa0\xf5\xd9" "\x7b\x03\xa5\x78\x9a\xb5\xea\x14\xe4\x23\xd0\xaf\xd7\x0e\x2e\xa0" 
"\xc9\x8b\xdb\x95\xf8\xb3\xaf\xac\x00\x2c\x2c\x1f\x7a\xfd\x95\x88" @@ -866,6 +916,36 @@ static const struct akcipher_testvec ecdsa_nist_p256_tv_template[] = { static const struct akcipher_testvec ecdsa_nist_p384_tv_template[] = { { + .key = /* secp384r1(sha1) */ + "\x04\x89\x25\xf3\x97\x88\xcb\xb0\x78\xc5\x72\x9a\x14\x6e\x7a\xb1" + "\x5a\xa5\x24\xf1\x95\x06\x9e\x28\xfb\xc4\xb9\xbe\x5a\x0d\xd9\x9f" + "\xf3\xd1\x4d\x2d\x07\x99\xbd\xda\xa7\x66\xec\xbb\xea\xba\x79\x42" + "\xc9\x34\x89\x6a\xe7\x0b\xc3\xf2\xfe\x32\x30\xbe\xba\xf9\xdf\x7e" + "\x4b\x6a\x07\x8e\x26\x66\x3f\x1d\xec\xa2\x57\x91\x51\xdd\x17\x0e" + "\x0b\x25\xd6\x80\x5c\x3b\xe6\x1a\x98\x48\x91\x45\x7a\x73\xb0\xc3" + "\xf1", + .key_len = 97, + .params = + "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04" + "\x00\x22", + .param_len = 18, + .m = + "\x12\x55\x28\xf0\x77\xd5\xb6\x21\x71\x32\x48\xcd\x28\xa8\x25\x22" + "\x3a\x69\xc1\x93", + .m_size = 20, + .algo = OID_id_ecdsa_with_sha1, + .c = + "\x30\x66\x02\x31\x00\xf5\x0f\x24\x4c\x07\x93\x6f\x21\x57\x55\x07" + "\x20\x43\x30\xde\xa0\x8d\x26\x8e\xae\x63\x3f\xbc\x20\x3a\xc6\xf1" + "\x32\x3c\xce\x70\x2b\x78\xf1\x4c\x26\xe6\x5b\x86\xcf\xec\x7c\x7e" + "\xd0\x87\xd7\xd7\x6e\x02\x31\x00\xcd\xbb\x7e\x81\x5d\x8f\x63\xc0" + "\x5f\x63\xb1\xbe\x5e\x4c\x0e\xa1\xdf\x28\x8c\x1b\xfa\xf9\x95\x88" + "\x74\xa0\x0f\xbf\xaf\xc3\x36\x76\x4a\xa1\x59\xf1\x1c\xa4\x58\x26" + "\x79\x12\x2a\xb7\xc5\x15\x92\xc5", + .c_size = 104, + .public_key_vec = true, + .siggen_sigver_test = true, + }, { .key = /* secp384r1(sha224) */ "\x04\x69\x6c\xcf\x62\xee\xd0\x0d\xe5\xb5\x2f\x70\x54\xcf\x26\xa0" "\xd9\x98\x8d\x92\x2a\xab\x9b\x11\xcb\x48\x18\xa1\xa9\x0d\xd5\x18" diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c index b91155ea9c34..c9131259f717 100644 --- a/drivers/acpi/acpica/dbnames.c +++ b/drivers/acpi/acpica/dbnames.c @@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle, ACPI_FREE(buffer.pointer); buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - 
acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); - + status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); + if (ACPI_FAILURE(status)) { + acpi_os_printf("Could Not evaluate object %p\n", + obj_handle); + return (AE_OK); + } /* * Since this is a field unit, surround the output in braces */ diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index 66e7f529e92f..01faca3a238a 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -851,7 +851,7 @@ err_put_table: return rc; } -static void __exit einj_remove(struct platform_device *pdev) +static void einj_remove(struct platform_device *pdev) { struct apei_exec_context ctx; diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 302dce0b2b50..d67881b50bca 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -662,14 +662,15 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz, { int result; - tz->thermal_zone = thermal_zone_device_register_with_trips("acpitz", - trip_table, - trip_count, - tz, - &acpi_thermal_zone_ops, - NULL, - passive_delay, - tz->polling_frequency * 100); + if (trip_count) + tz->thermal_zone = thermal_zone_device_register_with_trips( + "acpitz", trip_table, trip_count, tz, + &acpi_thermal_zone_ops, NULL, passive_delay, + tz->polling_frequency * 100); + else + tz->thermal_zone = thermal_tripless_zone_device_register( + "acpitz", tz, &acpi_thermal_zone_ops, NULL); + if (IS_ERR(tz->thermal_zone)) return PTR_ERR(tz->thermal_zone); @@ -901,11 +902,8 @@ static int acpi_thermal_add(struct acpi_device *device) trip++; } - if (trip == trip_table) { + if (trip == trip_table) pr_warn(FW_BUG "No valid trip points!\n"); - result = -ENODEV; - goto free_memory; - } result = acpi_thermal_register_thermal_zone(tz, trip_table, trip - trip_table, diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index d4a626f87963..79a8b0aa37bf 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -30,7 
+30,6 @@ #define ST_AHCI_OOBR_CIMAX_SHIFT 0 struct st_ahci_drv_data { - struct platform_device *ahci; struct reset_control *pwr; struct reset_control *sw_rst; struct reset_control *pwr_rst; diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index b0d6e69c4a5b..214b935c2ced 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -712,8 +712,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ehc->saved_ncq_enabled |= 1 << devno; /* If we are resuming, wake up the device */ - if (ap->pflags & ATA_PFLAG_RESUMING) + if (ap->pflags & ATA_PFLAG_RESUMING) { + dev->flags |= ATA_DFLAG_RESUMING; ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; + } } } @@ -3169,6 +3171,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, return 0; err: + dev->flags &= ~ATA_DFLAG_RESUMING; *r_failed_dev = dev; return rc; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 0a0f483124c3..2f4c58837641 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4730,6 +4730,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) struct ata_link *link; struct ata_device *dev; unsigned long flags; + bool do_resume; int ret = 0; mutex_lock(&ap->scsi_scan_mutex); @@ -4751,7 +4752,15 @@ void ata_scsi_dev_rescan(struct work_struct *work) if (scsi_device_get(sdev)) continue; + do_resume = dev->flags & ATA_DFLAG_RESUMING; + spin_unlock_irqrestore(ap->lock, flags); + if (do_resume) { + ret = scsi_resume_device(sdev); + if (ret == -EWOULDBLOCK) + goto unlock; + dev->flags &= ~ATA_DFLAG_RESUMING; + } ret = scsi_rescan_device(sdev); scsi_device_put(sdev); spin_lock_irqsave(ap->lock, flags); diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 4ac854f6b057..88b2e9817f49 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -1371,9 +1371,6 @@ static struct pci_driver pata_macio_pci_driver = { .suspend = pata_macio_pci_suspend, .resume = pata_macio_pci_resume, #endif - 
.driver = { - .owner = THIS_MODULE, - }, }; MODULE_DEVICE_TABLE(pci, pata_macio_pci_match); diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c index 400b22ee99c3..4c270999ba3c 100644 --- a/drivers/ata/sata_gemini.c +++ b/drivers/ata/sata_gemini.c @@ -200,7 +200,10 @@ int gemini_sata_start_bridge(struct sata_gemini *sg, unsigned int bridge) pclk = sg->sata0_pclk; else pclk = sg->sata1_pclk; - clk_enable(pclk); + ret = clk_enable(pclk); + if (ret) + return ret; + msleep(10); /* Do not keep clocking a bridge that is not online */ diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index e82786c63fbd..9bec0aee92e0 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = { }, }; -static const struct pci_device_id mv_pci_tbl[] = { - { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, - { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, - { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, - { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, - /* RocketRAID 1720/174x have different identifiers */ - { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, - { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, - { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, - - { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, - { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, - { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, - { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, - { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, - - { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, - - /* Adaptec 1430SA */ - { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, - - /* Marvell 7042 support */ - { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, - - /* Highpoint RocketRAID PCIe series */ - { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, - { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, - - { } /* terminate list */ -}; - static const struct mv_hw_ops mv5xxx_ops = { .phy_errata = mv5_phy_errata, .enable_leds = mv5_enable_leds, @@ -4303,6 +4272,36 @@ static int mv_pci_init_one(struct pci_dev *pdev, static 
int mv_pci_device_resume(struct pci_dev *pdev); #endif +static const struct pci_device_id mv_pci_tbl[] = { + { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, + { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, + { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, + { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, + /* RocketRAID 1720/174x have different identifiers */ + { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, + { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, + { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, + + { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, + { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, + { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, + { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, + { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, + + { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, + + /* Adaptec 1430SA */ + { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, + + /* Marvell 7042 support */ + { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, + + /* Highpoint RocketRAID PCIe series */ + { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, + { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, + + { } /* terminate list */ +}; static struct pci_driver mv_pci_driver = { .name = DRV_NAME, @@ -4315,6 +4314,7 @@ static struct pci_driver mv_pci_driver = { #endif }; +MODULE_DEVICE_TABLE(pci, mv_pci_tbl); /** * mv_print_info - Dump key info to kernel log for perusal. @@ -4487,7 +4487,6 @@ static void __exit mv_exit(void) MODULE_AUTHOR("Brett Russ"); MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers"); MODULE_LICENSE("GPL v2"); -MODULE_DEVICE_TABLE(pci, mv_pci_tbl); MODULE_VERSION(DRV_VERSION); MODULE_ALIAS("platform:" DRV_NAME); diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index b51d7a9d0d90..a482741eb181 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -957,8 +957,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, offset -= (idx * window_size); idx++; - dist = ((long) (window_size - (offset + size))) >= 0 ? 
size : - (long) (window_size - offset); + dist = min(size, window_size - offset); memcpy_fromio(psource, dimm_mmio + offset / 4, dist); psource += dist; @@ -1005,8 +1004,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource, readl(mmio + PDC_DIMM_WINDOW_CTLR); offset -= (idx * window_size); idx++; - dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size : - (long) (window_size - offset); + dist = min(size, window_size - offset); memcpy_toio(dimm_mmio + offset / 4, psource, dist); writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); diff --git a/drivers/base/core.c b/drivers/base/core.c index b93f3c5716ae..5f4e03336e68 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void); static void __fw_devlink_link_to_consumers(struct device *dev); static bool fw_devlink_drv_reg_done; static bool fw_devlink_best_effort; +static struct workqueue_struct *device_link_wq; /** * __fwnode_link_add - Create a link between two fwnode_handles. @@ -533,12 +534,26 @@ static void devlink_dev_release(struct device *dev) /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or - * supplier devices get deleted when it runs, so put it into the "long" - * workqueue. + * supplier devices get deleted when it runs, so put it into the + * dedicated workqueue. */ - queue_work(system_long_wq, &link->rm_work); + queue_work(device_link_wq, &link->rm_work); } +/** + * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate + */ +void device_link_wait_removal(void) +{ + /* + * devlink removal jobs are queued in the dedicated work queue. + * To be sure that all removal jobs are terminated, ensure that any + * scheduled work has run to completion. 
+ */ + flush_workqueue(device_link_wq); +} +EXPORT_SYMBOL_GPL(device_link_wait_removal); + static struct class devlink_class = { .name = "devlink", .dev_groups = devlink_groups, @@ -4164,9 +4179,14 @@ int __init devices_init(void) sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; + device_link_wq = alloc_workqueue("device_link_wq", 0, 0); + if (!device_link_wq) + goto wq_err; return 0; + wq_err: + kobject_put(sysfs_dev_char_kobj); char_kobj_err: kobject_put(sysfs_dev_block_kobj); block_kobj_err: diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c index 41edd6a430eb..55999a50ccc0 100644 --- a/drivers/base/regmap/regcache-maple.c +++ b/drivers/base/regmap/regcache-maple.c @@ -112,7 +112,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min, unsigned long *entry, *lower, *upper; unsigned long lower_index, lower_last; unsigned long upper_index, upper_last; - int ret; + int ret = 0; lower = NULL; upper = NULL; @@ -145,7 +145,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min, upper_index = max + 1; upper_last = mas.last; - upper = kmemdup(&entry[max + 1], + upper = kmemdup(&entry[max - mas.index + 1], ((mas.last - max) * sizeof(unsigned long)), map->alloc_flags); @@ -244,7 +244,7 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min, unsigned long lmin = min; unsigned long lmax = max; unsigned int r, v, sync_start; - int ret; + int ret = 0; bool sync_needed = false; map->cache_bypass = true; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 5cb425f6f02d..0a34dd3c4f38 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2839,6 +2839,43 @@ int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val) EXPORT_SYMBOL_GPL(regmap_read); /** + * regmap_read_bypassed() - Read a value from a single register direct + * from the device, bypassing the cache + * + 
* @map: Register map to read from + * @reg: Register to be read from + * @val: Pointer to store read value + * + * A value of zero will be returned on success, a negative errno will + * be returned in error cases. + */ +int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val) +{ + int ret; + bool bypass, cache_only; + + if (!IS_ALIGNED(reg, map->reg_stride)) + return -EINVAL; + + map->lock(map->lock_arg); + + bypass = map->cache_bypass; + cache_only = map->cache_only; + map->cache_bypass = true; + map->cache_only = false; + + ret = _regmap_read(map, reg, val); + + map->cache_bypass = bypass; + map->cache_only = cache_only; + + map->unlock(map->lock_arg); + + return ret; +} +EXPORT_SYMBOL_GPL(regmap_read_bypassed); + +/** * regmap_raw_read() - Read raw data from the device * * @map: Register map to read from diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 71c39bcd872c..ed33cf7192d2 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1965,10 +1965,10 @@ static int null_add_dev(struct nullb_device *dev) out_ida_free: ida_free(&nullb_indexes, nullb->index); -out_cleanup_zone: - null_free_zoned_dev(dev); out_cleanup_disk: put_disk(nullb->disk); +out_cleanup_zone: + null_free_zoned_dev(dev); out_cleanup_tags: if (nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index b40b32fa7f1c..19cfc342fc7b 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -826,11 +826,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) { + bdaddr_t bdaddr_swapped; struct sk_buff *skb; int err; - skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr, - HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + baswap(&bdaddr_swapped, bdaddr); + + skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, + &bdaddr_swapped, HCI_EV_VENDOR, + HCI_INIT_TIMEOUT); 
if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 8a60ad7acd70..ecbc52eaf101 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,7 +7,6 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -226,6 +225,7 @@ struct qca_serdev { struct qca_power *bt_power; u32 init_speed; u32 oper_speed; + bool bdaddr_property_broken; const char *firmware_name; }; @@ -1843,6 +1843,7 @@ static int qca_setup(struct hci_uart *hu) const char *firmware_name = qca_get_firmware_name(hu); int ret; struct qca_btsoc_version ver; + struct qca_serdev *qcadev; const char *soc_name; ret = qca_check_speeds(hu); @@ -1904,16 +1905,11 @@ retry: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - /* Set BDA quirk bit for reading BDA value from fwnode property - * only if that property exist in DT. 
- */ - if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); - } else { - bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); - } + qcadev = serdev_device_get_drvdata(hu->serdev); + if (qcadev->bdaddr_property_broken) + set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks); hci_set_aosp_capable(hdev); @@ -2295,6 +2291,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (!qcadev->oper_speed) BT_DBG("UART will pick default operating speed"); + qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev, + "qcom,local-bd-address-broken"); + if (data) qcadev->btsoc_type = data->soc_type; else diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index f44efbb89c34..2102377f727b 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -1090,7 +1090,7 @@ static int __sev_snp_init_locked(int *error) void *arg = &data; int cmd, rc = 0; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; sev = psp->sev_data; diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 1cd304de5388..b2191ade9011 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -806,6 +806,8 @@ static int save_iaa_wq(struct idxd_wq *wq) return -EINVAL; cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; + if (!cpus_per_iaa) + cpus_per_iaa = 1; out: return 0; } @@ -821,10 +823,12 @@ static void remove_iaa_wq(struct idxd_wq *wq) } } - if (nr_iaa) + if (nr_iaa) { cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; - else - cpus_per_iaa = 0; + if (!cpus_per_iaa) + cpus_per_iaa = 1; + } else + cpus_per_iaa = 1; } static int wq_table_add_wqs(int iaa, int cpu) diff --git 
a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 67998dbd1d46..5f3c9c5529b9 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -144,17 +144,4 @@ config CXL_REGION_INVALIDATION_TEST If unsure, or if this kernel is meant for production environments, say N. -config CXL_PMU - tristate "CXL Performance Monitoring Unit" - default CXL_BUS - depends on PERF_EVENTS - help - Support performance monitoring as defined in CXL rev 3.0 - section 13.2: Performance Monitoring. CXL components may have - one or more CXL Performance Monitoring Units (CPMUs). - - Say 'y/m' to enable a driver that will attach to performance - monitoring units and provide standard perf based interfaces. - - If unsure say 'm'. endif diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c index 9c2a0c082a76..ed4b323886e4 100644 --- a/drivers/dma-buf/st-dma-fence-chain.c +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -84,11 +84,11 @@ static int sanitycheck(void *arg) return -ENOMEM; chain = mock_chain(NULL, f, 1); - if (!chain) + if (chain) + dma_fence_enable_sw_signaling(chain); + else err = -ENOMEM; - dma_fence_enable_sw_signaling(chain); - dma_fence_signal(f); dma_fence_put(f); diff --git a/drivers/dpll/Kconfig b/drivers/dpll/Kconfig index a4cae73f20d3..20607ed54243 100644 --- a/drivers/dpll/Kconfig +++ b/drivers/dpll/Kconfig @@ -4,4 +4,4 @@ # config DPLL - bool + bool diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 7bc71f4be64a..38d19410a2be 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2060,6 +2060,8 @@ static void bus_reset_work(struct work_struct *work) ohci->generation = generation; reg_write(ohci, OHCI1394_IntEventClear, OHCI1394_busReset); + if (param_debug & OHCI_PARAM_DEBUG_BUSRESETS) + reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset); if (ohci->quirks & QUIRK_RESET_PACKET) ohci->request_generation = generation; @@ -2125,12 +2127,14 @@ static irqreturn_t irq_handler(int irq, void *data) return 
IRQ_NONE; /* - * busReset and postedWriteErr must not be cleared yet + * busReset and postedWriteErr events must not be cleared yet * (OHCI 1.1 clauses 7.2.3.2 and 13.2.8.1) */ reg_write(ohci, OHCI1394_IntEventClear, event & ~(OHCI1394_busReset | OHCI1394_postedWriteErr)); log_irqs(ohci, event); + if (event & OHCI1394_busReset) + reg_write(ohci, OHCI1394_IntMaskClear, OHCI1394_busReset); if (event & OHCI1394_selfIDComplete) queue_work(selfid_workqueue, &ohci->bus_reset_work); diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 7e1852859550..c41e7b2091cd 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -120,7 +120,7 @@ efi_status_t efi_random_alloc(unsigned long size, continue; } - target = round_up(max(md->phys_addr, alloc_min), align) + target_slot * align; + target = round_up(max_t(u64, md->phys_addr, alloc_min), align) + target_slot * align; pages = size / EFI_PAGE_SIZE; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 6a6ffc6707bd..d5a8182cf2e1 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -496,6 +496,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, hdr->vid_mode = 0xffff; hdr->type_of_loader = 0x21; + hdr->initrd_addr_max = INT_MAX; /* Convert unicode cmdline to ascii */ cmdline_ptr = efi_convert_cmdline(image, &options_size); diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index f384fa278764..d09c7d728365 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -728,6 +728,25 @@ static u32 line_event_id(int level) GPIO_V2_LINE_EVENT_FALLING_EDGE; } +static inline char *make_irq_label(const char *orig) +{ + char *new; + + if (!orig) + return NULL; + + new = kstrdup_and_replace(orig, '/', ':', GFP_KERNEL); + if (!new) + return 
ERR_PTR(-ENOMEM); + + return new; +} + +static inline void free_irq_label(const char *label) +{ + kfree(label); +} + #ifdef CONFIG_HTE static enum hte_return process_hw_ts_thread(void *p) @@ -1015,6 +1034,7 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us) { unsigned long irqflags; int ret, level, irq; + char *label; /* try hardware */ ret = gpiod_set_debounce(line->desc, debounce_period_us); @@ -1037,11 +1057,17 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us) if (irq < 0) return -ENXIO; + label = make_irq_label(line->req->label); + if (IS_ERR(label)) + return -ENOMEM; + irqflags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING; ret = request_irq(irq, debounce_irq_handler, irqflags, - line->req->label, line); - if (ret) + label, line); + if (ret) { + free_irq_label(label); return ret; + } line->irq = irq; } else { ret = hte_edge_setup(line, GPIO_V2_LINE_FLAG_EDGE_BOTH); @@ -1086,7 +1112,7 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc, static void edge_detector_stop(struct line *line) { if (line->irq) { - free_irq(line->irq, line); + free_irq_label(free_irq(line->irq, line)); line->irq = 0; } @@ -1110,6 +1136,7 @@ static int edge_detector_setup(struct line *line, unsigned long irqflags = 0; u64 eflags; int irq, ret; + char *label; eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS; if (eflags && !kfifo_initialized(&line->req->events)) { @@ -1146,11 +1173,17 @@ static int edge_detector_setup(struct line *line, IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING; irqflags |= IRQF_ONESHOT; + label = make_irq_label(line->req->label); + if (IS_ERR(label)) + return PTR_ERR(label); + /* Request a thread to read the events */ ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread, - irqflags, line->req->label, line); - if (ret) + irqflags, label, line); + if (ret) { + free_irq_label(label); return ret; + } line->irq = irq; return 0; @@ -1973,7 +2006,7 @@ static void 
lineevent_free(struct lineevent_state *le) blocking_notifier_chain_unregister(&le->gdev->device_notifier, &le->device_unregistered_nb); if (le->irq) - free_irq(le->irq, le); + free_irq_label(free_irq(le->irq, le)); if (le->desc) gpiod_free(le->desc); kfree(le->label); @@ -2114,6 +2147,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) int fd; int ret; int irq, irqflags = 0; + char *label; if (copy_from_user(&eventreq, ip, sizeof(eventreq))) return -EFAULT; @@ -2198,15 +2232,23 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) if (ret) goto out_free_le; + label = make_irq_label(le->label); + if (IS_ERR(label)) { + ret = PTR_ERR(label); + goto out_free_le; + } + /* Request a thread to read the events */ ret = request_threaded_irq(irq, lineevent_irq_handler, lineevent_irq_thread, irqflags, - le->label, + label, le); - if (ret) + if (ret) { + free_irq_label(label); goto out_free_le; + } le->irq = irq; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index ce94e37bcbee..94903fc1c145 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1175,6 +1175,9 @@ struct gpio_device *gpio_device_find(const void *data, list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { + if (!device_is_registered(&gdev->dev)) + continue; + guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); @@ -2397,6 +2400,11 @@ char *gpiochip_dup_line_label(struct gpio_chip *gc, unsigned int offset) } EXPORT_SYMBOL_GPL(gpiochip_dup_line_label); +static inline const char *function_name_or_default(const char *con_id) +{ + return con_id ?: "(default)"; +} + /** * gpiochip_request_own_desc - Allow GPIO chip to request its own descriptor * @gc: GPIO chip @@ -2425,10 +2433,11 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, enum gpiod_flags dflags) { struct gpio_desc *desc = gpiochip_get_desc(gc, hwnum); + const char *name = function_name_or_default(label); 
int ret; if (IS_ERR(desc)) { - chip_err(gc, "failed to get GPIO descriptor\n"); + chip_err(gc, "failed to get GPIO %s descriptor\n", name); return desc; } @@ -2438,8 +2447,8 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, ret = gpiod_configure_flags(desc, label, lflags, dflags); if (ret) { - chip_err(gc, "setup of own GPIO %s failed\n", label); gpiod_free_commit(desc); + chip_err(gc, "setup of own GPIO %s failed\n", name); return ERR_PTR(ret); } @@ -4153,19 +4162,17 @@ static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode, enum gpiod_flags *flags, unsigned long *lookupflags) { + const char *name = function_name_or_default(con_id); struct gpio_desc *desc = ERR_PTR(-ENOENT); if (is_of_node(fwnode)) { - dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n", - fwnode, con_id); + dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = of_find_gpio(to_of_node(fwnode), con_id, idx, lookupflags); } else if (is_acpi_node(fwnode)) { - dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", - fwnode, con_id); + dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags); } else if (is_software_node(fwnode)) { - dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n", - fwnode, con_id); + dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = swnode_find_gpio(fwnode, con_id, idx, lookupflags); } @@ -4181,6 +4188,7 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer, bool platform_lookup_allowed) { unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT; + const char *name = function_name_or_default(con_id); /* * scoped_guard() is implemented as a for loop, meaning static * analyzers will complain about these two not being initialized. 
@@ -4203,8 +4211,7 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer, } if (IS_ERR(desc)) { - dev_dbg(consumer, "No GPIO consumer %s found\n", - con_id); + dev_dbg(consumer, "No GPIO consumer %s found\n", name); return desc; } @@ -4226,15 +4233,14 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer, * * FIXME: Make this more sane and safe. */ - dev_info(consumer, - "nonexclusive access to GPIO for %s\n", con_id); + dev_info(consumer, "nonexclusive access to GPIO for %s\n", name); return desc; } ret = gpiod_configure_flags(desc, con_id, lookupflags, flags); if (ret < 0) { - dev_dbg(consumer, "setup of GPIO %s failed\n", con_id); gpiod_put(desc); + dev_dbg(consumer, "setup of GPIO %s failed\n", name); return ERR_PTR(ret); } @@ -4350,6 +4356,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_optional); int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id, unsigned long lflags, enum gpiod_flags dflags) { + const char *name = function_name_or_default(con_id); int ret; if (lflags & GPIO_ACTIVE_LOW) @@ -4393,7 +4400,7 @@ int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id, /* No particular flag request, return here... 
*/ if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) { - gpiod_dbg(desc, "no flags found for %s\n", con_id); + gpiod_dbg(desc, "no flags found for GPIO %s\n", name); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5dc24c971b41..aa16d51dd842 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4539,6 +4539,8 @@ int amdgpu_device_prepare(struct drm_device *dev) if (r) goto unprepare; + flush_delayed_work(&adev->gfx.gfx_off_delay_work); + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a07e4b87d4ca..fdd36fb027ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2237,6 +2237,7 @@ static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): if (amdgpu_umsch_mm & 0x1) { amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block); adev->enable_umsch_mm = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 5505d646f43a..06f0a6534a94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -524,46 +524,58 @@ static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf, { struct amdgpu_ring *ring = file_inode(f)->i_private; volatile u32 *mqd; - int r; + u32 *kbuf; + int r, i; uint32_t value, result; if (*pos & 3 || size & 3) return -EINVAL; - result = 0; + kbuf = kmalloc(ring->mqd_size, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - return r; + goto err_free; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); - if (r) { - 
amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } + if (r) + goto err_unreserve; + /* + * Copy to local buffer to avoid put_user(), which might fault + * and acquire mmap_sem, under reservation_ww_class_mutex. + */ + for (i = 0; i < ring->mqd_size/sizeof(u32); i++) + kbuf[i] = mqd[i]; + + amdgpu_bo_kunmap(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + + result = 0; while (size) { if (*pos >= ring->mqd_size) - goto done; + break; - value = mqd[*pos/4]; + value = kbuf[*pos/4]; r = put_user(value, (uint32_t *)buf); if (r) - goto done; + goto err_free; buf += 4; result += 4; size -= 4; *pos += 4; } -done: - amdgpu_bo_kunmap(ring->mqd_obj); - mqd = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - if (r) - return r; - + kfree(kbuf); return result; + +err_unreserve: + amdgpu_bo_unreserve(ring->mqd_obj); +err_free: + kfree(kbuf); + return r; } static const struct file_operations amdgpu_debugfs_mqd_fops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index ab820cf52668..0df97c3e3a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -189,10 +189,13 @@ static void setup_vpe_queue(struct amdgpu_device *adev, mqd->rptr_val = 0; mqd->unmapped = 1; + if (adev->vpe.collaborate_mode) + memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO)); + qinfo->mqd_addr = test->mqd_data_gpu_addr; qinfo->csa_addr = test->ctx_data_gpu_addr + offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa); - qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1; + qinfo->doorbell_offset_0 = 0; qinfo->doorbell_offset_1 = 0; } @@ -287,7 +290,10 @@ static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *te ring[5] = 0; mqd->wptr_val = (6 << 2); - // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); + if (adev->vpe.collaborate_mode) + (++mqd)->wptr_val = (6 << 2); + + 
WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); for (i = 0; i < adev->usec_timeout; i++) { if (*fence == test_pattern) @@ -571,6 +577,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): fw_name = "amdgpu/umsch_mm_4_0_0.bin"; break; default: @@ -750,6 +757,7 @@ static int umsch_mm_early_init(void *handle) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): umsch_mm_v4_0_set_funcs(&adev->umsch_mm); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h index 8258a43a6236..5014b5af95fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h @@ -33,13 +33,6 @@ enum UMSCH_SWIP_ENGINE_TYPE { UMSCH_SWIP_ENGINE_TYPE_MAX }; -enum UMSCH_SWIP_AFFINITY_TYPE { - UMSCH_SWIP_AFFINITY_TYPE_ANY = 0, - UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1, - UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2, - UMSCH_SWIP_AFFINITY_TYPE_MAX -}; - enum UMSCH_CONTEXT_PRIORITY_LEVEL { CONTEXT_PRIORITY_LEVEL_IDLE = 0, CONTEXT_PRIORITY_LEVEL_NORMAL = 1, @@ -51,13 +44,15 @@ enum UMSCH_CONTEXT_PRIORITY_LEVEL { struct umsch_mm_set_resource_input { uint32_t vmid_mask_mm_vcn; uint32_t vmid_mask_mm_vpe; + uint32_t collaboration_mask_vpe; uint32_t logging_vmid; uint32_t engine_mask; union { struct { uint32_t disable_reset : 1; uint32_t disable_umsch_mm_log : 1; - uint32_t reserved : 30; + uint32_t use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 29; }; uint32_t uint32_all; }; @@ -78,15 +73,18 @@ struct umsch_mm_add_queue_input { uint32_t doorbell_offset_1; enum UMSCH_SWIP_ENGINE_TYPE engine_type; uint32_t affinity; - enum UMSCH_SWIP_AFFINITY_TYPE affinity_type; uint64_t mqd_addr; uint64_t h_context; uint64_t h_queue; uint32_t vm_context_cntl; + uint32_t process_csa_array_index; + uint32_t 
context_csa_array_index; + struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; }; @@ -94,6 +92,7 @@ struct umsch_mm_remove_queue_input { uint32_t doorbell_offset_0; uint32_t doorbell_offset_1; uint64_t context_csa_addr; + uint32_t context_csa_array_index; }; struct MQD_INFO { @@ -103,6 +102,7 @@ struct MQD_INFO { uint32_t wptr_val; uint32_t rptr_val; uint32_t unmapped; + uint32_t vmid; }; struct amdgpu_umsch_mm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 7a65a2b128ec..6695481f870f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -396,6 +396,12 @@ static int vpe_hw_init(void *handle) struct amdgpu_vpe *vpe = &adev->vpe; int ret; + /* Power on VPE */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + AMD_PG_STATE_UNGATE); + if (ret) + return ret; + ret = vpe_load_microcode(vpe); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c index 8e7b763cfdb7..84368cf1e175 100644 --- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c @@ -60,7 +60,7 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch) umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr; - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -248,7 +248,7 @@ static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch) data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0); WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data); - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= 
IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -271,6 +271,8 @@ static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch) set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn; set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe; + set_hw_resources.collaboration_mask_vpe = + adev->vpe.collaborate_mode ? 0x3 : 0x0; set_hw_resources.engine_mask = umsch->engine_mask; set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask; @@ -346,6 +348,7 @@ static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch, add_queue.h_queue = input_ptr->h_queue; add_queue.vm_context_cntl = input_ptr->vm_context_cntl; add_queue.is_context_suspended = input_ptr->is_context_suspended; + add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0; add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr; add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index dfa8c69532d4..f9631f4b1a02 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1523,7 +1523,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, /* Find a KFD GPU device that supports the get_dmabuf_info query */ for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++) - if (dev) + if (dev && !kfd_devcgroup_check_permission(dev)) break; if (!dev) return -EINVAL; @@ -1545,7 +1545,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, if (xcp_id >= 0) args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id; else - args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id; + args->gpu_id = dev->id; args->flags = flags; /* Copy metadata buffer to user mode */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 9a06c6fb6605..40a21be6c07c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -339,7 +339,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, break; } kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23); - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index 7e2859736a55..fe2ad0c0de95 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -328,7 +328,8 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, /* CP */ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) kfd_signal_event_interrupt(pasid, context_id0, 32); - else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_CTXID0_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 91dd5e045b51..c4c6a29052ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -388,7 +388,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, break; } kfd_signal_event_interrupt(pasid, sq_int_data, 24); - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { 
kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 42d40560cd30..a81ef232fdef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1473,7 +1473,7 @@ static inline void kfd_flush_tlb(struct kfd_process_device *pdd, static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { - return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) || + return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) || (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2851719d7121..71d2d44681b2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6305,9 +6305,8 @@ create_stream_for_sink(struct drm_connector *connector, if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket); - else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || - stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || - stream->signal == SIGNAL_TYPE_EDP) { + + if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) { // // should decide stream support vsc sdp colorimetry capability // before building vsc info packet @@ -6323,9 +6322,8 @@ create_stream_for_sink(struct drm_connector *connector, if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) tf = TRANSFER_FUNC_GAMMA_22; mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf); + aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; - if (stream->link->psr_settings.psr_feature_enabled) - aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; } finish: 
dc_sink_release(sink); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 1f08c6564c3b..286ecd28cc6e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -141,9 +141,8 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) * amdgpu_dm_psr_enable() - enable psr f/w * @stream: stream state * - * Return: true if success */ -bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) +void amdgpu_dm_psr_enable(struct dc_stream_state *stream) { struct dc_link *link = stream->link; unsigned int vsync_rate_hz = 0; @@ -190,7 +189,10 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1) power_opt |= psr_power_opt_z10_static_screen; - return dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt); + dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt); + + if (link->ctx->dc->caps.ips_support) + dc_allow_idle_optimizations(link->ctx->dc, true); } /* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h index 6806b3c9c84b..1fdfd183c0d9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h @@ -32,7 +32,7 @@ #define AMDGPU_DM_PSR_ENTRY_DELAY 5 void amdgpu_dm_set_psr_caps(struct dc_link *link); -bool amdgpu_dm_psr_enable(struct dc_stream_state *stream); +void amdgpu_dm_psr_enable(struct dc_stream_state *stream); bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream); bool amdgpu_dm_psr_disable(struct dc_stream_state *stream); bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index c378b879c76d..101fe96287cb 100644 --- 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -73,6 +73,8 @@ #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L +#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 + #define REG(reg_name) \ (ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) @@ -411,9 +413,12 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs static void init_clk_states(struct clk_mgr *clk_mgr) { + struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); + if (clk_mgr_int->smu_ver >= SMU_VER_THRESHOLD) + clk_mgr->clks.dtbclk_en = true; // request DTBCLK disable on first commit clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk clk_mgr->clks.p_state_change_support = true; clk_mgr->clks.prev_p_state_change_support = true; @@ -709,7 +714,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk clock_table->NumFclkLevelsEnabled; max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk); - num_dcfclk = (clock_table->NumFclkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS : + num_dcfclk = (clock_table->NumDcfClkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? 
NUM_DCFCLK_DPM_LEVELS : clock_table->NumDcfClkLevelsEnabled; for (i = 0; i < num_dcfclk; i++) { int j; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e7dc128f6284..03b554e912a2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3024,7 +3024,8 @@ static void backup_planes_and_stream_state( scratch->blend_tf[i] = *status->plane_states[i]->blend_tf; } scratch->stream_state = *stream; - scratch->out_transfer_func = *stream->out_transfer_func; + if (stream->out_transfer_func) + scratch->out_transfer_func = *stream->out_transfer_func; } static void restore_planes_and_stream_state( @@ -3046,7 +3047,8 @@ static void restore_planes_and_stream_state( *status->plane_states[i]->blend_tf = scratch->blend_tf[i]; } *stream = scratch->stream_state; - *stream->out_transfer_func = scratch->out_transfer_func; + if (stream->out_transfer_func) + *stream->out_transfer_func = scratch->out_transfer_func; } static bool update_planes_and_stream_state(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile index f0777d61c2cb..c307f040e48f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. 
-CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init) +CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = -Wno-override-init DCE110 = dce110_timing_generator.o \ dce110_compressor.o dce110_opp_regamma_v.o \ diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile index 7e92effec894..683866797709 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. -CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init) +CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = -Wno-override-init DCE112 = dce112_compressor.o diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile index 1e3ef68a452a..8f508e662748 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile @@ -24,7 +24,7 @@ # It provides the control and status of HW CRTC block. -CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init) +CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = -Wno-override-init DCE120 = dce120_timing_generator.o diff --git a/drivers/gpu/drm/amd/display/dc/dce60/Makefile b/drivers/gpu/drm/amd/display/dc/dce60/Makefile index fee331accc0e..eede83ad91fa 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce60/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. 
-CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = $(call cc-disable-warning, override-init) +CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = -Wno-override-init DCE60 = dce60_timing_generator.o dce60_hw_sequencer.o \ dce60_resource.o diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile index 7eefffbdc925..fba189d26652 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. -CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init) +CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = -Wno-override-init DCE80 = dce80_timing_generator.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c index bf3386cd444d..5ebb57303130 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c @@ -44,6 +44,36 @@ #define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) +void mpc3_mpc_init(struct mpc *mpc) +{ + struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + int opp_id; + + mpc1_mpc_init(mpc); + + for (opp_id = 0; opp_id < MAX_OPP; opp_id++) { + if (REG(MUX[opp_id])) + /* disable mpc out rate and flow control */ + REG_UPDATE_2(MUX[opp_id], MPC_OUT_RATE_CONTROL_DISABLE, + 1, MPC_OUT_FLOW_CONTROL_COUNT, 0); + } +} + +void mpc3_mpc_init_single_inst(struct mpc *mpc, unsigned int mpcc_id) +{ + struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + + mpc1_mpc_init_single_inst(mpc, mpcc_id); + + /* assuming mpc out mux is connected to opp with the same index at this + * point in time (e.g. 
transitioning from vbios to driver) + */ + if (mpcc_id < MAX_OPP && REG(MUX[mpcc_id])) + /* disable mpc out rate and flow control */ + REG_UPDATE_2(MUX[mpcc_id], MPC_OUT_RATE_CONTROL_DISABLE, + 1, MPC_OUT_FLOW_CONTROL_COUNT, 0); +} + bool mpc3_is_dwb_idle( struct mpc *mpc, int dwb_id) @@ -80,25 +110,6 @@ void mpc3_disable_dwb_mux( MPC_DWB0_MUX, 0xf); } -void mpc3_set_out_rate_control( - struct mpc *mpc, - int opp_id, - bool enable, - bool rate_2x_mode, - struct mpc_dwb_flow_control *flow_control) -{ - struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); - - REG_UPDATE_2(MUX[opp_id], - MPC_OUT_RATE_CONTROL_DISABLE, !enable, - MPC_OUT_RATE_CONTROL, rate_2x_mode); - - if (flow_control) - REG_UPDATE_2(MUX[opp_id], - MPC_OUT_FLOW_CONTROL_MODE, flow_control->flow_ctrl_mode, - MPC_OUT_FLOW_CONTROL_COUNT, flow_control->flow_ctrl_cnt1); -} - enum dc_lut_mode mpc3_get_ogam_current(struct mpc *mpc, int mpcc_id) { /*Contrary to DCN2 and DCN1 wherein a single status register field holds this info; @@ -1490,8 +1501,8 @@ static const struct mpc_funcs dcn30_mpc_funcs = { .read_mpcc_state = mpc3_read_mpcc_state, .insert_plane = mpc1_insert_plane, .remove_mpcc = mpc1_remove_mpcc, - .mpc_init = mpc1_mpc_init, - .mpc_init_single_inst = mpc1_mpc_init_single_inst, + .mpc_init = mpc3_mpc_init, + .mpc_init_single_inst = mpc3_mpc_init_single_inst, .update_blending = mpc2_update_blending, .cursor_lock = mpc1_cursor_lock, .get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp, @@ -1508,7 +1519,6 @@ static const struct mpc_funcs dcn30_mpc_funcs = { .set_dwb_mux = mpc3_set_dwb_mux, .disable_dwb_mux = mpc3_disable_dwb_mux, .is_dwb_idle = mpc3_is_dwb_idle, - .set_out_rate_control = mpc3_set_out_rate_control, .set_gamut_remap = mpc3_set_gamut_remap, .program_shaper = mpc3_program_shaper, .acquire_rmu = mpcc3_acquire_rmu, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h index 9cb96ae95a2f..ce93003dae01 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h @@ -1007,6 +1007,13 @@ void dcn30_mpc_construct(struct dcn30_mpc *mpc30, int num_mpcc, int num_rmu); +void mpc3_mpc_init( + struct mpc *mpc); + +void mpc3_mpc_init_single_inst( + struct mpc *mpc, + unsigned int mpcc_id); + bool mpc3_program_shaper( struct mpc *mpc, const struct pwl_params *params, @@ -1078,13 +1085,6 @@ bool mpc3_is_dwb_idle( struct mpc *mpc, int dwb_id); -void mpc3_set_out_rate_control( - struct mpc *mpc, - int opp_id, - bool enable, - bool rate_2x_mode, - struct mpc_dwb_flow_control *flow_control); - void mpc3_power_on_ogam_lut( struct mpc *mpc, int mpcc_id, bool power_on); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index e789e654c387..e408e859b355 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -47,7 +47,7 @@ void mpc32_mpc_init(struct mpc *mpc) struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); int mpcc_id; - mpc1_mpc_init(mpc); + mpc3_mpc_init(mpc); if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc) { if (mpc30->mpc_mask->MPCC_MCM_SHAPER_MEM_LOW_PWR_MODE && mpc30->mpc_mask->MPCC_MCM_3DLUT_MEM_LOW_PWR_MODE) { @@ -991,7 +991,7 @@ static const struct mpc_funcs dcn32_mpc_funcs = { .insert_plane = mpc1_insert_plane, .remove_mpcc = mpc1_remove_mpcc, .mpc_init = mpc32_mpc_init, - .mpc_init_single_inst = mpc1_mpc_init_single_inst, + .mpc_init_single_inst = mpc3_mpc_init_single_inst, .update_blending = mpc2_update_blending, .cursor_lock = mpc1_cursor_lock, .get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp, @@ -1008,7 +1008,6 @@ static const struct mpc_funcs dcn32_mpc_funcs = { .set_dwb_mux = mpc3_set_dwb_mux, .disable_dwb_mux = mpc3_disable_dwb_mux, .is_dwb_idle = mpc3_is_dwb_idle, - .set_out_rate_control = mpc3_set_out_rate_control, .set_gamut_remap = mpc3_set_gamut_remap, .program_shaper = mpc32_program_shaper, 
.program_3dlut = mpc32_program_3dlut, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 80bebfc268db..21e0eef3269b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -166,8 +166,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .num_states = 5, .sr_exit_time_us = 28.0, .sr_enter_plus_exit_time_us = 30.0, - .sr_exit_z8_time_us = 210.0, - .sr_enter_plus_exit_z8_time_us = 320.0, + .sr_exit_z8_time_us = 250.0, + .sr_enter_plus_exit_z8_time_us = 350.0, .fclk_change_latency_us = 24.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c index dc9e1b758ed6..b3ffab77cf88 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c @@ -98,55 +98,114 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = { .clock_limits = { { .state = 0, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 400.0, + .fabricclk_mhz = 400.0, + .socclk_mhz = 600.0, + .dram_speed_mts = 3200.0, + .dispclk_mhz = 600.0, + .dppclk_mhz = 600.0, .phyclk_mhz = 600.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 186.0, + .dscclk_mhz = 200.0, .dtbclk_mhz = 600.0, }, { .state = 1, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 600.0, + .fabricclk_mhz = 1000.0, + .socclk_mhz = 733.0, + .dram_speed_mts = 6400.0, + .dispclk_mhz = 800.0, + .dppclk_mhz = 800.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, + .dscclk_mhz = 266.7, .dtbclk_mhz = 600.0, }, { .state = 2, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 738.0, + .fabricclk_mhz = 1200.0, + .socclk_mhz = 880.0, + .dram_speed_mts = 7500.0, + .dispclk_mhz = 800.0, + .dppclk_mhz = 800.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - 
.dscclk_mhz = 209.0, + .dscclk_mhz = 266.7, .dtbclk_mhz = 600.0, }, { .state = 3, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 800.0, + .fabricclk_mhz = 1400.0, + .socclk_mhz = 978.0, + .dram_speed_mts = 7500.0, + .dispclk_mhz = 960.0, + .dppclk_mhz = 960.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 371.0, + .dscclk_mhz = 320.0, .dtbclk_mhz = 600.0, }, { .state = 4, + .dcfclk_mhz = 873.0, + .fabricclk_mhz = 1600.0, + .socclk_mhz = 1100.0, + .dram_speed_mts = 8533.0, + .dispclk_mhz = 1066.7, + .dppclk_mhz = 1066.7, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 355.6, + .dtbclk_mhz = 600.0, + }, + { + .state = 5, + .dcfclk_mhz = 960.0, + .fabricclk_mhz = 1700.0, + .socclk_mhz = 1257.0, + .dram_speed_mts = 8533.0, .dispclk_mhz = 1200.0, .dppclk_mhz = 1200.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, + .dscclk_mhz = 400.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 6, + .dcfclk_mhz = 1067.0, + .fabricclk_mhz = 1850.0, + .socclk_mhz = 1257.0, + .dram_speed_mts = 8533.0, + .dispclk_mhz = 1371.4, + .dppclk_mhz = 1371.4, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 457.1, + .dtbclk_mhz = 600.0, + }, + { + .state = 7, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 2000.0, + .socclk_mhz = 1467.0, + .dram_speed_mts = 8533.0, + .dispclk_mhz = 1600.0, + .dppclk_mhz = 1600.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 533.3, .dtbclk_mhz = 600.0, }, }, - .num_states = 5, + .num_states = 8, .sr_exit_time_us = 28.0, .sr_enter_plus_exit_time_us = 30.0, - .sr_exit_z8_time_us = 210.0, - .sr_enter_plus_exit_z8_time_us = 320.0, + .sr_exit_z8_time_us = 250.0, + .sr_enter_plus_exit_z8_time_us = 350.0, .fclk_change_latency_us = 24.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, @@ -177,6 +236,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = { .do_urgent_latency_adjustment = 0, .urgent_latency_adjustment_fabric_clock_component_us = 0, 
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + .num_chans = 4, + .dram_clock_change_latency_us = 11.72, + .dispclk_dppclk_vco_speed_mhz = 2400.0, }; /* @@ -340,6 +402,8 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc, clock_limits[i].socclk_mhz; dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = + clock_limits[i].dtbclk_mhz; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = @@ -352,6 +416,8 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc, clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels = + clk_table->num_entries; } } @@ -551,6 +617,7 @@ void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context) if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; } + /*dcn351 does not support z9/z10*/ if (context->stream_count == 0 || plane_count == 0) { support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY; @@ -564,11 +631,9 @@ void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context) dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; - /*for psr1/psr-su, we allow z8 and z10 based on latency, for replay with IPS enabled, it will enter ips2*/ - if (is_pwrseq0 && (is_psr || is_replay)) + if (is_pwrseq0 && (is_psr || is_replay)) support = allow_z8 ? 
allow_z8 : DCN_ZSTATE_SUPPORT_DISALLOW; - } context->bw_ctx.bw.dcn.clk.zstate_support = support; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 17a58f41fc6a..a20f28a5d2e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -228,17 +228,13 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s break; case dml_project_dcn35: + case dml_project_dcn351: out->num_chans = 4; out->round_trip_ping_latency_dcfclk_cycles = 106; out->smn_latency_us = 2; out->dispclk_dppclk_vco_speed_mhz = 3600; break; - case dml_project_dcn351: - out->num_chans = 16; - out->round_trip_ping_latency_dcfclk_cycles = 1100; - out->smn_latency_us = 2; - break; } /* ---Overrides if available--- */ if (dml2->config.bbox_overrides.dram_num_chan) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 9d5df4c0da59..0ba1feaf96c0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1185,7 +1185,8 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) if (dccg) { dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); - dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + if (dccg && dccg->funcs->set_dtbclk_dto) + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); } } else if (dccg && dccg->funcs->disable_symclk_se) { dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index 3a9cc8ac0c07..093f4387553c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -69,29 +69,6 @@ #define FN(reg_name, field_name) \ hws->shifts->field_name, hws->masks->field_name -static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, - int opp_cnt) -{ - bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing); - int flow_ctrl_cnt; - - if (opp_cnt >= 2) - hblank_halved = true; - - flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable - - stream->timing.h_border_left - - stream->timing.h_border_right; - - if (hblank_halved) - flow_ctrl_cnt /= 2; - - /* ODM combine 4:1 case */ - if (opp_cnt == 4) - flow_ctrl_cnt /= 2; - - return flow_ctrl_cnt; -} - static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; @@ -183,10 +160,6 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx struct pipe_ctx *odm_pipe; int opp_cnt = 0; int opp_inst[MAX_PIPES] = {0}; - bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing)); - struct mpc_dwb_flow_control flow_control; - struct mpc *mpc = dc->res_pool->mpc; - int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -199,20 +172,6 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1; - flow_control.flow_ctrl_mode = 0; - flow_control.flow_ctrl_cnt0 = 0x80; - flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt); - if (mpc->funcs->set_out_rate_control) { - for (i = 0; i < opp_cnt; ++i) { - mpc->funcs->set_out_rate_control( - mpc, opp_inst[i], - true, - rate_control_2x_pclk, - &flow_control); - } - } - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { 
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index c0b526cf1786..7668229438da 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -966,29 +966,6 @@ void dcn32_init_hw(struct dc *dc) } } -static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, - int opp_cnt) -{ - bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing); - int flow_ctrl_cnt; - - if (opp_cnt >= 2) - hblank_halved = true; - - flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable - - stream->timing.h_border_left - - stream->timing.h_border_right; - - if (hblank_halved) - flow_ctrl_cnt /= 2; - - /* ODM combine 4:1 case */ - if (opp_cnt == 4) - flow_ctrl_cnt /= 2; - - return flow_ctrl_cnt; -} - static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; @@ -1103,10 +1080,6 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * struct pipe_ctx *odm_pipe; int opp_cnt = 0; int opp_inst[MAX_PIPES] = {0}; - bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing)); - struct mpc_dwb_flow_control flow_control; - struct mpc *mpc = dc->res_pool->mpc; - int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -1119,20 +1092,6 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1; - flow_control.flow_ctrl_mode = 0; - flow_control.flow_ctrl_cnt0 = 0x80; - flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt); - if 
(mpc->funcs->set_out_rate_control) { - for (i = 0; i < opp_cnt; ++i) { - mpc->funcs->set_out_rate_control( - mpc, opp_inst[i], - true, - rate_control_2x_pclk, - &flow_control); - } - } - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 4b92df23ff0d..a5560b3fc39b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -358,29 +358,6 @@ void dcn35_init_hw(struct dc *dc) } } -static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, - int opp_cnt) -{ - bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing); - int flow_ctrl_cnt; - - if (opp_cnt >= 2) - hblank_halved = true; - - flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable - - stream->timing.h_border_left - - stream->timing.h_border_right; - - if (hblank_halved) - flow_ctrl_cnt /= 2; - - /* ODM combine 4:1 case */ - if (opp_cnt == 4) - flow_ctrl_cnt /= 2; - - return flow_ctrl_cnt; -} - static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; @@ -474,10 +451,6 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * struct pipe_ctx *odm_pipe; int opp_cnt = 0; int opp_inst[MAX_PIPES] = {0}; - bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing)); - struct mpc_dwb_flow_control flow_control; - struct mpc *mpc = dc->res_pool->mpc; - int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -490,20 +463,6 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * pipe_ctx->stream_res.tg->funcs->set_odm_bypass( 
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1; - flow_control.flow_ctrl_mode = 0; - flow_control.flow_ctrl_cnt0 = 0x80; - flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt); - if (mpc->funcs->set_out_rate_control) { - for (i = 0; i < opp_cnt; ++i) { - mpc->funcs->set_out_rate_control( - mpc, opp_inst[i], - true, - rate_control_2x_pclk, - &flow_control); - } - } - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index ab17fa1c64e8..670255c9bc82 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -67,7 +67,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .prepare_bandwidth = dcn35_prepare_bandwidth, .optimize_bandwidth = dcn35_optimize_bandwidth, .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, + .set_drr = dcn35_set_drr, .get_position = dcn10_get_position, .set_static_screen_control = dcn35_set_static_screen_control, .setup_stereo = dcn10_setup_stereo, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 5b486400dfdb..909e14261f9b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -700,6 +700,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .disable_dpp_power_gate = true, .disable_hubp_power_gate = true, + .disable_optc_power_gate = true, /*should the same as above two*/ + .disable_hpo_power_gate = true, /*dmubfw force domain25 on*/ .disable_clock_gate = false, 
.disable_dsc_power_gate = true, .vsr_support = true, @@ -742,12 +744,13 @@ static const struct dc_debug_options debug_defaults_drv = { }, .seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT, .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/ + .minimum_z8_residency_time = 2100, .using_dml2 = true, .support_eDP1_5 = true, .enable_hpo_pg_support = false, .enable_legacy_fast_update = true, .enable_single_display_2to1_odm_policy = true, - .disable_idle_power_optimizations = true, + .disable_idle_power_optimizations = false, .dmcub_emulation = false, .disable_boot_optimizations = false, .disable_unbounded_requesting = false, @@ -758,8 +761,10 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = true, .ignore_pg = true, .psp_disabled_wa = true, - .ips2_eval_delay_us = 200, - .ips2_entry_delay_us = 400 + .ips2_eval_delay_us = 2000, + .ips2_entry_delay_us = 800, + .disable_dmub_reallow_idle = true, + .static_screen_wait_frames = 2, }; static const struct dc_panel_config panel_config_defaults = { diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 738ee763f24a..84f9b412a4f1 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -147,15 +147,12 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, } /* VSC packet set to 4 for PSR-SU, or 2 for PSR1 */ - if (stream->link->psr_settings.psr_feature_enabled) { - if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) - vsc_packet_revision = vsc_packet_rev4; - else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1) - vsc_packet_revision = vsc_packet_rev2; - } - - if (stream->link->replay_settings.config.replay_supported) + if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) + vsc_packet_revision = vsc_packet_rev4; + else 
if (stream->link->replay_settings.config.replay_supported) vsc_packet_revision = vsc_packet_rev4; + else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1) + vsc_packet_revision = vsc_packet_rev2; /* Update to revision 5 for extended colorimetry support */ if (stream->use_vsc_sdp_for_colorimetry) diff --git a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h index beadb9e42850..ca83e9e5c3ff 100644 --- a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h +++ b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h @@ -234,7 +234,8 @@ union UMSCHAPI__SET_HW_RESOURCES { uint32_t enable_level_process_quantum_check : 1; uint32_t is_vcn0_enabled : 1; uint32_t is_vcn1_enabled : 1; - uint32_t reserved : 27; + uint32_t use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 26; }; uint32_t uint32_all; }; @@ -297,9 +298,12 @@ union UMSCHAPI__ADD_QUEUE { struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; struct UMSCH_API_STATUS api_status; + uint32_t process_csa_array_index; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -314,6 +318,7 @@ union UMSCHAPI__REMOVE_QUEUE { uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -337,6 +342,7 @@ union UMSCHAPI__SUSPEND { uint32_t suspend_fence_value; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -356,6 +362,7 @@ union UMSCHAPI__RESUME { enum UMSCH_ENGINE_TYPE engine_type; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -404,6 +411,7 @@ union UMSCHAPI__UPDATE_AFFINITY { union UMSCH_AFFINITY affinity; uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t 
context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -417,6 +425,7 @@ union UMSCHAPI__CHANGE_CONTEXT_PRIORITY_LEVEL { uint64_t context_quantum; uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h index 8a8a57c56bc0..ca7ce4251482 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h @@ -54,14 +54,14 @@ #define PPSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team #define PPSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version #define PPSMC_MSG_GetDriverIfVersion 0x03 ///< Get PMFW_DRIVER_IF version -#define PPSMC_MSG_SPARE0 0x04 ///< SPARE -#define PPSMC_MSG_SPARE1 0x05 ///< SPARE -#define PPSMC_MSG_PowerDownVcn 0x06 ///< Power down VCN -#define PPSMC_MSG_PowerUpVcn 0x07 ///< Power up VCN; VCN is power gated by default -#define PPSMC_MSG_SetHardMinVcn 0x08 ///< For wireless display +#define PPSMC_MSG_PowerDownVcn1 0x04 ///< Power down VCN1 +#define PPSMC_MSG_PowerUpVcn1 0x05 ///< Power up VCN1; VCN1 is power gated by default +#define PPSMC_MSG_PowerDownVcn0 0x06 ///< Power down VCN0 +#define PPSMC_MSG_PowerUpVcn0 0x07 ///< Power up VCN0; VCN0 is power gated by default +#define PPSMC_MSG_SetHardMinVcn0 0x08 ///< For wireless display #define PPSMC_MSG_SetSoftMinGfxclk 0x09 ///< Set SoftMin for GFXCLK, argument is frequency in MHz -#define PPSMC_MSG_SPARE2 0x0A ///< SPARE -#define PPSMC_MSG_SPARE3 0x0B ///< SPARE +#define PPSMC_MSG_SetHardMinVcn1 0x0A ///< For wireless display +#define PPSMC_MSG_SetSoftMinVcn1 0x0B ///< Set soft min for VCN1 clocks (VCLK1 and DCLK1) #define PPSMC_MSG_PrepareMp1ForUnload 0x0C ///< Prepare PMFW for GFX driver unload #define 
PPSMC_MSG_SetDriverDramAddrHigh 0x0D ///< Set high 32 bits of DRAM address for Driver table transfer #define PPSMC_MSG_SetDriverDramAddrLow 0x0E ///< Set low 32 bits of DRAM address for Driver table transfer @@ -71,7 +71,7 @@ #define PPSMC_MSG_GetEnabledSmuFeatures 0x12 ///< Get enabled features in PMFW #define PPSMC_MSG_SetHardMinSocclkByFreq 0x13 ///< Set hard min for SOC CLK #define PPSMC_MSG_SetSoftMinFclk 0x14 ///< Set hard min for FCLK -#define PPSMC_MSG_SetSoftMinVcn 0x15 ///< Set soft min for VCN clocks (VCLK and DCLK) +#define PPSMC_MSG_SetSoftMinVcn0 0x15 ///< Set soft min for VCN0 clocks (VCLK0 and DCLK0) #define PPSMC_MSG_EnableGfxImu 0x16 ///< Enable GFX IMU @@ -84,17 +84,17 @@ #define PPSMC_MSG_SetSoftMaxSocclkByFreq 0x1D ///< Set soft max for SOC CLK #define PPSMC_MSG_SetSoftMaxFclkByFreq 0x1E ///< Set soft max for FCLK -#define PPSMC_MSG_SetSoftMaxVcn 0x1F ///< Set soft max for VCN clocks (VCLK and DCLK) +#define PPSMC_MSG_SetSoftMaxVcn0 0x1F ///< Set soft max for VCN0 clocks (VCLK0 and DCLK0) #define PPSMC_MSG_spare_0x20 0x20 -#define PPSMC_MSG_PowerDownJpeg 0x21 ///< Power down Jpeg -#define PPSMC_MSG_PowerUpJpeg 0x22 ///< Power up Jpeg; VCN is power gated by default +#define PPSMC_MSG_PowerDownJpeg0 0x21 ///< Power down Jpeg of VCN0 +#define PPSMC_MSG_PowerUpJpeg0 0x22 ///< Power up Jpeg of VCN0; VCN0 is power gated by default #define PPSMC_MSG_SetHardMinFclkByFreq 0x23 ///< Set hard min for FCLK #define PPSMC_MSG_SetSoftMinSocclkByFreq 0x24 ///< Set soft min for SOC CLK #define PPSMC_MSG_AllowZstates 0x25 ///< Inform PMFM of allowing Zstate entry, i.e. 
no Miracast activity -#define PPSMC_MSG_Reserved 0x26 ///< Not used -#define PPSMC_MSG_Reserved1 0x27 ///< Not used, previously PPSMC_MSG_RequestActiveWgp -#define PPSMC_MSG_Reserved2 0x28 ///< Not used, previously PPSMC_MSG_QueryActiveWgp +#define PPSMC_MSG_PowerDownJpeg1 0x26 ///< Power down Jpeg of VCN1 +#define PPSMC_MSG_PowerUpJpeg1 0x27 ///< Power up Jpeg of VCN1; VCN1 is power gated by default +#define PPSMC_MSG_SetSoftMaxVcn1 0x28 ///< Set soft max for VCN1 clocks (VCLK1 and DCLK1) #define PPSMC_MSG_PowerDownIspByTile 0x29 ///< ISP is power gated by default #define PPSMC_MSG_PowerUpIspByTile 0x2A ///< This message is used to power up ISP tiles and enable the ISP DPM #define PPSMC_MSG_SetHardMinIspiclkByFreq 0x2B ///< Set HardMin by frequency for ISPICLK diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index a941fdbf78b6..af427cc7dbb8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -115,6 +115,10 @@ __SMU_DUMMY_MAP(PowerDownVcn), \ __SMU_DUMMY_MAP(PowerUpJpeg), \ __SMU_DUMMY_MAP(PowerDownJpeg), \ + __SMU_DUMMY_MAP(PowerUpJpeg0), \ + __SMU_DUMMY_MAP(PowerDownJpeg0), \ + __SMU_DUMMY_MAP(PowerUpJpeg1), \ + __SMU_DUMMY_MAP(PowerDownJpeg1), \ __SMU_DUMMY_MAP(BacoAudioD3PME), \ __SMU_DUMMY_MAP(ArmD3), \ __SMU_DUMMY_MAP(RunDcBtc), \ @@ -135,6 +139,8 @@ __SMU_DUMMY_MAP(PowerUpSdma), \ __SMU_DUMMY_MAP(SetHardMinIspclkByFreq), \ __SMU_DUMMY_MAP(SetHardMinVcn), \ + __SMU_DUMMY_MAP(SetHardMinVcn0), \ + __SMU_DUMMY_MAP(SetHardMinVcn1), \ __SMU_DUMMY_MAP(SetAllowFclkSwitch), \ __SMU_DUMMY_MAP(SetMinVideoGfxclkFreq), \ __SMU_DUMMY_MAP(ActiveProcessNotify), \ @@ -150,6 +156,8 @@ __SMU_DUMMY_MAP(SetPhyclkVoltageByFreq), \ __SMU_DUMMY_MAP(SetDppclkVoltageByFreq), \ __SMU_DUMMY_MAP(SetSoftMinVcn), \ + __SMU_DUMMY_MAP(SetSoftMinVcn0), \ + __SMU_DUMMY_MAP(SetSoftMinVcn1), \ __SMU_DUMMY_MAP(EnablePostCode), \ __SMU_DUMMY_MAP(GetGfxclkFrequency), \ 
__SMU_DUMMY_MAP(GetFclkFrequency), \ @@ -161,6 +169,8 @@ __SMU_DUMMY_MAP(SetSoftMaxSocclkByFreq), \ __SMU_DUMMY_MAP(SetSoftMaxFclkByFreq), \ __SMU_DUMMY_MAP(SetSoftMaxVcn), \ + __SMU_DUMMY_MAP(SetSoftMaxVcn0), \ + __SMU_DUMMY_MAP(SetSoftMaxVcn1), \ __SMU_DUMMY_MAP(PowerGateMmHub), \ __SMU_DUMMY_MAP(UpdatePmeRestore), \ __SMU_DUMMY_MAP(GpuChangeState), \ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index b06a3cc43305..9e39f99154f9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -1402,9 +1402,22 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu, if (adev->vcn.harvest_config & (1 << i)) continue; - ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, - i << 16U, NULL); + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) { + if (i == 0) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0, + i << 16U, NULL); + else if (i == 1) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1, + i << 16U, NULL); + } else { + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, + i << 16U, NULL); + } + if (ret) return ret; } @@ -1415,9 +1428,34 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu, int smu_v14_0_set_jpeg_enable(struct smu_context *smu, bool enable) { - return smu_cmn_send_smc_msg_with_param(smu, enable ? 
- SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg, - 0, NULL); + struct amdgpu_device *adev = smu->adev; + int i, ret = 0; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) { + if (i == 0) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpJpeg0 : SMU_MSG_PowerDownJpeg0, + i << 16U, NULL); + else if (i == 1 && amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpJpeg1 : SMU_MSG_PowerDownJpeg1, + i << 16U, NULL); + } else { + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg, + i << 16U, NULL); + } + + if (ret) + return ret; + } + + return ret; } int smu_v14_0_run_btc(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 9310c4758e38..d6de6d97286c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -70,9 +70,12 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1), MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1), - MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1), - MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 1), - MSG_MAP(SetHardMinVcn, PPSMC_MSG_SetHardMinVcn, 1), + MSG_MAP(PowerDownVcn0, PPSMC_MSG_PowerDownVcn0, 1), + MSG_MAP(PowerUpVcn0, PPSMC_MSG_PowerUpVcn0, 1), + MSG_MAP(SetHardMinVcn0, PPSMC_MSG_SetHardMinVcn0, 1), + MSG_MAP(PowerDownVcn1, PPSMC_MSG_PowerDownVcn1, 1), + MSG_MAP(PowerUpVcn1, PPSMC_MSG_PowerUpVcn1, 1), + MSG_MAP(SetHardMinVcn1, PPSMC_MSG_SetHardMinVcn1, 1), MSG_MAP(SetSoftMinGfxclk, 
PPSMC_MSG_SetSoftMinGfxclk, 1), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 1), MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1), @@ -83,7 +86,8 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(GetEnabledSmuFeatures, PPSMC_MSG_GetEnabledSmuFeatures, 1), MSG_MAP(SetHardMinSocclkByFreq, PPSMC_MSG_SetHardMinSocclkByFreq, 1), MSG_MAP(SetSoftMinFclk, PPSMC_MSG_SetSoftMinFclk, 1), - MSG_MAP(SetSoftMinVcn, PPSMC_MSG_SetSoftMinVcn, 1), + MSG_MAP(SetSoftMinVcn0, PPSMC_MSG_SetSoftMinVcn0, 1), + MSG_MAP(SetSoftMinVcn1, PPSMC_MSG_SetSoftMinVcn1, 1), MSG_MAP(EnableGfxImu, PPSMC_MSG_EnableGfxImu, 1), MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1), MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1), @@ -91,9 +95,12 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetHardMinGfxClk, PPSMC_MSG_SetHardMinGfxClk, 1), MSG_MAP(SetSoftMaxSocclkByFreq, PPSMC_MSG_SetSoftMaxSocclkByFreq, 1), MSG_MAP(SetSoftMaxFclkByFreq, PPSMC_MSG_SetSoftMaxFclkByFreq, 1), - MSG_MAP(SetSoftMaxVcn, PPSMC_MSG_SetSoftMaxVcn, 1), - MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 1), - MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg, 1), + MSG_MAP(SetSoftMaxVcn0, PPSMC_MSG_SetSoftMaxVcn0, 1), + MSG_MAP(SetSoftMaxVcn1, PPSMC_MSG_SetSoftMaxVcn1, 1), + MSG_MAP(PowerDownJpeg0, PPSMC_MSG_PowerDownJpeg0, 1), + MSG_MAP(PowerUpJpeg0, PPSMC_MSG_PowerUpJpeg0, 1), + MSG_MAP(PowerDownJpeg1, PPSMC_MSG_PowerDownJpeg1, 1), + MSG_MAP(PowerUpJpeg1, PPSMC_MSG_PowerUpJpeg1, 1), MSG_MAP(SetHardMinFclkByFreq, PPSMC_MSG_SetHardMinFclkByFreq, 1), MSG_MAP(SetSoftMinSocclkByFreq, PPSMC_MSG_SetSoftMinSocclkByFreq, 1), MSG_MAP(PowerDownIspByTile, PPSMC_MSG_PowerDownIspByTile, 1), diff --git a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c index bd61e20770a5..14a2a8473682 100644 --- a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c +++ 
b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c @@ -52,7 +52,7 @@ * @adapter: I2C adapter for the DDC bus * @offset: register offset * @buffer: buffer for return data - * @size: sizo of the buffer + * @size: size of the buffer * * Reads @size bytes from the DP dual mode adaptor registers * starting at @offset. @@ -116,7 +116,7 @@ EXPORT_SYMBOL(drm_dp_dual_mode_read); * @adapter: I2C adapter for the DDC bus * @offset: register offset * @buffer: buffer for write data - * @size: sizo of the buffer + * @size: size of the buffer * * Writes @size bytes to the DP dual mode adaptor registers * starting at @offset. diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 266826eac4a7..f5d4be897866 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -4111,6 +4111,13 @@ int drm_dp_bw_overhead(int lane_count, int hactive, u32 overhead = 1000000; int symbol_cycles; + if (lane_count == 0 || hactive == 0 || bpp_x16 == 0) { + DRM_DEBUG_KMS("Invalid BW overhead params: lane_count %d, hactive %d, bpp_x16 %d.%04d\n", + lane_count, hactive, + bpp_x16 >> 4, (bpp_x16 & 0xf) * 625); + return 0; + } + /* * DP Standard v2.1 2.6.4.1 * SSC downspread and ref clock variation margin: diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 7352bde299d5..03bd3c7bd0dc 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -582,7 +582,12 @@ int drm_gem_map_attach(struct dma_buf *dma_buf, { struct drm_gem_object *obj = dma_buf->priv; - if (!obj->funcs->get_sg_table) + /* + * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers + * that implement their own ->map_dma_buf() do not. 
+ */ + if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf && + !obj->funcs->get_sg_table) return -ENOSYS; return drm_gem_pin(obj); diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 3ef6ed41e62b..fba73c38e235 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -33,9 +33,9 @@ endif subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable -CFLAGS_i915_pci.o = $(call cc-disable-warning, override-init) -CFLAGS_display/intel_display_device.o = $(call cc-disable-warning, override-init) -CFLAGS_display/intel_fbdev.o = $(call cc-disable-warning, override-init) +CFLAGS_i915_pci.o = -Wno-override-init +CFLAGS_display/intel_display_device.o = -Wno-override-init +CFLAGS_display/intel_fbdev.o = -Wno-override-init # Support compiling the display code separately for both i915 and xe # drivers. Define I915 when building i915. @@ -118,6 +118,7 @@ gt-y += \ gt/intel_ggtt_fencing.o \ gt/intel_gt.o \ gt/intel_gt_buffer_pool.o \ + gt/intel_gt_ccs_mode.o \ gt/intel_gt_clock_utils.o \ gt/intel_gt_debugfs.o \ gt/intel_gt_engines_debugfs.o \ diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index dfe0b07a122d..06ec04e667e3 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -717,7 +717,6 @@ static void g4x_enable_dp(struct intel_atomic_state *state, { intel_enable_dp(state, encoder, pipe_config, conn_state); intel_edp_backlight_on(pipe_config, conn_state); - encoder->audio_enable(encoder, pipe_config, conn_state); } static void vlv_enable_dp(struct intel_atomic_state *state, @@ -726,7 +725,6 @@ static void vlv_enable_dp(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { intel_edp_backlight_on(pipe_config, conn_state); - encoder->audio_enable(encoder, pipe_config, conn_state); } static void g4x_pre_enable_dp(struct intel_atomic_state *state, diff --git 
a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index eda4a8b88590..ac456a2275db 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1155,7 +1155,6 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) } intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); - intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); /* ensure all panel commands dispatched before enabling transcoder */ wait_for_cmds_dispatched_to_panel(encoder); @@ -1256,6 +1255,8 @@ static void gen11_dsi_enable(struct intel_atomic_state *state, /* step6d: enable dsi transcoder */ gen11_dsi_enable_transcoder(encoder); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); + /* step7: enable backlight */ intel_backlight_enable(crtc_state, conn_state); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index fe52c06271ef..52bd3576835b 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1955,16 +1955,12 @@ static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915, * these devices we split the init OTP sequence into a deassert sequence and * the actual init OTP part. */ -static void fixup_mipi_sequences(struct drm_i915_private *i915, - struct intel_panel *panel) +static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915, + struct intel_panel *panel) { u8 *init_otp; int len; - /* Limit this to VLV for now. */ - if (!IS_VALLEYVIEW(i915)) - return; - /* Limit this to v1 vid-mode sequences */ if (panel->vbt.dsi.config->is_cmd_mode || panel->vbt.dsi.seq_version != 1) @@ -2000,6 +1996,41 @@ static void fixup_mipi_sequences(struct drm_i915_private *i915, panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1; } +/* + * Some machines (eg. 
Lenovo 82TQ) appear to have broken + * VBT sequences: + * - INIT_OTP is not present at all + * - what should be in INIT_OTP is in DISPLAY_ON + * - what should be in DISPLAY_ON is in BACKLIGHT_ON + * (along with the actual backlight stuff) + * + * To make those work we simply swap DISPLAY_ON and INIT_OTP. + * + * TODO: Do we need to limit this to specific machines, + * or examine the contents of the sequences to + * avoid false positives? + */ +static void icl_fixup_mipi_sequences(struct drm_i915_private *i915, + struct intel_panel *panel) +{ + if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] && + panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]) { + drm_dbg_kms(&i915->drm, "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n"); + + swap(panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP], + panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]); + } +} + +static void fixup_mipi_sequences(struct drm_i915_private *i915, + struct intel_panel *panel) +{ + if (DISPLAY_VER(i915) >= 11) + icl_fixup_mipi_sequences(i915, panel); + else if (IS_VALLEYVIEW(i915)) + vlv_fixup_mipi_sequences(i915, panel); +} + static void parse_mipi_sequence(struct drm_i915_private *i915, struct intel_panel *panel) @@ -3351,6 +3382,9 @@ bool intel_bios_encoder_supports_dp_dual_mode(const struct intel_bios_encoder_da { const struct child_device_config *child = &devdata->child; + if (!devdata) + return false; + if (!intel_bios_encoder_supports_dp(devdata) || !intel_bios_encoder_supports_hdmi(devdata)) return false; diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index f8b33999d43f..0d3da55e1c24 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -36,12 +36,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); - const struct drm_framebuffer *fb = plane_state->hw.fb; - struct drm_i915_gem_object *obj = 
intel_fb_obj(fb); u32 base; if (DISPLAY_INFO(dev_priv)->cursor_needs_physical) - base = i915_gem_object_get_dma_address(obj, 0); + base = plane_state->phys_dma_addr; else base = intel_plane_ggtt_offset(plane_state); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index ab2f52d21bad..8af9e6128277 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2709,15 +2709,6 @@ static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state) */ intel_de_write(dev_priv, PIPESRC(pipe), PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1)); - - if (!crtc_state->enable_psr2_su_region_et) - return; - - width = drm_rect_width(&crtc_state->psr2_su_area); - height = drm_rect_height(&crtc_state->psr2_su_area); - - intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(pipe), - PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1)); } static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index fe4268813786..9b1bce2624b9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -47,6 +47,7 @@ struct drm_printer; #define HAS_DPT(i915) (DISPLAY_VER(i915) >= 13) #define HAS_DSB(i915) (DISPLAY_INFO(i915)->has_dsb) #define HAS_DSC(__i915) (DISPLAY_RUNTIME_INFO(__i915)->has_dsc) +#define HAS_DSC_MST(__i915) (DISPLAY_VER(__i915) >= 12 && HAS_DSC(__i915)) #define HAS_FBC(i915) (DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0) #define HAS_FPGA_DBG_UNCLAIMED(i915) (DISPLAY_INFO(i915)->has_fpga_dbg) #define HAS_FW_BLC(i915) (DISPLAY_VER(i915) >= 3) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index e67cd5b02e84..bf3f942e19c3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ 
b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -727,6 +727,7 @@ struct intel_plane_state { #define PLANE_HAS_FENCE BIT(0) struct intel_fb_view view; + u32 phys_dma_addr; /* for cursor_needs_physical */ /* Plane pxp decryption state */ bool decrypt; @@ -1422,6 +1423,8 @@ struct intel_crtc_state { u32 psr2_man_track_ctl; + u32 pipe_srcsz_early_tpt; + struct drm_rect psr2_su_area; /* Variable Refresh Rate state */ diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f0c3ed37b350..abd62bebc46d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -67,6 +67,7 @@ #include "intel_dp_tunnel.h" #include "intel_dpio_phy.h" #include "intel_dpll.h" +#include "intel_drrs.h" #include "intel_fifo_underrun.h" #include "intel_hdcp.h" #include "intel_hdmi.h" @@ -498,7 +499,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) /* The values must be in increasing order */ static const int mtl_rates[] = { 162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000, - 810000, 1000000, 1350000, 2000000, + 810000, 1000000, 2000000, }; static const int icl_rates[] = { 162000, 216000, 270000, 324000, 432000, 540000, 648000, 810000, @@ -1421,7 +1422,8 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp, if (DISPLAY_VER(dev_priv) >= 12) return true; - if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A) + if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A && + !intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)) return true; return false; @@ -1916,8 +1918,9 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp, dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) { - if (valid_dsc_bpp[i] < dsc_min_bpp || - valid_dsc_bpp[i] > dsc_max_bpp) + if (valid_dsc_bpp[i] < dsc_min_bpp) + continue; + if (valid_dsc_bpp[i] > dsc_max_bpp) break; ret = dsc_compute_link_config(intel_dp, @@ -2683,15 +2686,6 @@ 
intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA); } -static bool cpu_transcoder_has_drrs(struct drm_i915_private *i915, - enum transcoder cpu_transcoder) -{ - if (HAS_DOUBLE_BUFFERED_M_N(i915)) - return true; - - return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder); -} - static bool can_enable_drrs(struct intel_connector *connector, const struct intel_crtc_state *pipe_config, const struct drm_display_mode *downclock_mode) @@ -2714,7 +2708,7 @@ static bool can_enable_drrs(struct intel_connector *connector, if (pipe_config->has_pch_encoder) return false; - if (!cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder)) + if (!intel_cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder)) return false; return downclock_mode && @@ -6565,6 +6559,7 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, intel_connector->get_hw_state = intel_ddi_connector_get_hw_state; else intel_connector->get_hw_state = intel_connector_get_hw_state; + intel_connector->sync_state = intel_dp_connector_sync_state; if (!intel_edp_init_connector(intel_dp, intel_connector)) { intel_dp_aux_fini(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 53aec023ce92..b651c990af85 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1355,7 +1355,7 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, return 0; } - if (DISPLAY_VER(dev_priv) >= 10 && + if (HAS_DSC_MST(dev_priv) && drm_dp_sink_supports_dsc(intel_connector->dp.dsc_dpcd)) { /* * TBD pass the connector BPC, diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index ff480f171f75..b6d24410740f 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2554,7 +2554,7 @@ static void 
icl_wrpll_params_populate(struct skl_wrpll_params *params, static bool ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915) { - return (((IS_ELKHARTLAKE(i915) || IS_JASPERLAKE(i915)) && + return ((IS_ELKHARTLAKE(i915) && IS_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) && i915->display.dpll.ref_clks.nssc == 38400; diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c index 169ef38ff188..597f8bd6aa1a 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.c +++ b/drivers/gpu/drm/i915/display/intel_drrs.c @@ -63,6 +63,15 @@ const char *intel_drrs_type_str(enum drrs_type drrs_type) return str[drrs_type]; } +bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915, + enum transcoder cpu_transcoder) +{ + if (HAS_DOUBLE_BUFFERED_M_N(i915)) + return true; + + return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder); +} + static void intel_drrs_set_refresh_rate_pipeconf(struct intel_crtc *crtc, enum drrs_refresh_rate refresh_rate) @@ -312,9 +321,8 @@ static int intel_drrs_debugfs_status_show(struct seq_file *m, void *unused) mutex_lock(&crtc->drrs.mutex); seq_printf(m, "DRRS capable: %s\n", - str_yes_no(crtc_state->has_drrs || - HAS_DOUBLE_BUFFERED_M_N(i915) || - intel_cpu_transcoder_has_m2_n2(i915, crtc_state->cpu_transcoder))); + str_yes_no(intel_cpu_transcoder_has_drrs(i915, + crtc_state->cpu_transcoder))); seq_printf(m, "DRRS enabled: %s\n", str_yes_no(crtc_state->has_drrs)); diff --git a/drivers/gpu/drm/i915/display/intel_drrs.h b/drivers/gpu/drm/i915/display/intel_drrs.h index 8ef5f93a80ff..0982f95eab72 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.h +++ b/drivers/gpu/drm/i915/display/intel_drrs.h @@ -9,12 +9,15 @@ #include <linux/types.h> enum drrs_type; +enum transcoder; struct drm_i915_private; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; struct intel_connector; +bool 
intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915, + enum transcoder cpu_transcoder); const char *intel_drrs_type_str(enum drrs_type drrs_type); bool intel_drrs_is_active(struct intel_crtc *crtc); void intel_drrs_activate(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index d62e050185e7..e4515bf92038 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -340,6 +340,17 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency)); } +static u32 dsb_chicken(struct intel_crtc *crtc) +{ + if (crtc->mode_flags & I915_MODE_FLAG_VRR) + return DSB_CTRL_WAIT_SAFE_WINDOW | + DSB_CTRL_NO_WAIT_VBLANK | + DSB_INST_WAIT_SAFE_WINDOW | + DSB_INST_NO_WAIT_VBLANK; + else + return 0; +} + static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, int dewake_scanline) { @@ -361,6 +372,9 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, intel_de_write_fw(dev_priv, DSB_CTRL(pipe, dsb->id), ctrl | DSB_ENABLE); + intel_de_write_fw(dev_priv, DSB_CHICKEN(pipe, dsb->id), + dsb_chicken(crtc)); + intel_de_write_fw(dev_priv, DSB_HEAD(pipe, dsb->id), intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf)); diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index 7b42aef37d2f..b6df9baf481b 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -255,6 +255,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) return PTR_ERR(vma); plane_state->ggtt_vma = vma; + + /* + * Pre-populate the dma address before we enter the vblank + * evade critical section as i915_gem_object_get_dma_address() + * will trigger might_sleep() even if it won't actually sleep, + * which is the case when the fb has already been pinned. 
+ */ + if (phys_cursor) + plane_state->phys_dma_addr = + i915_gem_object_get_dma_address(intel_fb_obj(fb), 0); } else { struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 6927785fd6ff..b6e539f1342c 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1994,6 +1994,7 @@ static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp) void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; struct intel_encoder *encoder; @@ -2013,6 +2014,12 @@ void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_st intel_de_write(dev_priv, PSR2_MAN_TRK_CTL(cpu_transcoder), crtc_state->psr2_man_track_ctl); + + if (!crtc_state->enable_psr2_su_region_et) + return; + + intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(crtc->pipe), + crtc_state->pipe_srcsz_early_tpt); } static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state, @@ -2051,6 +2058,20 @@ exit: crtc_state->psr2_man_track_ctl = val; } +static u32 psr2_pipe_srcsz_early_tpt_calc(struct intel_crtc_state *crtc_state, + bool full_update) +{ + int width, height; + + if (!crtc_state->enable_psr2_su_region_et || full_update) + return 0; + + width = drm_rect_width(&crtc_state->psr2_su_area); + height = drm_rect_height(&crtc_state->psr2_su_area); + + return PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1); +} + static void clip_area_update(struct drm_rect *overlap_damage_area, struct drm_rect *damage_area, struct drm_rect *pipe_src) @@ -2095,21 +2116,36 @@ static void intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_st * cursor fully when cursor is in SU area. 
*/ static void -intel_psr2_sel_fetch_et_alignment(struct intel_crtc_state *crtc_state, - struct intel_plane_state *cursor_state) +intel_psr2_sel_fetch_et_alignment(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_rect inter; + struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + struct intel_plane_state *new_plane_state; + struct intel_plane *plane; + int i; - if (!crtc_state->enable_psr2_su_region_et || - !cursor_state->uapi.visible) + if (!crtc_state->enable_psr2_su_region_et) return; - inter = crtc_state->psr2_su_area; - if (!drm_rect_intersect(&inter, &cursor_state->uapi.dst)) - return; + for_each_new_intel_plane_in_state(state, plane, new_plane_state, i) { + struct drm_rect inter; - clip_area_update(&crtc_state->psr2_su_area, &cursor_state->uapi.dst, - &crtc_state->pipe_src); + if (new_plane_state->uapi.crtc != crtc_state->uapi.crtc) + continue; + + if (plane->id != PLANE_CURSOR) + continue; + + if (!new_plane_state->uapi.visible) + continue; + + inter = crtc_state->psr2_su_area; + if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst)) + continue; + + clip_area_update(&crtc_state->psr2_su_area, &new_plane_state->uapi.dst, + &crtc_state->pipe_src); + } } /* @@ -2152,8 +2188,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - struct intel_plane_state *new_plane_state, *old_plane_state, - *cursor_plane_state = NULL; + struct intel_plane_state *new_plane_state, *old_plane_state; struct intel_plane *plane; bool full_update = false; int i, ret; @@ -2238,13 +2273,6 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, damaged_area.x2 += new_plane_state->uapi.dst.x1 - src.x1; clip_area_update(&crtc_state->psr2_su_area, &damaged_area, &crtc_state->pipe_src); - - /* - * Cursor plane new state is stored to adjust su area to 
cover - * cursor are fully. - */ - if (plane->id == PLANE_CURSOR) - cursor_plane_state = new_plane_state; } /* @@ -2273,9 +2301,13 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, if (ret) return ret; - /* Adjust su area to cover cursor fully as necessary */ - if (cursor_plane_state) - intel_psr2_sel_fetch_et_alignment(crtc_state, cursor_plane_state); + /* + * Adjust su area to cover cursor fully as necessary (early + * transport). This needs to be done after + * drm_atomic_add_affected_planes to ensure visible cursor is added into + * affected planes even when cursor is not updated by itself. + */ + intel_psr2_sel_fetch_et_alignment(state, crtc); intel_psr2_sel_fetch_pipe_alignment(crtc_state); @@ -2338,6 +2370,8 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, skip_sel_fetch_set_loop: psr2_man_trk_ctl_calc(crtc_state, full_update); + crtc_state->pipe_srcsz_early_tpt = + psr2_pipe_srcsz_early_tpt_calc(crtc_state, full_update); return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 5f9e748adc89..0cd9c183f621 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1842,8 +1842,6 @@ static void intel_disable_sdvo(struct intel_atomic_state *state, struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); u32 temp; - encoder->audio_disable(encoder, old_crtc_state, conn_state); - intel_sdvo_set_active_outputs(intel_sdvo, 0); if (0) intel_sdvo_set_encoder_power_state(intel_sdvo, @@ -1935,8 +1933,6 @@ static void intel_enable_sdvo(struct intel_atomic_state *state, intel_sdvo_set_encoder_power_state(intel_sdvo, DRM_MODE_DPMS_ON); intel_sdvo_set_active_outputs(intel_sdvo, intel_sdvo_connector->output_flag); - - encoder->audio_enable(encoder, pipe_config, conn_state); } static enum drm_mode_status diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 
5d905f932cb4..eb5bd0743902 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -187,10 +187,11 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; /* - * TRANS_SET_CONTEXT_LATENCY with VRR enabled - * requires this chicken bit on ADL/DG2. + * This bit seems to have two meanings depending on the platform: + * TGL: generate VRR "safe window" for DSB vblank waits + * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR */ - if (DISPLAY_VER(dev_priv) == 13) + if (IS_DISPLAY_VER(dev_priv, 12, 13)) intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), 0, PIPE_VBLANK_WITH_DELAY); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index e941e2e4fd14..860574d04f88 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -2295,6 +2295,9 @@ static u8 skl_get_plane_caps(struct drm_i915_private *i915, if (HAS_4TILE(i915)) caps |= INTEL_PLANE_CAP_TILING_4; + if (!IS_ENABLED(I915) && !HAS_FLAT_CCS(i915)) + return caps; + if (skl_plane_has_rc_ccs(i915, pipe, plane_id)) { caps |= INTEL_PLANE_CAP_CCS_RC; if (DISPLAY_VER(i915) >= 12) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index fa46d2308b0e..81bf2216371b 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -961,6 +961,9 @@ static int gen8_init_rsvd(struct i915_address_space *vm) struct i915_vma *vma; int ret; + if (!intel_gt_needs_wa_16018031267(vm->gt)) + return 0; + /* The memory will be used only by GPU. 
*/ obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, I915_BO_ALLOC_VOLATILE | diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1ade568ffbfa..7a6dc371c384 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -908,6 +908,23 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) info->engine_mask &= ~BIT(GSC0); } + /* + * Do not create the command streamer for CCS slices beyond the first. + * All the workload submitted to the first engine will be shared among + * all the slices. + * + * Once the user will be allowed to customize the CCS mode, then this + * check needs to be removed. + */ + if (IS_DG2(gt->i915)) { + u8 first_ccs = __ffs(CCS_MASK(gt)); + + /* Mask off all the CCS engine */ + info->engine_mask &= ~GENMASK(CCS3, CCS0); + /* Put back in the first CCS engine */ + info->engine_mask |= BIT(_CCS(first_ccs)); + } + return info->engine_mask; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 96bdb93a948d..fb7bff27b45a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -279,9 +279,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_breadcrumbs_park(engine->breadcrumbs); - /* Must be reset upon idling, or we may miss the busy wakeup. 
*/ - GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); - if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 42aade0faf2d..b061a0a0d6b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3272,6 +3272,9 @@ static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); cancel_timer(&engine->execlists.preempt); + + /* Reset upon idling, or we may delay the busy wakeup. */ + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void add_to_engine(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index a425db5ed3a2..6a2c2718bcc3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1024,6 +1024,12 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, return I915_MAP_WC; } +bool intel_gt_needs_wa_16018031267(struct intel_gt *gt) +{ + /* Wa_16018031267, Wa_16018063123 */ + return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71)); +} + bool intel_gt_needs_wa_22016122933(struct intel_gt *gt) { return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 608f5c872928..003eb93b826f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -82,17 +82,18 @@ struct drm_printer; ##__VA_ARGS__); \ } while (0) -#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \ - IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \ - engine->class == COPY_ENGINE_CLASS && engine->instance == 0) - static inline bool gt_is_root(struct intel_gt *gt) { return !gt->info.id; } +bool intel_gt_needs_wa_16018031267(struct intel_gt *gt); bool intel_gt_needs_wa_22016122933(struct 
intel_gt *gt); +#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \ + intel_gt_needs_wa_16018031267(engine->gt) && \ + engine->class == COPY_ENGINE_CLASS && engine->instance == 0) + static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) { return container_of(uc, struct intel_gt, uc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c new file mode 100644 index 000000000000..044219c5960a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_ccs_mode.h" +#include "intel_gt_regs.h" + +void intel_gt_apply_ccs_mode(struct intel_gt *gt) +{ + int cslice; + u32 mode = 0; + int first_ccs = __ffs(CCS_MASK(gt)); + + if (!IS_DG2(gt->i915)) + return; + + /* Build the value for the fixed CCS load balancing */ + for (cslice = 0; cslice < I915_MAX_CCS; cslice++) { + if (CCS_MASK(gt) & BIT(cslice)) + /* + * If available, assign the cslice + * to the first available engine... + */ + mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs); + + else + /* + * ... 
otherwise, mark the cslice as + * unavailable if no CCS dispatches here + */ + mode |= XEHP_CCS_MODE_CSLICE(cslice, + XEHP_CCS_MODE_CSLICE_MASK); + } + + intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h new file mode 100644 index 000000000000..9e5549caeb26 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __INTEL_GT_CCS_MODE_H__ +#define __INTEL_GT_CCS_MODE_H__ + +struct intel_gt; + +void intel_gt_apply_ccs_mode(struct intel_gt *gt); + +#endif /* __INTEL_GT_CCS_MODE_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 50962cfd1353..743fe3566722 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1477,8 +1477,14 @@ #define ECOBITS_PPGTT_CACHE4B (0 << 8) #define GEN12_RCU_MODE _MMIO(0x14800) +#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) #define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) +#define XEHP_CCS_MODE _MMIO(0x14804) +#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */ +#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1) +#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH)) + #define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168) #define CHV_FGT_DISABLE_SS0 (1 << 10) #define CHV_FGT_DISABLE_SS1 (1 << 11) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index d67d44611c28..6ec3582c9735 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -10,6 +10,7 @@ #include "intel_engine_regs.h" #include "intel_gpu_commands.h" #include "intel_gt.h" +#include "intel_gt_ccs_mode.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" #include 
"intel_gt_regs.h" @@ -51,7 +52,8 @@ * registers belonging to BCS, VCS or VECS should be implemented in * xcs_engine_wa_init(). Workarounds for registers not belonging to a specific * engine's MMIO range but that are part of of the common RCS/CCS reset domain - * should be implemented in general_render_compute_wa_init(). + * should be implemented in general_render_compute_wa_init(). The settings + * about the CCS load balancing should be added in ccs_engine_wa_mode(). * * - GT workarounds: the list of these WAs is applied whenever these registers * revert to their default values: on GPU reset, suspend/resume [1]_, etc. @@ -1653,6 +1655,7 @@ static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* Wa_14018575942 / Wa_18018781329 */ + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_22016670082 */ @@ -2853,6 +2856,28 @@ add_render_compute_tuning_settings(struct intel_gt *gt, wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC); } +static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + struct intel_gt *gt = engine->gt; + + if (!IS_DG2(gt->i915)) + return; + + /* + * Wa_14019159160: This workaround, along with others, leads to + * significant challenges in utilizing load balancing among the + * CCS slices. Consequently, an architectural decision has been + * made to completely disable automatic CCS load balancing. + */ + wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE); + + /* + * After having disabled automatic load balancing we need to + * assign all slices to a single CCS. 
We will call it CCS mode 1 + */ + intel_gt_apply_ccs_mode(gt); +} + /* * The workarounds in this function apply to shared registers in * the general render reset domain that aren't tied to a @@ -3003,8 +3028,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal * to a single RCS/CCS engine's workaround list since * they're reset as part of the general render domain reset. */ - if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) + if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) { general_render_compute_wa_init(engine, wal); + ccs_engine_wa_mode(engine, wal); + } if (engine->class == COMPUTE_CLASS) ccs_engine_wa_init(engine, wal); diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 9ee902d5b72c..4b9233c07a22 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -800,7 +800,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_cleanup_modeset2; ret = intel_pxp_init(i915); - if (ret != -ENODEV) + if (ret && ret != -ENODEV) drm_dbg(&i915->drm, "pxp init failed with %d\n", ret); ret = intel_display_driver_probe(i915); diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 8c3f443c8347..b758fd110c20 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -72,12 +72,13 @@ hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat, struct intel_uncore *uncore = ddat->uncore; intel_wakeref_t wakeref; - mutex_lock(&hwmon->hwmon_lock); + with_intel_runtime_pm(uncore->rpm, wakeref) { + mutex_lock(&hwmon->hwmon_lock); - with_intel_runtime_pm(uncore->rpm, wakeref) intel_uncore_rmw(uncore, reg, clear, set); - mutex_unlock(&hwmon->hwmon_lock); + mutex_unlock(&hwmon->hwmon_lock); + } } /* @@ -136,20 +137,21 @@ hwm_energy(struct hwm_drvdata *ddat, long *energy) else rgaddr = hwmon->rg.energy_status_all; - mutex_lock(&hwmon->hwmon_lock); + 
with_intel_runtime_pm(uncore->rpm, wakeref) { + mutex_lock(&hwmon->hwmon_lock); - with_intel_runtime_pm(uncore->rpm, wakeref) reg_val = intel_uncore_read(uncore, rgaddr); - if (reg_val >= ei->reg_val_prev) - ei->accum_energy += reg_val - ei->reg_val_prev; - else - ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; - ei->reg_val_prev = reg_val; + if (reg_val >= ei->reg_val_prev) + ei->accum_energy += reg_val - ei->reg_val_prev; + else + ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; + ei->reg_val_prev = reg_val; - *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, - hwmon->scl_shift_energy); - mutex_unlock(&hwmon->hwmon_lock); + *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, + hwmon->scl_shift_energy); + mutex_unlock(&hwmon->hwmon_lock); + } } static ssize_t @@ -404,6 +406,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) /* Block waiting for GuC reset to complete when needed */ for (;;) { + wakeref = intel_runtime_pm_get(ddat->uncore->rpm); mutex_lock(&hwmon->hwmon_lock); prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE); @@ -417,14 +420,13 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) } mutex_unlock(&hwmon->hwmon_lock); + intel_runtime_pm_put(ddat->uncore->rpm, wakeref); schedule(); } finish_wait(&ddat->waitq, &wait); if (ret) - goto unlock; - - wakeref = intel_runtime_pm_get(ddat->uncore->rpm); + goto exit; /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ if (val == PL1_DISABLE) { @@ -444,9 +446,8 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); exit: - intel_runtime_pm_put(ddat->uncore->rpm, wakeref); -unlock: mutex_unlock(&hwmon->hwmon_lock); + intel_runtime_pm_put(ddat->uncore->rpm, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c index ba82277254b7..cc41974cee74 100644 --- 
a/drivers/gpu/drm/i915/i915_memcpy.c +++ b/drivers/gpu/drm/i915/i915_memcpy.c @@ -25,6 +25,8 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/cpufeature.h> +#include <linux/bug.h> +#include <linux/build_bug.h> #include <asm/fpu/api.h> #include "i915_memcpy.h" diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e00557e1a57f..3b2e49ce29ba 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4599,7 +4599,7 @@ #define MTL_CHICKEN_TRANS(trans) _MMIO_TRANS((trans), \ _MTL_CHICKEN_TRANS_A, \ _MTL_CHICKEN_TRANS_B) -#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* ADL/DG2 */ +#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* tgl+ */ #define SKL_UNMASK_VBL_TO_PIPE_IN_SRD REG_BIT(30) /* skl+ */ #define HSW_FRAME_START_DELAY_MASK REG_GENMASK(28, 27) #define HSW_FRAME_START_DELAY(x) REG_FIELD_PREP(HSW_FRAME_START_DELAY_MASK, x) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d09aad34ba37..b70715b1411d 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -34,6 +34,7 @@ #include "gt/intel_engine.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" #include "gt/intel_tlb.h" @@ -103,12 +104,42 @@ static inline struct i915_vma *active_to_vma(struct i915_active *ref) static int __i915_vma_active(struct i915_active *ref) { - return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT; + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_tryget(vma)) + return -ENOENT; + + /* + * Exclude global GTT VMA from holding a GT wakeref + * while active, otherwise GPU never goes idle. + */ + if (!i915_vma_is_ggtt(vma)) { + /* + * Since we and our _retire() counterpart can be + * called asynchronously, storing a wakeref tracking + * handle inside struct i915_vma is not safe, and + * there is no other good place for that. 
Hence, + * use untracked variants of intel_gt_pm_get/put(). + */ + intel_gt_pm_get_untracked(vma->vm->gt); + } + + return 0; } static void __i915_vma_retire(struct i915_active *ref) { - i915_vma_put(active_to_vma(ref)); + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_is_ggtt(vma)) { + /* + * Since we can be called from atomic contexts, + * use an async variant of intel_gt_pm_put(). + */ + intel_gt_pm_put_async_untracked(vma->vm->gt); + } + + i915_vma_put(vma); } static struct i915_vma * @@ -1404,7 +1435,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, struct i915_vma_work *work = NULL; struct dma_fence *moving = NULL; struct i915_vma_resource *vma_res = NULL; - intel_wakeref_t wakeref = 0; + intel_wakeref_t wakeref; unsigned int bound; int err; @@ -1424,8 +1455,14 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err; - if (flags & PIN_GLOBAL) - wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + /* + * In case of a global GTT, we must hold a runtime-pm wakeref + * while global PTEs are updated. In other cases, we hold + * the rpm reference while the VMA is active. Since runtime + * resume may require allocations, which are forbidden inside + * vm->mutex, get the first rpm wakeref outside of the mutex. 
+ */ + wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); if (flags & vma->vm->bind_async_flags) { /* lock VM */ @@ -1561,8 +1598,7 @@ err_fence: if (work) dma_fence_work_commit_imm(&work->base); err_rpm: - if (wakeref) - intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); + intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); if (moving) dma_fence_put(moving); diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 12feecf71e75..6fb65b01d778 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -378,9 +378,9 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) dma_addr_t *dma_addrs; struct nouveau_fence *fence; - src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL); - dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL); - dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL); + src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); + dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL); + dma_addrs = kvcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL | __GFP_NOFAIL); migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT, npages); @@ -406,11 +406,11 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) migrate_device_pages(src_pfns, dst_pfns, npages); nouveau_dmem_fence_done(&fence); migrate_device_finalize(src_pfns, dst_pfns, npages); - kfree(src_pfns); - kfree(dst_pfns); + kvfree(src_pfns); + kvfree(dst_pfns); for (i = 0; i < npages; i++) dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL); - kfree(dma_addrs); + kvfree(dma_addrs); } void diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 0a0a11dc9ec0..ee02cd833c5e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -812,15 +812,15 @@ op_remap(struct drm_gpuva_op_remap *r, struct 
drm_gpuva_op_unmap *u = r->unmap; struct nouveau_uvma *uvma = uvma_from_va(u->va); u64 addr = uvma->va.va.addr; - u64 range = uvma->va.va.range; + u64 end = uvma->va.va.addr + uvma->va.va.range; if (r->prev) addr = r->prev->va.addr + r->prev->va.range; if (r->next) - range = r->next->va.addr - addr; + end = r->next->va.addr; - op_unmap_range(u, addr, range); + op_unmap_range(u, addr, end - addr); } static int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 986e8d547c94..060c74a80eb1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -420,7 +420,7 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch, return ret; } else { ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, - &args, sizeof(args));; + &args, sizeof(args)); if (ret) return ret; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 9063ce254642..fd8e44992184 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -441,19 +441,19 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev) gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present); ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO, - val, !val, 1, 1000); + val, !val, 1, 2000); if (ret) dev_err(pfdev->dev, "shader power transition timeout"); gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present); ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO, - val, !val, 1, 1000); + val, !val, 1, 2000); if (ret) dev_err(pfdev->dev, "tiler power transition timeout"); gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present); ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO, - val, !val, 0, 1000); + val, !val, 0, 2000); if (ret) dev_err(pfdev->dev, "l2 power transition timeout"); } diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c 
b/drivers/gpu/drm/qxl/qxl_cmd.c index 281edab518cd..d6ea01f3797b 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -421,7 +421,6 @@ int qxl_surface_id_alloc(struct qxl_device *qdev, { uint32_t handle; int idr_ret; - int count = 0; again: idr_preload(GFP_ATOMIC); spin_lock(&qdev->surf_id_idr_lock); @@ -433,7 +432,6 @@ again: handle = idr_ret; if (handle >= qdev->rom->n_surfaces) { - count++; spin_lock(&qdev->surf_id_idr_lock); idr_remove(&qdev->surf_id_idr, handle); spin_unlock(&qdev->surf_id_idr_lock); diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index dd0f834d881c..506ae1f5e099 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -145,7 +145,7 @@ static int qxl_process_single_command(struct qxl_device *qdev, struct qxl_release *release; struct qxl_bo *cmd_bo; void *fb_cmd; - int i, ret, num_relocs; + int i, ret; int unwritten; switch (cmd->type) { @@ -200,7 +200,6 @@ static int qxl_process_single_command(struct qxl_device *qdev, } /* fill out reloc info structs */ - num_relocs = 0; for (i = 0; i < cmd->relocs_num; ++i) { struct drm_qxl_reloc reloc; struct drm_qxl_reloc __user *u = u64_to_user_ptr(cmd->relocs); @@ -230,7 +229,6 @@ static int qxl_process_single_command(struct qxl_device *qdev, reloc_info[i].dst_bo = cmd_bo; reloc_info[i].dst_offset = reloc.dst_offset + release->release_offset; } - num_relocs++; /* reserve and validate the reloc dst bo */ if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle) { diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index 48170694ac6b..18efb3fe1c00 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -17,9 +17,7 @@ static const uint32_t formats_cluster[] = { DRM_FORMAT_XRGB2101010, - DRM_FORMAT_ARGB2101010, DRM_FORMAT_XBGR2101010, - DRM_FORMAT_ABGR2101010, DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, 
DRM_FORMAT_XBGR8888, diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 3c4f5a392b06..58c8161289fe 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -71,13 +71,19 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, entity->guilty = guilty; entity->num_sched_list = num_sched_list; entity->priority = priority; + /* + * It's perfectly valid to initialize an entity without having a valid + * scheduler attached. It's just not valid to use the scheduler before it + * is initialized itself. + */ entity->sched_list = num_sched_list > 1 ? sched_list : NULL; RCU_INIT_POINTER(entity->last_scheduled, NULL); RB_CLEAR_NODE(&entity->rb_tree_node); - if (!sched_list[0]->sched_rq) { - /* Warn drivers not to do this and to fix their DRM - * calling order. + if (num_sched_list && !sched_list[0]->sched_rq) { + /* Since every entry covered by num_sched_list + * should be non-NULL and therefore we warn drivers + * not to do this and to fix their DRM calling order. 
*/ pr_warn("%s: called with uninitialized scheduler\n", __func__); } else if (num_sched_list) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index d3e308fdfd5b..c7d90f96d16a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1444,12 +1444,15 @@ static void vmw_debugfs_resource_managers_init(struct vmw_private *vmw) root, "system_ttm"); ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, TTM_PL_VRAM), root, "vram_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), - root, "gmr_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), - root, "mob_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), - root, "system_mob_ttm"); + if (vmw->has_gmr) + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), + root, "gmr_ttm"); + if (vmw->has_mob) { + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), + root, "mob_ttm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), + root, "system_mob_ttm"); + } } static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 5a428ca00f10..c29a850859ad 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -172,8 +172,8 @@ subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \ -Ddrm_i915_gem_object=xe_bo \ -Ddrm_i915_private=xe_device -CFLAGS_i915-display/intel_fbdev.o = $(call cc-disable-warning, override-init) -CFLAGS_i915-display/intel_display_device.o = $(call cc-disable-warning, override-init) +CFLAGS_i915-display/intel_fbdev.o = -Wno-override-init +CFLAGS_i915-display/intel_display_device.o = -Wno-override-init # Rule to build SOC code shared with i915 $(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE diff --git 
a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 6603a0ea79c5..9c0837b6fdfc 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -144,9 +144,6 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, .mem_type = XE_PL_TT, }; *c += 1; - - if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) - bo->props.preferred_mem_type = XE_PL_TT; } } @@ -181,25 +178,15 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, } places[*c] = place; *c += 1; - - if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) - bo->props.preferred_mem_type = mem_type; } static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - if (bo->props.preferred_gt == XE_GT1) { - if (bo_flags & XE_BO_CREATE_VRAM1_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); - if (bo_flags & XE_BO_CREATE_VRAM0_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - } else { - if (bo_flags & XE_BO_CREATE_VRAM0_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - if (bo_flags & XE_BO_CREATE_VRAM1_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); - } + if (bo_flags & XE_BO_CREATE_VRAM0_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); + if (bo_flags & XE_BO_CREATE_VRAM1_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); } static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, @@ -223,17 +210,8 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, { u32 c = 0; - bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; - - /* The order of placements should indicate preferred location */ - - if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) { - try_add_system(xe, bo, bo_flags, &c); - try_add_vram(xe, bo, bo_flags, &c); - } else { - try_add_vram(xe, bo, bo_flags, &c); - try_add_system(xe, bo, bo_flags, &c); - } + try_add_vram(xe, bo, bo_flags, &c); + try_add_system(xe, bo, 
bo_flags, &c); try_add_stolen(xe, bo, bo_flags, &c); if (!c) @@ -1126,13 +1104,6 @@ static void xe_gem_object_close(struct drm_gem_object *obj, } } -static bool should_migrate_to_system(struct xe_bo *bo) -{ - struct xe_device *xe = xe_bo_device(bo); - - return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic; -} - static vm_fault_t xe_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; @@ -1141,7 +1112,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) struct xe_bo *bo = ttm_to_xe_bo(tbo); bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK; vm_fault_t ret; - int idx, r = 0; + int idx; if (needs_rpm) xe_device_mem_access_get(xe); @@ -1153,17 +1124,8 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) if (drm_dev_enter(ddev, &idx)) { trace_xe_bo_cpu_fault(bo); - if (should_migrate_to_system(bo)) { - r = xe_bo_migrate(bo, XE_PL_TT); - if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR) - ret = VM_FAULT_NOPAGE; - else if (r) - ret = VM_FAULT_SIGBUS; - } - if (!ret) - ret = ttm_bo_vm_fault_reserved(vmf, - vmf->vma->vm_page_prot, - TTM_BO_VM_NUM_PREFAULT); + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); drm_dev_exit(idx); } else { ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); @@ -1291,9 +1253,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->flags = flags; bo->cpu_caching = cpu_caching; bo->ttm.base.funcs = &xe_gem_object_funcs; - bo->props.preferred_mem_class = XE_BO_PROPS_INVALID; - bo->props.preferred_gt = XE_BO_PROPS_INVALID; - bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; bo->ttm.priority = XE_BO_PRIORITY_NORMAL; INIT_LIST_HEAD(&bo->pinned_link); #ifdef CONFIG_PROC_FS diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 14ef13b7b421..86422e113d39 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -56,25 +56,6 @@ struct xe_bo { */ struct list_head 
client_link; #endif - /** @props: BO user controlled properties */ - struct { - /** @preferred_mem: preferred memory class for this BO */ - s16 preferred_mem_class; - /** @prefered_gt: preferred GT for this BO */ - s16 preferred_gt; - /** @preferred_mem_type: preferred memory type */ - s32 preferred_mem_type; - /** - * @cpu_atomic: the CPU expects to do atomics operations to - * this BO - */ - bool cpu_atomic; - /** - * @device_atomic: the device expects to do atomics operations - * to this BO - */ - bool device_atomic; - } props; /** @freed: List node for delayed put. */ struct llist_node freed; /** @created: Whether the bo has passed initial creation */ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ca85e81fdb44..d32ff3857e65 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -193,6 +193,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) { struct xe_device *xe = to_xe_device(dev); + if (xe->preempt_fence_wq) + destroy_workqueue(xe->preempt_fence_wq); + if (xe->ordered_wq) destroy_workqueue(xe->ordered_wq); @@ -258,9 +261,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, INIT_LIST_HEAD(&xe->pinned.external_vram); INIT_LIST_HEAD(&xe->pinned.evicted); + xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); - if (!xe->ordered_wq || !xe->unordered_wq) { + if (!xe->ordered_wq || !xe->unordered_wq || + !xe->preempt_fence_wq) { + /* + * Cleanup done in xe_device_destroy via + * drmm_add_action_or_reset register above + */ drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); err = -ENOMEM; goto err; diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 14be34d9f543..d413bc2c6be5 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -58,7 +58,7 @@ static inline struct 
xe_tile *xe_device_get_root_tile(struct xe_device *xe) static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) { - if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id > XE_MAX_GT_PER_TILE)) + if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE)) gt_id = 0; return gt_id ? tile->media_gt : tile->primary_gt; @@ -79,7 +79,7 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) if (MEDIA_VER(xe) >= 13) { gt = xe_tile_get_gt(root_tile, gt_id); } else { - if (drm_WARN_ON(&xe->drm, gt_id > XE_MAX_TILES_PER_DEVICE)) + if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE)) gt_id = 0; gt = xe->tiles[gt_id].primary_gt; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9785eef2e5a4..8e3a222b41cf 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -363,6 +363,9 @@ struct xe_device { /** @ufence_wq: user fence wait queue */ wait_queue_head_t ufence_wq; + /** @preempt_fence_wq: used to serialize preempt fences */ + struct workqueue_struct *preempt_fence_wq; + /** @ordered_wq: used to serialize compute mode resume */ struct workqueue_struct *ordered_wq; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 826c8b389672..cc5e0f75de3c 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -94,48 +94,16 @@ * Unlock all */ +/* + * Add validation and rebinding to the drm_exec locking loop, since both can + * trigger eviction which may require sleeping dma_resv locks. + */ static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) { struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); - struct drm_gem_object *obj; - unsigned long index; - int num_fences; - int ret; - - ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); - if (ret) - return ret; - - /* - * 1 fence slot for the final submit, and 1 more for every per-tile for - * GPU bind and 1 extra for CPU bind. 
Note that there are potentially - * many vma per object/dma-resv, however the fence slot will just be - * re-used, since they are largely the same timeline and the seqno - * should be in order. In the case of CPU bind there is dummy fence used - * for all CPU binds, so no need to have a per-tile slot for that. - */ - num_fences = 1 + 1 + vm->xe->info.tile_count; - /* - * We don't know upfront exactly how many fence slots we will need at - * the start of the exec, since the TTM bo_validate above can consume - * numerous fence slots. Also due to how the dma_resv_reserve_fences() - * works it only ensures that at least that many fence slots are - * available i.e if there are already 10 slots available and we reserve - * two more, it can just noop without reserving anything. With this it - * is quite possible that TTM steals some of the fence slots and then - * when it comes time to do the vma binding and final exec stage we are - * lacking enough fence slots, leading to some nasty BUG_ON() when - * adding the fences. Hence just add our own fences here, after the - * validate stage. - */ - drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) { - ret = dma_resv_reserve_fences(obj->resv, num_fences); - if (ret) - return ret; - } - - return 0; + /* The fence slot added here is intended for the exec sched job. */ + return xe_vm_validate_rebind(vm, &vm_exec->exec, 1); } int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -152,7 +120,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_exec *exec = &vm_exec.exec; u32 i, num_syncs = 0, num_ufence = 0; struct xe_sched_job *job; - struct dma_fence *rebind_fence; struct xe_vm *vm; bool write_locked, skip_retry = false; ktime_t end = 0; @@ -290,39 +257,7 @@ retry: goto err_exec; } - /* - * Rebind any invalidated userptr or evicted BOs in the VM, non-compute - * VM mode only. 
- */ - rebind_fence = xe_vm_rebind(vm, false); - if (IS_ERR(rebind_fence)) { - err = PTR_ERR(rebind_fence); - goto err_put_job; - } - - /* - * We store the rebind_fence in the VM so subsequent execs don't get - * scheduled before the rebinds of userptrs / evicted BOs is complete. - */ - if (rebind_fence) { - dma_fence_put(vm->rebind_fence); - vm->rebind_fence = rebind_fence; - } - if (vm->rebind_fence) { - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &vm->rebind_fence->flags)) { - dma_fence_put(vm->rebind_fence); - vm->rebind_fence = NULL; - } else { - dma_fence_get(vm->rebind_fence); - err = drm_sched_job_add_dependency(&job->drm, - vm->rebind_fence); - if (err) - goto err_put_job; - } - } - - /* Wait behind munmap style rebinds */ + /* Wait behind rebinds */ if (!xe_vm_in_lr_mode(vm)) { err = drm_sched_job_add_resv_dependencies(&job->drm, xe_vm_resv(vm), diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 11e150f4c0c1..ead25d5e723e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -448,7 +448,7 @@ find_hw_engine(struct xe_device *xe, { u32 idx; - if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return NULL; if (eci.gt_id >= xe->info.gt_count) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 62b3d9d1d7cd..462b33195032 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -148,6 +148,11 @@ struct xe_exec_queue { const struct xe_ring_ops *ring_ops; /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */ struct drm_sched_entity *entity; + /** + * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed + * Protected by @vm's resv. Unused if @vm == NULL. 
+ */ + u64 tlb_flush_seqno; /** @lrc: logical ring context for this exec queue */ struct xe_lrc lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 241c294270d9..fa9e9853c53b 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); - unsigned int num_shared = 2; /* slots for bind + move */ int err; - err = xe_vm_prepare_vma(exec, vma, num_shared); + err = xe_vm_lock_vma(exec, vma); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index f03e077f81a0..e598a4363d01 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -61,7 +61,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences); spin_lock_init(&gt->tlb_invalidation.pending_lock); spin_lock_init(&gt->tlb_invalidation.lock); - gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1); INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr, xe_gt_tlb_fence_timeout); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 70c615dd1498..07b2f724ec45 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -177,13 +177,6 @@ struct xe_gt { * xe_gt_tlb_fence_timeout after the timeut interval is over. 
*/ struct delayed_work fence_tdr; - /** @tlb_invalidation.fence_context: context for TLB invalidation fences */ - u64 fence_context; - /** - * @tlb_invalidation.fence_seqno: seqno to TLB invalidation fences, protected by - * tlb_invalidation.lock - */ - u32 fence_seqno; /** @tlb_invalidation.lock: protects TLB invalidation fences */ spinlock_t lock; } tlb_invalidation; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index ff77bc8da1b2..e2a4c3b5e9ff 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1220,7 +1220,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) init_waitqueue_head(&ge->suspend_wait); timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : - q->sched_props.job_timeout_ms; + msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, get_submit_wq(guc), q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 7ad853b0788a..1426febe86eb 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -97,7 +97,6 @@ static void set_offsets(u32 *regs, #define REG16(x) \ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ (((x) >> 2) & 0x7f) -#define END 0 { const u32 base = hwe->mmio_base; @@ -168,7 +167,7 @@ static const u8 gen12_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END + 0 }; static const u8 dg2_xcs_offsets[] = { @@ -202,7 +201,7 @@ static const u8 dg2_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END + 0 }; static const u8 gen12_rcs_offsets[] = { @@ -298,7 +297,7 @@ static const u8 gen12_rcs_offsets[] = { REG(0x084), NOP(1), - END + 0 }; static const u8 xehp_rcs_offsets[] = { @@ -339,7 +338,7 @@ static const u8 xehp_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; static const u8 dg2_rcs_offsets[] = { @@ -382,7 +381,7 @@ static const u8 dg2_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; 
static const u8 mtl_rcs_offsets[] = { @@ -425,7 +424,7 @@ static const u8 mtl_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; #define XE2_CTX_COMMON \ @@ -471,7 +470,7 @@ static const u8 xe2_rcs_offsets[] = { LRI(1, 0), /* [0x47] */ REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ - END + 0 }; static const u8 xe2_bcs_offsets[] = { @@ -482,16 +481,15 @@ static const u8 xe2_bcs_offsets[] = { REG16(0x200), /* [0x42] BCS_SWCTRL */ REG16(0x204), /* [0x44] BLIT_CCTL */ - END + 0 }; static const u8 xe2_xcs_offsets[] = { XE2_CTX_COMMON, - END + 0 }; -#undef END #undef REG16 #undef REG #undef LRI diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 7bce2a332603..7d50c6e89d8e 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -49,7 +49,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence) struct xe_exec_queue *q = pfence->q; pfence->error = q->ops->suspend(q); - queue_work(system_unbound_wq, &pfence->preempt_work); + queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work); return true; } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 7f54bc3e389d..4efc8c1a3d7a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1135,8 +1135,7 @@ static int invalidation_fence_init(struct xe_gt *gt, spin_lock_irq(&gt->tlb_invalidation.lock); dma_fence_init(&ifence->base.base, &invalidation_fence_ops, &gt->tlb_invalidation.lock, - gt->tlb_invalidation.fence_context, - ++gt->tlb_invalidation.fence_seqno); + dma_fence_context_alloc(1), 1); spin_unlock_irq(&gt->tlb_invalidation.lock); INIT_LIST_HEAD(&ifence->base.link); @@ -1236,6 +1235,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); if (err) goto err; + + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + err = 
dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); + if (err) + goto err; + xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); @@ -1254,11 +1260,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue * non-faulting LR, in particular on user-space batch buffer chaining, * it needs to be done here. */ - if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) || - (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { + if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); + } else if (rebind && !xe_vm_in_lr_mode(vm)) { + /* We bump also if batch_invalidate_tlb is true */ + vm->tlb_flush_seqno++; } rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); @@ -1297,7 +1305,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue } /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind && + dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || last_munmap_rebind ? 
DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); @@ -1576,6 +1584,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu struct dma_fence *fence = NULL; struct invalidation_fence *ifence; struct xe_range_fence *rfence; + int err; LLIST_HEAD(deferred); @@ -1593,6 +1602,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, num_entries); + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); + if (err) + return ERR_PTR(err); + ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 92bb06c0586e..075f9eaef031 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -132,7 +132,7 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id > XE_MAX_GT_PER_TILE) + if (eci->gt_id >= XE_MAX_GT_PER_TILE) return -EINVAL; gt = xe_device_get_gt(xe, eci->gt_id); diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index c4edffcd4a32..5b2b37b59813 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -219,10 +219,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); - struct xe_vm *vm = job->q->vm; struct xe_gt *gt = job->q->gt; - if (vm && vm->batch_invalidate_tlb) { + if (job->ring_ops_flush_tlb) { dw[i++] = preparser_disable(true); i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, true, dw, i); @@ -270,7 +269,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); bool decode = 
job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; - struct xe_vm *vm = job->q->vm; dw[i++] = preparser_disable(true); @@ -282,13 +280,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i); } - if (vm && vm->batch_invalidate_tlb) + if (job->ring_ops_flush_tlb) i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, true, dw, i); dw[i++] = preparser_disable(false); - if (!vm || !vm->batch_invalidate_tlb) + if (!job->ring_ops_flush_tlb) i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, dw, i); @@ -317,7 +315,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); - struct xe_vm *vm = job->q->vm; u32 mask_flags = 0; dw[i++] = preparser_disable(true); @@ -327,7 +324,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. 
*/ - i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i); + i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i); /* hsdes: 1809175790 */ if (has_aux_ccs(xe)) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 8151ddafb940..b0c7fa4693cf 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -250,6 +250,16 @@ bool xe_sched_job_completed(struct xe_sched_job *job) void xe_sched_job_arm(struct xe_sched_job *job) { + struct xe_exec_queue *q = job->q; + struct xe_vm *vm = q->vm; + + if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) && + (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) { + xe_vm_assert_held(vm); + q->tlb_flush_seqno = vm->tlb_flush_seqno; + job->ring_ops_flush_tlb = true; + } + drm_sched_job_arm(&job->drm); } diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index b1d83da50a53..5e12724219fd 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -39,6 +39,8 @@ struct xe_sched_job { } user_fence; /** @migrate_flush_flags: Additional flush flags for migration jobs */ u32 migrate_flush_flags; + /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ + bool ring_ops_flush_tlb; /** @batch_addr: batch buffer address of job */ u64 batch_addr[]; }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f88faef4142b..62d1ef8867a8 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -482,17 +482,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) return 0; } +/** + * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas + * @vm: The vm for which we are rebinding. + * @exec: The struct drm_exec with the locked GEM objects. 
+ * @num_fences: The number of fences to reserve for the operation, not + * including rebinds and validations. + * + * Validates all evicted gem objects and rebinds their vmas. Note that + * rebindings may cause evictions and hence the validation-rebind + * sequence is rerun until there are no more objects to validate. + * + * Return: 0 on success, negative error code on error. In particular, + * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if + * the drm_exec transaction needs to be restarted. + */ +int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_fences) +{ + struct drm_gem_object *obj; + unsigned long index; + int ret; + + do { + ret = drm_gpuvm_validate(&vm->gpuvm, exec); + if (ret) + return ret; + + ret = xe_vm_rebind(vm, false); + if (ret) + return ret; + } while (!list_empty(&vm->gpuvm.evict.list)); + + drm_exec_for_each_locked_object(exec, index, obj) { + ret = dma_resv_reserve_fences(obj->resv, num_fences); + if (ret) + return ret; + } + + return 0; +} + static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, bool *done) { int err; - /* - * 1 fence for each preempt fence plus a fence for each tile from a - * possible rebind - */ - err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues + - vm->xe->info.tile_count); + err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); if (err) return err; @@ -507,7 +543,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, return 0; } - err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues); + err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); if (err) return err; @@ -515,14 +551,19 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, if (err) return err; - return drm_gpuvm_validate(&vm->gpuvm, exec); + /* + * Add validation and rebinding to the locking loop since both can + * cause evictions which may require blocing dma_resv locks. 
+ * The fence reservation here is intended for the new preempt fences + * we attach at the end of the rebind work. + */ + return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); } static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); struct drm_exec exec; - struct dma_fence *rebind_fence; unsigned int fence_count = 0; LIST_HEAD(preempt_fences); ktime_t end = 0; @@ -568,18 +609,11 @@ retry: if (err) goto out_unlock; - rebind_fence = xe_vm_rebind(vm, true); - if (IS_ERR(rebind_fence)) { - err = PTR_ERR(rebind_fence); + err = xe_vm_rebind(vm, true); + if (err) goto out_unlock; - } - if (rebind_fence) { - dma_fence_wait(rebind_fence, false); - dma_fence_put(rebind_fence); - } - - /* Wait on munmap style VM unbinds */ + /* Wait on rebinds and munmap style VM unbinds */ wait = dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_KERNEL, false, MAX_SCHEDULE_TIMEOUT); @@ -773,14 +807,14 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool first_op, bool last_op); -struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) +int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) { - struct dma_fence *fence = NULL; + struct dma_fence *fence; struct xe_vma *vma, *next; lockdep_assert_held(&vm->lock); if (xe_vm_in_lr_mode(vm) && !rebind_worker) - return NULL; + return 0; xe_vm_assert_held(vm); list_for_each_entry_safe(vma, next, &vm->rebind_list, @@ -788,17 +822,17 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) xe_assert(vm->xe, vma->tile_present); list_del_init(&vma->combined_links.rebind); - dma_fence_put(fence); if (rebind_worker) trace_xe_vma_rebind_worker(vma); else trace_xe_vma_rebind_exec(vma); fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); if (IS_ERR(fence)) - return fence; + return PTR_ERR(fence); + dma_fence_put(fence); } - return fence; + return 0; } static void 
xe_vma_free(struct xe_vma *vma) @@ -1004,35 +1038,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) } /** - * xe_vm_prepare_vma() - drm_exec utility to lock a vma + * xe_vm_lock_vma() - drm_exec utility to lock a vma * @exec: The drm_exec object we're currently locking for. * @vma: The vma for witch we want to lock the vm resv and any attached * object's resv. - * @num_shared: The number of dma-fence slots to pre-allocate in the - * objects' reservation objects. * * Return: 0 on success, negative error code on error. In particular * may return -EDEADLK on WW transaction contention and -EINTR if * an interruptible wait is terminated by a signal. */ -int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, - unsigned int num_shared) +int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); struct xe_bo *bo = xe_vma_bo(vma); int err; XE_WARN_ON(!vm); - if (num_shared) - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); - else - err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); - if (!err && bo && !bo->vm) { - if (num_shared) - err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); - else - err = drm_exec_lock_obj(exec, &bo->ttm.base); - } + + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); + if (!err && bo && !bo->vm) + err = drm_exec_lock_obj(exec, &bo->ttm.base); return err; } @@ -1044,7 +1069,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma) drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { - err = xe_vm_prepare_vma(&exec, vma, 0); + err = xe_vm_lock_vma(&exec, vma); drm_exec_retry_on_contention(&exec); if (XE_WARN_ON(err)) break; @@ -1589,7 +1614,6 @@ static void vm_destroy_work_func(struct work_struct *w) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); - dma_fence_put(vm->rebind_fence); kfree(vm); } @@ -2512,7 +2536,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, lockdep_assert_held_write(&vm->lock); - err = 
xe_vm_prepare_vma(exec, vma, 1); + err = xe_vm_lock_vma(exec, vma); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 6df1f1c7f85d..306cd0934a19 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -207,7 +207,7 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm); int xe_vm_userptr_check_repin(struct xe_vm *vm); -struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); +int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); int xe_vm_invalidate_vma(struct xe_vma *vma); @@ -242,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); -int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, - unsigned int num_shared); +int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma); + +int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_fences); /** * xe_vm_resv() - Return's the vm's reservation object diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index ae5fb565f6bf..badf3945083d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -177,9 +177,6 @@ struct xe_vm { */ struct list_head rebind_list; - /** @rebind_fence: rebind fence from execbuf */ - struct dma_fence *rebind_fence; - /** * @destroy_work: worker to destroy VM, needed as a dma_fence signaling * from an irq context can be last put and the destroy needs to be able @@ -264,6 +261,11 @@ struct xe_vm { bool capture_once; } error_capture; + /** + * @tlb_flush_seqno: Required TLB flush seqno for the next exec. + * protected by the vm resv. 
+ */ + u64 tlb_flush_seqno; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; /** @xef: XE file handle for tracking this VM's drm client */ diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index a6861660cb8c..79870dd7a014 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -536,11 +536,12 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, if (read_write == I2C_SMBUS_READ || command == I2C_SMBUS_BLOCK_PROC_CALL) { - status = i801_get_block_len(priv); - if (status < 0) + len = i801_get_block_len(priv); + if (len < 0) { + status = len; goto out; + } - len = status; data->block[0] = len; inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 76f79b68cef8..888ca636f3f3 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -324,6 +324,7 @@ static void decode_ISR(unsigned int val) decode_bits(KERN_DEBUG "ISR", isr_bits, ARRAY_SIZE(isr_bits), val); } +#ifdef CONFIG_I2C_PXA_SLAVE static const struct bits icr_bits[] = { PXA_BIT(ICR_START, "START", NULL), PXA_BIT(ICR_STOP, "STOP", NULL), @@ -342,7 +343,6 @@ static const struct bits icr_bits[] = { PXA_BIT(ICR_UR, "UR", "ur"), }; -#ifdef CONFIG_I2C_PXA_SLAVE static void decode_ICR(unsigned int val) { decode_bits(KERN_DEBUG "ICR", icr_bits, ARRAY_SIZE(icr_bits), val); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index e7a44929f0da..33228c1c8980 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3228,7 +3228,7 @@ out: static void iommu_snp_enable(void) { #ifdef CONFIG_KVM_AMD_SEV - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return; /* * The SNP support requires that IOMMU must be enabled, and is @@ -3236,12 +3236,14 @@ static void iommu_snp_enable(void) */ if (no_iommu || 
iommu_default_passthrough()) { pr_err("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n"); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); return; } amd_iommu_snp_en = check_feature(FEATURE_SNP); if (!amd_iommu_snp_en) { pr_err("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n"); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); return; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5ed036225e69..41f93c3ab160 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1139,7 +1139,8 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, * requires a breaking update, zero the V bit, write all qwords * but 0, then set qword 0 */ - unused_update.data[0] = entry->data[0] & (~STRTAB_STE_0_V); + unused_update.data[0] = entry->data[0] & + cpu_to_le64(~STRTAB_STE_0_V); entry_set(smmu, sid, entry, &unused_update, 0, 1); entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1); entry_set(smmu, sid, entry, target, 0, 1); @@ -1453,14 +1454,17 @@ static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target) FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT)); } -static void arm_smmu_make_bypass_ste(struct arm_smmu_ste *target) +static void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, + struct arm_smmu_ste *target) { memset(target, 0, sizeof(*target)); target->data[0] = cpu_to_le64( STRTAB_STE_0_V | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS)); - target->data[1] = cpu_to_le64( - FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); + + if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) + target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, + STRTAB_STE_1_SHCFG_INCOMING)); } static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, @@ -1523,6 +1527,7 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, 
typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr; u64 vtcr_val; + struct arm_smmu_device *smmu = master->smmu; memset(target, 0, sizeof(*target)); target->data[0] = cpu_to_le64( @@ -1531,9 +1536,11 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, target->data[1] = cpu_to_le64( FIELD_PREP(STRTAB_STE_1_EATS, - master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0) | - FIELD_PREP(STRTAB_STE_1_SHCFG, - STRTAB_STE_1_SHCFG_INCOMING)); + master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + + if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) + target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, + STRTAB_STE_1_SHCFG_INCOMING)); vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) | FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) | @@ -1560,7 +1567,8 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, * This can safely directly manipulate the STE memory without a sync sequence * because the STE table has not been installed in the SMMU yet. 
*/ -static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab, +static void arm_smmu_init_initial_stes(struct arm_smmu_device *smmu, + struct arm_smmu_ste *strtab, unsigned int nent) { unsigned int i; @@ -1569,7 +1577,7 @@ static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab, if (disable_bypass) arm_smmu_make_abort_ste(strtab); else - arm_smmu_make_bypass_ste(strtab); + arm_smmu_make_bypass_ste(smmu, strtab); strtab++; } } @@ -1597,7 +1605,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return -ENOMEM; } - arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT); + arm_smmu_init_initial_stes(smmu, desc->l2ptr, 1 << STRTAB_SPLIT); arm_smmu_write_strtab_l1_desc(strtab, desc); return 0; } @@ -2637,8 +2645,9 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, struct device *dev) { struct arm_smmu_ste ste; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); - arm_smmu_make_bypass_ste(&ste); + arm_smmu_make_bypass_ste(master->smmu, &ste); return arm_smmu_attach_dev_ste(dev, &ste); } @@ -3264,7 +3273,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); cfg->strtab_base_cfg = reg; - arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents); + arm_smmu_init_initial_stes(smmu, strtab, cfg->num_l1_ents); return 0; } @@ -3777,6 +3786,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) return -ENXIO; } + if (reg & IDR1_ATTR_TYPES_OVR) + smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR; + /* Queue sizes, capped to ensure natural alignment */ smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, reg)); @@ -3992,7 +4004,7 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) * STE table is not programmed to HW, see * arm_smmu_initial_bypass_stes() */ - arm_smmu_make_bypass_ste( + arm_smmu_make_bypass_ste(smmu, arm_smmu_get_step_for_sid(smmu, 
rmr->sids[i])); } } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 23baf117e7e4..2a19bb63e5c6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -44,6 +44,7 @@ #define IDR1_TABLES_PRESET (1 << 30) #define IDR1_QUEUES_PRESET (1 << 29) #define IDR1_REL (1 << 28) +#define IDR1_ATTR_TYPES_OVR (1 << 27) #define IDR1_CMDQS GENMASK(25, 21) #define IDR1_EVTQS GENMASK(20, 16) #define IDR1_PRIQS GENMASK(15, 11) @@ -647,6 +648,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_SVA (1 << 17) #define ARM_SMMU_FEAT_E2H (1 << 18) #define ARM_SMMU_FEAT_NESTING (1 << 19) +#define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 098869007c69..a95a483def2d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3354,6 +3354,7 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; + struct group_device *device; void *curr; int ret; @@ -3363,10 +3364,18 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, if (!group) return -ENODEV; - if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner) + if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner || + pasid == IOMMU_NO_PASID) return -EINVAL; mutex_lock(&group->mutex); + for_each_group_device(group, device) { + if (pasid >= device->dev->iommu->max_pasids) { + ret = -EINVAL; + goto out_unlock; + } + } + curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); if (curr) { ret = xa_err(curr) ? 
: -EBUSY; diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index a55528469278..4b021a67bdfe 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -316,7 +316,7 @@ static int armada_370_xp_msi_init(struct device_node *node, return 0; } #else -static void armada_370_xp_msi_reenable_percpu(void) {} +static __maybe_unused void armada_370_xp_msi_reenable_percpu(void) {} static inline int armada_370_xp_msi_init(struct device_node *node, phys_addr_t main_int_phys_base) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 37b9f8f1ae1a..7f3dc8ee6ab8 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4221,7 +4221,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { - if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { + if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) { r = -EINVAL; ti->error = "Invalid bitmap_flush_interval argument"; goto bad; diff --git a/drivers/md/dm-vdo/murmurhash3.c b/drivers/md/dm-vdo/murmurhash3.c index 00c9b9c05001..01d2743444ec 100644 --- a/drivers/md/dm-vdo/murmurhash3.c +++ b/drivers/md/dm-vdo/murmurhash3.c @@ -8,33 +8,14 @@ #include "murmurhash3.h" +#include <asm/unaligned.h> + static inline u64 rotl64(u64 x, s8 r) { return (x << r) | (x >> (64 - r)); } #define ROTL64(x, y) rotl64(x, y) -static __always_inline u64 getblock64(const u64 *p, int i) -{ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return p[i]; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return __builtin_bswap64(p[i]); -#else -#error "can't figure out byte order" -#endif -} - -static __always_inline void putblock64(u64 *p, int i, u64 value) -{ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - p[i] = value; 
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - p[i] = __builtin_bswap64(value); -#else -#error "can't figure out byte order" -#endif -} /* Finalization mix - force all bits of a hash block to avalanche */ @@ -60,6 +41,8 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out) const u64 c1 = 0x87c37b91114253d5LLU; const u64 c2 = 0x4cf5ad432745937fLLU; + u64 *hash_out = out; + /* body */ const u64 *blocks = (const u64 *)(data); @@ -67,8 +50,8 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out) int i; for (i = 0; i < nblocks; i++) { - u64 k1 = getblock64(blocks, i * 2 + 0); - u64 k2 = getblock64(blocks, i * 2 + 1); + u64 k1 = get_unaligned_le64(&blocks[i * 2]); + u64 k2 = get_unaligned_le64(&blocks[i * 2 + 1]); k1 *= c1; k1 = ROTL64(k1, 31); @@ -170,6 +153,6 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out) h1 += h2; h2 += h1; - putblock64((u64 *)out, 0, h1); - putblock64((u64 *)out, 1, h2); + put_unaligned_le64(h1, &hash_out[0]); + put_unaligned_le64(h2, &hash_out[1]); } diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 64a3492e8002..90c51b12148e 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -413,7 +413,7 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user( struct mmc_blk_ioc_data *idata; int err; - idata = kmalloc(sizeof(*idata), GFP_KERNEL); + idata = kzalloc(sizeof(*idata), GFP_KERNEL); if (!idata) { err = -ENOMEM; goto out; @@ -488,7 +488,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, if (idata->flags & MMC_BLK_IOC_DROP) return 0; - if (idata->flags & MMC_BLK_IOC_SBC) + if (idata->flags & MMC_BLK_IOC_SBC && i > 0) prev_idata = idatas[i - 1]; /* diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index ab4b964d4058..1d8f5a76096a 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -999,6 +999,17 @@ 
free_pltfm: return err; } +static void dwcmshc_disable_card_clk(struct sdhci_host *host) +{ + u16 ctrl; + + ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + if (ctrl & SDHCI_CLOCK_CARD_EN) { + ctrl &= ~SDHCI_CLOCK_CARD_EN; + sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL); + } +} + static void dwcmshc_remove(struct platform_device *pdev) { struct sdhci_host *host = platform_get_drvdata(pdev); @@ -1006,8 +1017,14 @@ static void dwcmshc_remove(struct platform_device *pdev) struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host); struct rk35xx_priv *rk_priv = priv->priv; + pm_runtime_get_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + sdhci_remove_host(host, 0); + dwcmshc_disable_card_clk(host); + clk_disable_unprepare(pltfm_host->clk); clk_disable_unprepare(priv->bus_clk); if (rk_priv) @@ -1099,17 +1116,6 @@ static void dwcmshc_enable_card_clk(struct sdhci_host *host) } } -static void dwcmshc_disable_card_clk(struct sdhci_host *host) -{ - u16 ctrl; - - ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL); - if (ctrl & SDHCI_CLOCK_CARD_EN) { - ctrl &= ~SDHCI_CLOCK_CARD_EN; - sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL); - } -} - static int dwcmshc_runtime_suspend(struct device *dev) { struct sdhci_host *host = dev_get_drvdata(dev); diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index e78faef67d7a..94076b095571 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -1439,6 +1439,9 @@ static int __maybe_unused sdhci_omap_runtime_suspend(struct device *dev) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + if (host->tuning_mode != SDHCI_TUNING_MODE_3) + mmc_retune_needed(host->mmc); + if (omap_host->con != -EINVAL) sdhci_runtime_suspend_host(host); diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 97a00ec9a4d4..caacdc0a3819 100644 --- a/drivers/mtd/devices/block2mtd.c +++ 
b/drivers/mtd/devices/block2mtd.c @@ -209,7 +209,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) if (dev->bdev_file) { invalidate_mapping_pages(dev->bdev_file->f_mapping, 0, -1); - fput(dev->bdev_file); + bdev_fput(dev->bdev_file); } kfree(dev); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 767f66c37f6b..1035820c2377 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2268,8 +2268,6 @@ mt7530_setup(struct dsa_switch *ds) SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | SYS_CTRL_REG_RST); - mt7530_pll_setup(priv); - /* Lower Tx driving for TRGMII path */ for (i = 0; i < NUM_TRGMII_CTRL; i++) mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), @@ -2285,6 +2283,9 @@ mt7530_setup(struct dsa_switch *ds) val |= MHWTRAP_MANUAL; mt7530_write(priv, MT7530_MHWTRAP, val); + if ((val & HWTRAP_XTAL_MASK) == HWTRAP_XTAL_40MHZ) + mt7530_pll_setup(priv); + mt753x_trap_frames(priv); /* Enable and reset MIB counters */ diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 9ed1821184ec..c95787cb9086 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5503,8 +5503,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .family = MV88E6XXX_FAMILY_6250, .name = "Marvell 88E6020", .num_databases = 64, - .num_ports = 4, + /* Ports 2-4 are not routed to pins + * => usable ports 0, 1, 5, 6 + */ + .num_ports = 7, .num_internal_phys = 2, + .invalid_port_mask = BIT(2) | BIT(3) | BIT(4), .max_vid = 4095, .port_base_addr = 0x8, .phy_base_addr = 0x0, diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 833e55e4b961..52ddb4ef259e 100644 --- a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -94,7 +94,7 @@ int sja1110_pcs_mdio_read_c45(struct mii_bus *bus, int phy, int mmd, int reg) return tmp & 0xffff; } -int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int reg, int mmd, +int 
sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int mmd, int reg, u16 val) { struct sja1105_mdio_private *mdio_priv = bus->priv; diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index dd06b68b33ed..72ea97c5d5d4 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -392,7 +392,9 @@ static void umac_reset(struct bcmasp_intf *intf) umac_wl(intf, 0x0, UMC_CMD); umac_wl(intf, UMC_CMD_SW_RESET, UMC_CMD); usleep_range(10, 100); - umac_wl(intf, 0x0, UMC_CMD); + /* We hold the umac in reset and bring it out of + * reset when phy link is up. + */ } static void umac_set_hw_addr(struct bcmasp_intf *intf, @@ -412,6 +414,8 @@ static void umac_enable_set(struct bcmasp_intf *intf, u32 mask, u32 reg; reg = umac_rl(intf, UMC_CMD); + if (reg & UMC_CMD_SW_RESET) + return; if (enable) reg |= mask; else @@ -430,7 +434,6 @@ static void umac_init(struct bcmasp_intf *intf) umac_wl(intf, 0x800, UMC_FRM_LEN); umac_wl(intf, 0xffff, UMC_PAUSE_CNTRL); umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ); - umac_enable_set(intf, UMC_CMD_PROMISC, 1); } static int bcmasp_tx_poll(struct napi_struct *napi, int budget) @@ -658,6 +661,12 @@ static void bcmasp_adj_link(struct net_device *dev) UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE | UMC_CMD_TX_PAUSE_IGNORE); reg |= cmd_bits; + if (reg & UMC_CMD_SW_RESET) { + reg &= ~UMC_CMD_SW_RESET; + umac_wl(intf, reg, UMC_CMD); + udelay(2); + reg |= UMC_CMD_TX_EN | UMC_CMD_RX_EN | UMC_CMD_PROMISC; + } umac_wl(intf, reg, UMC_CMD); active = phy_init_eee(phydev, 0) >= 0; @@ -1035,19 +1044,12 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) /* Indicate that the MAC is responsible for PHY PM */ phydev->mac_managed_pm = true; - } else if (!intf->wolopts) { - ret = phy_resume(dev->phydev); - if (ret) - goto err_phy_disable; } umac_reset(intf); umac_init(intf); - /* Disable the UniMAC RX/TX */ - umac_enable_set(intf, 
(UMC_CMD_RX_EN | UMC_CMD_TX_EN), 0); - umac_set_hw_addr(intf, dev->dev_addr); intf->old_duplex = -1; @@ -1062,9 +1064,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) netif_napi_add(intf->ndev, &intf->rx_napi, bcmasp_rx_poll); bcmasp_enable_rx(intf, 1); - /* Turn on UniMAC TX/RX */ - umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 1); - intf->crc_fwd = !!(umac_rl(intf, UMC_CMD) & UMC_CMD_CRC_FWD); bcmasp_netif_start(dev); @@ -1306,7 +1305,14 @@ static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf) if (intf->wolopts & WAKE_FILTER) bcmasp_netfilt_suspend(intf); - /* UniMAC receive needs to be turned on */ + /* Bring UniMAC out of reset if needed and enable RX */ + reg = umac_rl(intf, UMC_CMD); + if (reg & UMC_CMD_SW_RESET) + reg &= ~UMC_CMD_SW_RESET; + + reg |= UMC_CMD_RX_EN | UMC_CMD_PROMISC; + umac_wl(intf, reg, UMC_CMD); + umac_enable_set(intf, UMC_CMD_RX_EN, 1); if (intf->parent->wol_irq > 0) { @@ -1324,7 +1330,6 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf) { struct device *kdev = &intf->parent->pdev->dev; struct net_device *dev = intf->ndev; - int ret = 0; if (!netif_running(dev)) return 0; @@ -1334,10 +1339,6 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf) bcmasp_netif_deinit(dev); if (!intf->wolopts) { - ret = phy_suspend(dev->phydev); - if (ret) - goto out; - if (intf->internal_phy) bcmasp_ephy_enable_set(intf, false); else @@ -1354,11 +1355,7 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf) clk_disable_unprepare(intf->parent->clk); - return ret; - -out: - bcmasp_netif_init(dev, false); - return ret; + return 0; } static void bcmasp_resume_from_wol(struct bcmasp_intf *intf) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 7396e2823e32..b1f84b37032a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3280,7 +3280,7 @@ static void bcmgenet_get_hw_addr(struct 
bcmgenet_priv *priv, } /* Returns a reusable dma control register value */ -static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv) +static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv, bool flush_rx) { unsigned int i; u32 reg; @@ -3305,6 +3305,14 @@ static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv) udelay(10); bcmgenet_umac_writel(priv, 0, UMAC_TX_FLUSH); + if (flush_rx) { + reg = bcmgenet_rbuf_ctrl_get(priv); + bcmgenet_rbuf_ctrl_set(priv, reg | BIT(0)); + udelay(10); + bcmgenet_rbuf_ctrl_set(priv, reg); + udelay(10); + } + return dma_ctrl; } @@ -3368,8 +3376,8 @@ static int bcmgenet_open(struct net_device *dev) bcmgenet_set_hw_addr(priv, dev->dev_addr); - /* Disable RX/TX DMA and flush TX queues */ - dma_ctrl = bcmgenet_dma_disable(priv); + /* Disable RX/TX DMA and flush TX and RX queues */ + dma_ctrl = bcmgenet_dma_disable(priv, true); /* Reinitialize TDMA and RDMA and SW housekeeping */ ret = bcmgenet_init_dma(priv); @@ -4235,7 +4243,7 @@ static int bcmgenet_resume(struct device *d) bcmgenet_hfb_create_rxnfc_filter(priv, rule); /* Disable RX/TX DMA and flush TX queues */ - dma_ctrl = bcmgenet_dma_disable(priv); + dma_ctrl = bcmgenet_dma_disable(priv, false); /* Reinitialize TDMA and RDMA and SW housekeeping */ ret = bcmgenet_init_dma(priv); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d7693fdf640d..8bd213da8fb6 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2454,8 +2454,6 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; - phy_dev->mac_managed_pm = true; - phy_attached_info(phy_dev); return 0; @@ -2467,10 +2465,12 @@ static int fec_enet_mii_init(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); bool suppress_preamble = false; + struct phy_device *phydev; struct device_node *node; int err = 
-ENXIO; u32 mii_speed, holdtime; u32 bus_freq; + int addr; /* * The i.MX28 dual fec interfaces are not equal. @@ -2584,6 +2584,13 @@ static int fec_enet_mii_init(struct platform_device *pdev) goto err_out_free_mdiobus; of_node_put(node); + /* find all the PHY devices on the bus and set mac_managed_pm to true */ + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + phydev = mdiobus_get_phy(fep->mii_bus, addr); + if (phydev) + phydev->mac_managed_pm = true; + } + mii_cnt++; /* save fec0 mii_bus */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c index f3c9395d8351..618f66d9586b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c @@ -85,7 +85,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle, hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS, true); - desc.data[0] = cpu_to_le32(tqp->index & 0x1ff); + desc.data[0] = cpu_to_le32(tqp->index); ret = hclge_comm_cmd_send(hw, &desc, 1); if (ret) { dev_err(&hw->cmq.csq.pdev->dev, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 999a0ee162a6..941cb529d671 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -78,6 +78,9 @@ static const struct hns3_stats hns3_rxq_stats[] = { #define HNS3_NIC_LB_TEST_NO_MEM_ERR 1 #define HNS3_NIC_LB_TEST_TX_CNT_ERR 2 #define HNS3_NIC_LB_TEST_RX_CNT_ERR 3 +#define HNS3_NIC_LB_TEST_UNEXECUTED 4 + +static int hns3_get_sset_count(struct net_device *netdev, int stringset); static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en) { @@ -418,18 +421,26 @@ static void hns3_do_external_lb(struct net_device *ndev, static void hns3_self_test(struct net_device *ndev, struct ethtool_test *eth_test, u64 
*data) { + int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST); struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; int st_param[HNAE3_LOOP_NONE][2]; bool if_running = netif_running(ndev); + int i; + + /* initialize the loopback test result, avoid marking an unexcuted + * loopback test as PASS. + */ + for (i = 0; i < cnt; i++) + data[i] = HNS3_NIC_LB_TEST_UNEXECUTED; if (hns3_nic_resetting(ndev)) { netdev_err(ndev, "dev resetting!"); - return; + goto failure; } if (!(eth_test->flags & ETH_TEST_FL_OFFLINE)) - return; + goto failure; if (netif_msg_ifdown(h)) netdev_info(ndev, "self test start\n"); @@ -451,6 +462,10 @@ static void hns3_self_test(struct net_device *ndev, if (netif_msg_ifdown(h)) netdev_info(ndev, "self test end\n"); + return; + +failure: + eth_test->flags |= ETH_TEST_FL_FAILED; } static void hns3_update_limit_promisc_mode(struct net_device *netdev, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index b4afb66efe5c..ff6a2ed23ddb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11626,6 +11626,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto err_pci_uninit; + devl_lock(hdev->devlink); + /* Firmware command queue initialize */ ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw); if (ret) @@ -11805,6 +11807,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + devl_unlock(hdev->devlink); return 0; err_mdiobus_unreg: @@ -11817,6 +11820,7 @@ err_msi_uninit: err_cmd_uninit: hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw); err_devlink_uninit: + devl_unlock(hdev->devlink); hclge_devlink_uninit(hdev); err_pci_uninit: pcim_iounmap(pdev, hdev->hw.hw.io_base); diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index 
1fef6bb5a5fb..4b6e7536170a 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -628,6 +628,7 @@ struct e1000_phy_info { u32 id; u32 reset_delay_us; /* in usec */ u32 revision; + u32 retry_count; enum e1000_media_type media_type; @@ -644,6 +645,7 @@ struct e1000_phy_info { bool polarity_correction; bool speed_downgraded; bool autoneg_wait_to_complete; + bool retry_enabled; }; struct e1000_nvm_info { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 19e450a5bd31..f9e94be36e97 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -222,11 +222,18 @@ out: if (hw->mac.type >= e1000_pch_lpt) { /* Only unforce SMBus if ME is not active */ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + /* Unforce SMBus mode in PHY */ e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg); phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg); + e1000e_enable_phy_retry(hw); + /* Unforce SMBus mode in MAC */ mac_reg = er32(CTRL_EXT); mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; @@ -310,6 +317,11 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) goto out; } + /* There is no guarantee that the PHY is accessible at this time + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + /* The MAC-PHY interconnect may be in SMBus mode. If the PHY is * inaccessible and resetting the PHY is not blocked, toggle the * LANPHYPC Value bit to force the interconnect to PCIe mode. 
@@ -380,6 +392,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) break; } + e1000e_enable_phy_retry(hw); + hw->phy.ops.release(hw); if (!ret_val) { @@ -449,6 +463,11 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) phy->id = e1000_phy_unknown; + if (hw->mac.type == e1000_pch_mtp) { + phy->retry_count = 2; + e1000e_enable_phy_retry(hw); + } + ret_val = e1000_init_phy_workarounds_pchlan(hw); if (ret_val) return ret_val; @@ -1146,18 +1165,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) if (ret_val) goto out; - /* Force SMBus mode in PHY */ - ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); - if (ret_val) - goto release; - phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; - e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); - - /* Force SMBus mode in MAC */ - mac_reg = er32(CTRL_EXT); - mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_reg); - /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable * LPLU and disable Gig speed when entering ULP */ @@ -1313,6 +1320,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) /* Toggle LANPHYPC Value bit */ e1000_toggle_lanphypc_pch_lpt(hw); + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + /* Unforce SMBus mode in PHY */ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); if (ret_val) { @@ -1333,6 +1345,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); + e1000e_enable_phy_retry(hw); + /* Unforce SMBus mode in MAC */ mac_reg = er32(CTRL_EXT); mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index cc8c531ec3df..3692fce20195 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ 
b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6623,6 +6623,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) struct e1000_hw *hw = &adapter->hw; u32 ctrl, ctrl_ext, rctl, status, wufc; int retval = 0; + u16 smb_ctrl; /* Runtime suspend should only enable wakeup for link changes */ if (runtime) @@ -6696,6 +6697,23 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) if (retval) return retval; } + + /* Force SMBUS to allow WOL */ + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + + e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl); + smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl); + + e1000e_enable_phy_retry(hw); + + /* Force SMBus mode in MAC */ + ctrl_ext = er32(CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, ctrl_ext); } /* Ensure that the appropriate bits are set in LPI_CTRL diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 5e329156d1ba..93544f1cc2a5 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -107,6 +107,16 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0); } +void e1000e_disable_phy_retry(struct e1000_hw *hw) +{ + hw->phy.retry_enabled = false; +} + +void e1000e_enable_phy_retry(struct e1000_hw *hw) +{ + hw->phy.retry_enabled = true; +} + /** * e1000e_read_phy_reg_mdic - Read MDI control register * @hw: pointer to the HW structure @@ -118,55 +128,73 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) **/ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) { + u32 i, mdic = 0, retry_counter, retry_max; struct e1000_phy_info *phy = &hw->phy; - u32 i, mdic = 0; + bool success; if (offset > MAX_PHY_REG_ADDRESS) { e_dbg("PHY Address %d is out of range\n", offset); return -E1000_ERR_PARAM; } + retry_max = phy->retry_enabled ? 
phy->retry_count : 0; + /* Set up Op-code, Phy Address, and register offset in the MDI * Control register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. */ - mdic = ((offset << E1000_MDIC_REG_SHIFT) | - (phy->addr << E1000_MDIC_PHY_SHIFT) | - (E1000_MDIC_OP_READ)); + for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) { + success = true; - ew32(MDIC, mdic); + mdic = ((offset << E1000_MDIC_REG_SHIFT) | + (phy->addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_READ)); - /* Poll the ready bit to see if the MDI read completed - * Increasing the time out as testing showed failures with - * the lower time out - */ - for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - udelay(50); - mdic = er32(MDIC); - if (mdic & E1000_MDIC_READY) - break; - } - if (!(mdic & E1000_MDIC_READY)) { - e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset); - return -E1000_ERR_PHY; - } - if (mdic & E1000_MDIC_ERROR) { - e_dbg("MDI Read PHY Reg Address %d Error\n", offset); - return -E1000_ERR_PHY; - } - if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { - e_dbg("MDI Read offset error - requested %d, returned %d\n", - offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); - return -E1000_ERR_PHY; + ew32(MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { + usleep_range(50, 60); + mdic = er32(MDIC); + if (mdic & E1000_MDIC_READY) + break; + } + if (!(mdic & E1000_MDIC_READY)) { + e_dbg("MDI Read PHY Reg Address %d did not complete\n", + offset); + success = false; + } + if (mdic & E1000_MDIC_ERROR) { + e_dbg("MDI Read PHY Reg Address %d Error\n", offset); + success = false; + } + if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { + e_dbg("MDI Read offset error - requested %d, returned %d\n", + offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); + success = false; + } + + /* 
Allow some time after each MDIC transaction to avoid + * reading duplicate data in the next MDIC transaction. + */ + if (hw->mac.type == e1000_pch2lan) + usleep_range(100, 150); + + if (success) { + *data = (u16)mdic; + return 0; + } + + if (retry_counter != retry_max) { + e_dbg("Perform retry on PHY transaction...\n"); + mdelay(10); + } } - *data = (u16)mdic; - /* Allow some time after each MDIC transaction to avoid - * reading duplicate data in the next MDIC transaction. - */ - if (hw->mac.type == e1000_pch2lan) - udelay(100); - return 0; + return -E1000_ERR_PHY; } /** @@ -179,56 +207,72 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) **/ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) { + u32 i, mdic = 0, retry_counter, retry_max; struct e1000_phy_info *phy = &hw->phy; - u32 i, mdic = 0; + bool success; if (offset > MAX_PHY_REG_ADDRESS) { e_dbg("PHY Address %d is out of range\n", offset); return -E1000_ERR_PARAM; } + retry_max = phy->retry_enabled ? phy->retry_count : 0; + /* Set up Op-code, Phy Address, and register offset in the MDI * Control register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. 
*/ - mdic = (((u32)data) | - (offset << E1000_MDIC_REG_SHIFT) | - (phy->addr << E1000_MDIC_PHY_SHIFT) | - (E1000_MDIC_OP_WRITE)); + for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) { + success = true; - ew32(MDIC, mdic); + mdic = (((u32)data) | + (offset << E1000_MDIC_REG_SHIFT) | + (phy->addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_WRITE)); - /* Poll the ready bit to see if the MDI read completed - * Increasing the time out as testing showed failures with - * the lower time out - */ - for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - udelay(50); - mdic = er32(MDIC); - if (mdic & E1000_MDIC_READY) - break; - } - if (!(mdic & E1000_MDIC_READY)) { - e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset); - return -E1000_ERR_PHY; - } - if (mdic & E1000_MDIC_ERROR) { - e_dbg("MDI Write PHY Red Address %d Error\n", offset); - return -E1000_ERR_PHY; - } - if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { - e_dbg("MDI Write offset error - requested %d, returned %d\n", - offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); - return -E1000_ERR_PHY; - } + ew32(MDIC, mdic); - /* Allow some time after each MDIC transaction to avoid - * reading duplicate data in the next MDIC transaction. 
- */ - if (hw->mac.type == e1000_pch2lan) - udelay(100); + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { + usleep_range(50, 60); + mdic = er32(MDIC); + if (mdic & E1000_MDIC_READY) + break; + } + if (!(mdic & E1000_MDIC_READY)) { + e_dbg("MDI Write PHY Reg Address %d did not complete\n", + offset); + success = false; + } + if (mdic & E1000_MDIC_ERROR) { + e_dbg("MDI Write PHY Reg Address %d Error\n", offset); + success = false; + } + if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { + e_dbg("MDI Write offset error - requested %d, returned %d\n", + offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); + success = false; + } - return 0; + /* Allow some time after each MDIC transaction to avoid + * reading duplicate data in the next MDIC transaction. + */ + if (hw->mac.type == e1000_pch2lan) + usleep_range(100, 150); + + if (success) + return 0; + + if (retry_counter != retry_max) { + e_dbg("Perform retry on PHY transaction...\n"); + mdelay(10); + } + } + + return -E1000_ERR_PHY; } /** diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h index c48777d09523..049bb325b4b1 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.h +++ b/drivers/net/ethernet/intel/e1000e/phy.h @@ -51,6 +51,8 @@ s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data); s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data); void e1000_power_up_phy_copper(struct e1000_hw *hw); void e1000_power_down_phy_copper(struct e1000_hw *hw); +void e1000e_disable_phy_retry(struct e1000_hw *hw); +void e1000e_enable_phy_retry(struct e1000_hw *hw); s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data); diff --git 
a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index ba24f3fa92c3..2fbabcdb5bb5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -955,6 +955,7 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; + bool in_busy_poll; int irq_num; /* IRQ assigned to this q_vector */ } ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f86578857e8a..48b9ddb2b1b3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1253,8 +1253,11 @@ int i40e_count_filters(struct i40e_vsi *vsi) int bkt; int cnt = 0; - hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) - ++cnt; + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_ACTIVE) + ++cnt; + } return cnt; } @@ -3911,6 +3914,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; + /* Set ITR for software interrupts triggered after exiting + * busy-loop polling. 
+ */ + wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1), + I40E_ITR_20K); + wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 14ab642cafdb..432afbb64201 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -333,8 +333,11 @@ #define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3 #define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) #define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5 +#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT) #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24 #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT) +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25 +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT) #define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */ #define I40E_PFINT_ICR0_INTEVENT_SHIFT 0 #define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 0d7177083708..1a12b732818e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2630,7 +2630,22 @@ process_next: return failure ? 
budget : (int)total_rx_packets; } -static inline u32 i40e_buildreg_itr(const int type, u16 itr) +/** + * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register + * @itr_idx: interrupt throttling index + * @interval: interrupt throttling interval value in usecs + * @force_swint: force software interrupt + * + * The function builds a value for I40E_PFINT_DYN_CTLN register that + * is used to update interrupt throttling interval for specified ITR index + * and optionally enforces a software interrupt. If the @itr_idx is equal + * to I40E_ITR_NONE then no interval change is applied and only @force_swint + * parameter is taken into account. If the interval change and enforced + * software interrupt are not requested then the built value just enables + * appropriate vector interrupt. + **/ +static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval, + bool force_swint) { u32 val; @@ -2644,23 +2659,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) * an event in the PBA anyway so we need to rely on the automask * to hold pending events for us until the interrupt is re-enabled * - * The itr value is reported in microseconds, and the register - * value is recorded in 2 microsecond units. For this reason we - * only need to shift by the interval shift - 1 instead of the - * full value. + * We have to shift the given value as it is reported in microseconds + * and the register value is recorded in 2 microsecond units. */ - itr &= I40E_ITR_MASK; + interval >>= 1; + /* 1. Enable vector interrupt + * 2. Update the interval for the specified ITR index + * (I40E_ITR_NONE in the register is used to indicate that + * no interval update is requested) + */ val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | - (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); + FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) | + FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval); + + /* 3. 
Enforce software interrupt trigger if requested + * (These software interrupts rate is limited by ITR2 that is + * set to 20K interrupts per second) + */ + if (force_swint) + val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | + I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK | + FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK, + I40E_SW_ITR); return val; } -/* a small macro to shorten up some long lines */ -#define INTREG I40E_PFINT_DYN_CTLN - /* The act of updating the ITR will cause it to immediately trigger. In order * to prevent this from throwing off adaptive update statistics we defer the * update so that it can only happen so often. So after either Tx or Rx are @@ -2679,8 +2704,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { + enum i40e_dyn_idx itr_idx = I40E_ITR_NONE; struct i40e_hw *hw = &vsi->back->hw; - u32 intval; + u16 interval = 0; + u32 itr_val; /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) { @@ -2702,8 +2729,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ if (q_vector->rx.target_itr < q_vector->rx.current_itr) { /* Rx ITR needs to be reduced, this is highest priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || @@ -2712,25 +2739,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, /* Tx ITR needs to be reduced, this is second priority * Tx ITR needs to be increased more than Rx, fourth priority */ - intval = i40e_buildreg_itr(I40E_TX_ITR, - q_vector->tx.target_itr); + itr_idx = I40E_TX_ITR; + interval = q_vector->tx.target_itr; q_vector->tx.current_itr = q_vector->tx.target_itr; 
q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { /* Rx ITR needs to be increased, third priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else { /* No ITR update, lowest priority */ - intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); if (q_vector->itr_countdown) q_vector->itr_countdown--; } - if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), intval); + /* Do not update interrupt control register if VSI is down */ + if (test_bit(__I40E_VSI_DOWN, vsi->state)) + return; + + /* Update ITR interval if necessary and enforce software interrupt + * if we are exiting busy poll. + */ + if (q_vector->in_busy_poll) { + itr_val = i40e_buildreg_itr(itr_idx, interval, true); + q_vector->in_busy_poll = false; + } else { + itr_val = i40e_buildreg_itr(itr_idx, interval, false); + } + wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val); } /** @@ -2845,6 +2883,8 @@ tx_only: */ if (likely(napi_complete_done(napi, work_done))) i40e_update_enable_itr(vsi, q_vector); + else + q_vector->in_busy_poll = true; return min(work_done, budget - 1); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index abf15067eb5d..2cdc7de6301c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -68,6 +68,7 @@ enum i40e_dyn_idx { /* these are indexes into ITRN registers */ #define I40E_RX_ITR I40E_IDX_ITR0 #define I40E_TX_ITR I40E_IDX_ITR1 +#define I40E_SW_ITR I40E_IDX_ITR2 /* Supported RSS offloads */ #define I40E_DEFAULT_RSS_HENA ( \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 83a34e98bdc7..232b65b9c8ea 100644 --- 
a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1624,8 +1624,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) { struct i40e_hw *hw = &pf->hw; struct i40e_vf *vf; - int i, v; u32 reg; + int i; /* If we don't have any VFs, then there is nothing to reset */ if (!pf->num_alloc_vfs) @@ -1636,11 +1636,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) return false; /* Begin reset on all VFs at once */ - for (v = 0; v < pf->num_alloc_vfs; v++) { - vf = &pf->vf[v]; + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is being reset no need to trigger reset again */ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) - i40e_trigger_vf_reset(&pf->vf[v], flr); + i40e_trigger_vf_reset(vf, flr); } /* HW requires some time to make sure it can flush the FIFO for a VF @@ -1649,14 +1648,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) * the VFs using a simple iterator that increments once that VF has * finished resetting. */ - for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { + for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) { usleep_range(10000, 20000); /* Check each VF in sequence, beginning with the VF to fail * the previous check. */ - while (v < pf->num_alloc_vfs) { - vf = &pf->vf[v]; + while (vf < &pf->vf[pf->num_alloc_vfs]) { if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) { reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) @@ -1666,7 +1664,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* If the current VF has finished resetting, move on * to the next VF in sequence. */ - v++; + ++vf; } } @@ -1676,39 +1674,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* Display a warning if at least one VF didn't manage to reset in * time, but continue on with the operation. 
*/ - if (v < pf->num_alloc_vfs) + if (vf < &pf->vf[pf->num_alloc_vfs]) dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n", - pf->vf[v].vf_id); + vf->vf_id); usleep_range(10000, 20000); /* Begin disabling all the rings associated with VFs, but do not wait * between each VF. */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]); } /* Now that we've notified HW to disable all of the VF rings, wait * until they finish. */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]); } /* Hw may need up to 50ms to finish disabling the RX queues. 
We @@ -1717,12 +1715,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) mdelay(50); /* Finish the reset on each VF */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_cleanup_reset_vf(&pf->vf[v]); + i40e_cleanup_reset_vf(vf); } i40e_flush(hw); @@ -3139,11 +3137,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) /* Allow to delete VF primary MAC only if it was not set * administratively by PF or if VF is trusted. */ - if (ether_addr_equal(addr, vf->default_lan_addr.addr) && - i40e_can_vf_change_mac(vf)) - was_unimac_deleted = true; - else - continue; + if (ether_addr_equal(addr, vf->default_lan_addr.addr)) { + if (i40e_can_vf_change_mac(vf)) + was_unimac_deleted = true; + else + continue; + } if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ret = -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 8040317c9561..1f3e7a6903e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -593,8 +593,9 @@ struct ice_aqc_recipe_data_elem { struct ice_aqc_recipe_to_profile { __le16 profile_id; u8 rsvd[6]; - DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES); + __le64 recipe_assoc; }; +static_assert(sizeof(struct ice_aqc_recipe_to_profile) == 16); /* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3) */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index db4b2844e1f7..d9f6cc71d900 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1002,8 +1002,8 @@ static void ice_get_itr_intrl_gran(struct ice_hw *hw) */ int ice_init_hw(struct ice_hw *hw) { - struct ice_aqc_get_phy_caps_data *pcaps 
__free(kfree); - void *mac_buf __free(kfree); + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL; + void *mac_buf __free(kfree) = NULL; u16 mac_buf_len; int status; @@ -3272,7 +3272,7 @@ int ice_update_link_info(struct ice_port_info *pi) return status; if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) { - struct ice_aqc_get_phy_caps_data *pcaps __free(kfree); + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL; pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) @@ -3420,7 +3420,7 @@ ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, int ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update) { - struct ice_aqc_get_phy_caps_data *pcaps __free(kfree); + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL; struct ice_aqc_set_phy_cfg_data cfg = { 0 }; struct ice_hw *hw; int status; @@ -3561,7 +3561,7 @@ int ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec) { - struct ice_aqc_get_phy_caps_data *pcaps __free(kfree); + struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL; struct ice_hw *hw; int status; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 255a9c8151b4..78b833b3e1d7 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -941,11 +941,11 @@ static u64 ice_loopback_test(struct net_device *netdev) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *orig_vsi = np->vsi, *test_vsi; struct ice_pf *pf = orig_vsi->back; + u8 *tx_frame __free(kfree) = NULL; u8 broadcast[ETH_ALEN], ret = 0; int num_frames, valid_frames; struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; - u8 *tx_frame __free(kfree); int i; netdev_info(netdev, "loopback test\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index f97128b69f87..f0e76f0a6d60 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -2041,7 +2041,7 @@ int ice_init_lag(struct ice_pf *pf) /* associate recipes to profiles */ for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) { err = ice_aq_get_recipe_to_profile(&pf->hw, n, - (u8 *)&recipe_bits, NULL); + &recipe_bits, NULL); if (err) continue; @@ -2049,7 +2049,7 @@ int ice_init_lag(struct ice_pf *pf) recipe_bits |= BIT(lag->pf_recipe) | BIT(lag->lport_recipe); ice_aq_map_recipe_to_profile(&pf->hw, n, - (u8 *)&recipe_bits, NULL); + recipe_bits, NULL); } } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index ee3f0d3e3f6d..558422120312 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3091,7 +3091,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) { struct ice_vsi_cfg_params params = {}; struct ice_coalesce_stored *coalesce; - int prev_num_q_vectors = 0; + int prev_num_q_vectors; struct ice_pf *pf; int ret; @@ -3105,13 +3105,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf)) return -EINVAL; - coalesce = kcalloc(vsi->num_q_vectors, - sizeof(struct ice_coalesce_stored), GFP_KERNEL); - if (!coalesce) - return -ENOMEM; - - prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); - ret = ice_vsi_realloc_stat_arrays(vsi); if (ret) goto err_vsi_cfg; @@ -3121,6 +3114,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (ret) goto err_vsi_cfg; + coalesce = kcalloc(vsi->num_q_vectors, + sizeof(struct ice_coalesce_stored), GFP_KERNEL); + if (!coalesce) + return -ENOMEM; + + prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); + ret = ice_vsi_cfg_tc_lan(pf, vsi); if (ret) { if (vsi_flags & ICE_VSI_FLAG_INIT) { @@ -3139,8 +3139,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) err_vsi_cfg_tc_lan: ice_vsi_decfg(vsi); -err_vsi_cfg: kfree(coalesce); 
+err_vsi_cfg: return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index d4baae8c3b72..b4ea935e8300 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -2025,12 +2025,12 @@ error_out: * ice_aq_map_recipe_to_profile - Map recipe to packet profile * @hw: pointer to the HW struct * @profile_id: package profile ID to associate the recipe with - * @r_bitmap: Recipe bitmap filled in and need to be returned as response + * @r_assoc: Recipe bitmap filled in and need to be returned as response * @cd: pointer to command details structure or NULL * Recipe to profile association (0x0291) */ int -ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc, struct ice_sq_cd *cd) { struct ice_aqc_recipe_to_profile *cmd; @@ -2042,7 +2042,7 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, /* Set the recipe ID bit in the bitmask to let the device know which * profile we are associating the recipe to */ - memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc)); + cmd->recipe_assoc = cpu_to_le64(r_assoc); return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); } @@ -2051,12 +2051,12 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, * ice_aq_get_recipe_to_profile - Map recipe to packet profile * @hw: pointer to the HW struct * @profile_id: package profile ID to associate the recipe with - * @r_bitmap: Recipe bitmap filled in and need to be returned as response + * @r_assoc: Recipe bitmap filled in and need to be returned as response * @cd: pointer to command details structure or NULL * Associate profile ID with given recipe (0x0293) */ int -ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, struct ice_sq_cd 
*cd) { struct ice_aqc_recipe_to_profile *cmd; @@ -2069,7 +2069,7 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); if (!status) - memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc)); + *r_assoc = le64_to_cpu(cmd->recipe_assoc); return status; } @@ -2108,6 +2108,7 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid) static void ice_get_recp_to_prof_map(struct ice_hw *hw) { DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + u64 recp_assoc; u16 i; for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) { @@ -2115,8 +2116,9 @@ static void ice_get_recp_to_prof_map(struct ice_hw *hw) bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES); bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES); - if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL)) + if (ice_aq_get_recipe_to_profile(hw, i, &recp_assoc, NULL)) continue; + bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES); bitmap_copy(profile_to_recipe[i], r_bitmap, ICE_MAX_NUM_RECIPES); for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES) @@ -5390,22 +5392,24 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, */ list_for_each_entry(fvit, &rm->fv_list, list_entry) { DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + u64 recp_assoc; u16 j; status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id, - (u8 *)r_bitmap, NULL); + &recp_assoc, NULL); if (status) goto err_unroll; + bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES); bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap, ICE_MAX_NUM_RECIPES); status = ice_acquire_change_lock(hw, ICE_RES_WRITE); if (status) goto err_unroll; + bitmap_to_arr64(&recp_assoc, r_bitmap, ICE_MAX_NUM_RECIPES); status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id, - (u8 *)r_bitmap, - NULL); + recp_assoc, NULL); ice_release_change_lock(hw); if (status) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 
db7e501b7e0a..89ffa1b51b5a 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -424,10 +424,10 @@ int ice_aq_add_recipe(struct ice_hw *hw, struct ice_aqc_recipe_data_elem *s_recipe_list, u16 num_recipes, struct ice_sq_cd *cd); int -ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, struct ice_sq_cd *cd); int -ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, +ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc, struct ice_sq_cd *cd); #endif /* _ICE_SWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c index 80dc4bcdd3a4..b3e1bdcb80f8 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c @@ -26,24 +26,22 @@ static void ice_port_vlan_on(struct ice_vsi *vsi) struct ice_vsi_vlan_ops *vlan_ops; struct ice_pf *pf = vsi->back; - if (ice_is_dvm_ena(&pf->hw)) { - vlan_ops = &vsi->outer_vlan_ops; - - /* setup outer VLAN ops */ - vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; - vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; + /* setup inner VLAN ops */ + vlan_ops = &vsi->inner_vlan_ops; - /* setup inner VLAN ops */ - vlan_ops = &vsi->inner_vlan_ops; + if (ice_is_dvm_ena(&pf->hw)) { vlan_ops->add_vlan = noop_vlan_arg; vlan_ops->del_vlan = noop_vlan_arg; vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping; vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping; vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion; vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion; - } else { - vlan_ops = &vsi->inner_vlan_ops; + /* setup outer VLAN ops */ + vlan_ops = &vsi->outer_vlan_ops; + vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; + vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; + } 
else { vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan; vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 6dd7a66bb897..f5bc4a278074 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2941,6 +2941,8 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, rx_ptype = le16_get_bits(rx_desc->ptype_err_fflags0, VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M); + skb->protocol = eth_type_trans(skb, rxq->vport->netdev); + decoded = rxq->vport->rx_ptype_lkup[rx_ptype]; /* If we don't know the ptype we can't do anything else with it. Just * pass it up the stack as-is. @@ -2951,8 +2953,6 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, /* process RSS/hash */ idpf_rx_hash(rxq, skb, rx_desc, &decoded); - skb->protocol = eth_type_trans(skb, rxq->vport->netdev); - if (le16_get_bits(rx_desc->hdrlen_flags, VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M)) return idpf_rx_rsc(rxq, skb, rx_desc, &decoded); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2e1cfbd82f4f..35ad40a803cb 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1642,10 +1642,6 @@ done: if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - /* FIXME: add support for retrieving timestamps from - * the other timer registers before skipping the - * timestamping request. 
- */ unsigned long flags; u32 tstamp_flags; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 13a6fca31004..866024f2b9ee 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -914,7 +914,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) goto err_out; } - xs = kzalloc(sizeof(*xs), GFP_KERNEL); + algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); + if (unlikely(!algo)) { + err = -ENOENT; + goto err_out; + } + + xs = kzalloc(sizeof(*xs), GFP_ATOMIC); if (unlikely(!xs)) { err = -ENOMEM; goto err_out; @@ -930,14 +936,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4)); xs->xso.dev = adapter->netdev; - algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); - if (unlikely(!algo)) { - err = -ENOENT; - goto err_xs; - } - aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8; - xs->aead = kzalloc(aead_len, GFP_KERNEL); + xs->aead = kzalloc(aead_len, GFP_ATOMIC); if (unlikely(!xs->aead)) { err = -ENOMEM; goto err_xs; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 3c0f55b3e48e..b86f3224f0b7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -808,6 +808,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; cfg |= rx_pause ? 
CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 72e060cf6b61..e9bf9231b018 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) continue; lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu)); for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) { + if (iter >= MAX_LMAC_COUNT) + continue; lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu), iter); rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index e350242bbafb..be709f83f331 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1657,7 +1657,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz, struct npc_coalesced_kpu_prfl *img_data = NULL; int i = 0, rc = -EINVAL; void __iomem *kpu_prfl_addr; - u16 offset; + u32 offset; img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr; if (le64_to_cpu(img_data->signature) == KPU_SIGN && diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index b40bd0e46751..3f46d5e0fb2e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1933,7 +1933,7 @@ int otx2_open(struct net_device *netdev) * mcam entries are enabled to receive the packets. Hence disable the * packet I/O. 
*/ - if (err == EIO) + if (err == -EIO) goto err_disable_rxtx; else if (err) goto err_tx_stop_queues; diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 3d09fa54598f..ba303868686a 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/phy.h> #include <linux/platform_device.h> +#include <linux/rtnetlink.h> #include <linux/skbuff.h> #include "mlxbf_gige.h" @@ -139,13 +140,10 @@ static int mlxbf_gige_open(struct net_device *netdev) control |= MLXBF_GIGE_CONTROL_PORT_EN; writeq(control, priv->base + MLXBF_GIGE_CONTROL); - err = mlxbf_gige_request_irqs(priv); - if (err) - return err; mlxbf_gige_cache_stats(priv); err = mlxbf_gige_clean_port(priv); if (err) - goto free_irqs; + return err; /* Clear driver's valid_polarity to match hardware, * since the above call to clean_port() resets the @@ -157,7 +155,7 @@ static int mlxbf_gige_open(struct net_device *netdev) err = mlxbf_gige_tx_init(priv); if (err) - goto free_irqs; + goto phy_deinit; err = mlxbf_gige_rx_init(priv); if (err) goto tx_deinit; @@ -166,6 +164,10 @@ static int mlxbf_gige_open(struct net_device *netdev) napi_enable(&priv->napi); netif_start_queue(netdev); + err = mlxbf_gige_request_irqs(priv); + if (err) + goto napi_deinit; + /* Set bits in INT_EN that we care about */ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | @@ -182,11 +184,17 @@ static int mlxbf_gige_open(struct net_device *netdev) return 0; +napi_deinit: + netif_stop_queue(netdev); + napi_disable(&priv->napi); + netif_napi_del(&priv->napi); + mlxbf_gige_rx_deinit(priv); + tx_deinit: mlxbf_gige_tx_deinit(priv); -free_irqs: - mlxbf_gige_free_irqs(priv); +phy_deinit: + phy_stop(phydev); return err; } @@ -485,8 +493,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev) { 
struct mlxbf_gige *priv = platform_get_drvdata(pdev); - writeq(0, priv->base + MLXBF_GIGE_INT_EN); - mlxbf_gige_clean_port(priv); + rtnl_lock(); + netif_device_detach(priv->netdev); + + if (netif_running(priv->netdev)) + dev_close(priv->netdev); + + rtnl_unlock(); } static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = { diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index bd8aa83b47e5..75a988c0bd79 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -25,6 +25,8 @@ #define PCS_POWER_STATE_DOWN 0x6 #define PCS_POWER_STATE_UP 0x4 +#define RFE_RD_FIFO_TH_3_DWORDS 0x3 + static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) { u32 chip_rev; @@ -3272,6 +3274,21 @@ static void lan743x_full_cleanup(struct lan743x_adapter *adapter) lan743x_pci_cleanup(adapter); } +static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter) +{ + u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_; + + if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) { + u32 misc_ctl; + + misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0); + misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_; + misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_, + RFE_RD_FIFO_TH_3_DWORDS); + lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl); + } +} + static int lan743x_hardware_init(struct lan743x_adapter *adapter, struct pci_dev *pdev) { @@ -3287,6 +3304,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, pci11x1x_strap_get_status(adapter); spin_lock_init(&adapter->eth_syslock_spinlock); mutex_init(&adapter->sgmii_rw_lock); + pci11x1x_set_rfe_rd_fifo_threshold(adapter); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index be79cb0ae5af..645bc048e52e 100644 --- 
a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -26,6 +26,7 @@ #define ID_REV_CHIP_REV_MASK_ (0x0000FFFF) #define ID_REV_CHIP_REV_A0_ (0x00000000) #define ID_REV_CHIP_REV_B0_ (0x00000010) +#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0) #define FPGA_REV (0x04) #define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF) @@ -311,6 +312,9 @@ #define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8) #define SGMII_CTL_SGMII_POWER_DN_ BIT(1) +#define MISC_CTL_0 (0x920) +#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4) + /* Vendor Specific SGMII MMD details */ #define SR_VSMMD_PCS_ID1 0x0004 #define SR_VSMMD_PCS_ID2 0x0005 diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 59287c6e6cee..d8af5e7e15b4 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -601,7 +601,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; - *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN); + *datasize = mtu + ETH_HLEN; } static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5c879a5c86d7..6f1e6f386b7b 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1314,17 +1314,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01); } +static void rtl_dash_loop_wait(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long usecs, int n, bool high) +{ + if (!tp->dash_enabled) + return; + rtl_loop_wait(tp, c, usecs, n, high); +} + +static void rtl_dash_loop_wait_high(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long d, int n) +{ + rtl_dash_loop_wait(tp, c, d, n, true); +} + +static void 
rtl_dash_loop_wait_low(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long d, int n) +{ + rtl_dash_loop_wait(tp, c, d, n, false); +} + static void rtl8168dp_driver_start(struct rtl8169_private *tp) { r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START); - rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); } static void rtl8168ep_driver_start(struct rtl8169_private *tp) { r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START); r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); - rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); + rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); } static void rtl8168_driver_start(struct rtl8169_private *tp) @@ -1338,7 +1361,7 @@ static void rtl8168_driver_start(struct rtl8169_private *tp) static void rtl8168dp_driver_stop(struct rtl8169_private *tp) { r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP); - rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); } static void rtl8168ep_driver_stop(struct rtl8169_private *tp) @@ -1346,7 +1369,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp) rtl8168ep_stop_cmac(tp); r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP); r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); - rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); } static void rtl8168_driver_stop(struct rtl8169_private *tp) @@ -5141,6 +5164,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp) struct mii_bus *new_bus; int ret; + /* On some boards with this chip version the BIOS is buggy and misses + * to reset the PHY page selector. This results in the PHY ID read + * accessing registers on a different page, returning a more or + * less random value. Fix this by resetting the page selector first. 
+ */ + if (tp->mac_version == RTL_GIGA_MAC_VER_25 || + tp->mac_version == RTL_GIGA_MAC_VER_26) + r8169_mdio_write(tp, 0x1f, 0); + new_bus = devm_mdiobus_alloc(&pdev->dev); if (!new_bus) return -ENOMEM; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index d1be030c8848..ba01c8cc3c90 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1324,12 +1324,12 @@ static int ravb_poll(struct napi_struct *napi, int budget) int q = napi - priv->napi; int mask = BIT(q); int quota = budget; + bool unmask; /* Processing RX Descriptor Ring */ /* Clear RX interrupt */ ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); - if (ravb_rx(ndev, &quota, q)) - goto out; + unmask = !ravb_rx(ndev, &quota, q); /* Processing TX Descriptor Ring */ spin_lock_irqsave(&priv->lock, flags); @@ -1339,6 +1339,18 @@ static int ravb_poll(struct napi_struct *napi, int budget) netif_wake_subqueue(ndev, q); spin_unlock_irqrestore(&priv->lock, flags); + /* Receive error message handling */ + priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; + if (info->nc_queues) + priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; + if (priv->rx_over_errors != ndev->stats.rx_over_errors) + ndev->stats.rx_over_errors = priv->rx_over_errors; + if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) + ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; + + if (!unmask) + goto out; + napi_complete(napi); /* Re-enable RX/TX interrupts */ @@ -1352,14 +1364,6 @@ static int ravb_poll(struct napi_struct *napi, int budget) } spin_unlock_irqrestore(&priv->lock, flags); - /* Receive error message handling */ - priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; - if (info->nc_queues) - priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; - if (priv->rx_over_errors != ndev->stats.rx_over_errors) - ndev->stats.rx_over_errors = priv->rx_over_errors; - if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) - 
ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; out: return budget - quota; } diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 475e1e8c1d35..0786eb0da391 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -50,7 +50,7 @@ * the macros available to do this only define GCC 8. */ __diag_push(); -__diag_ignore(GCC, 8, "-Woverride-init", +__diag_ignore_all("-Woverride-init", "logic to initialize all and then override some is OK"); static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { SH_ETH_OFFSET_DEFAULTS, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 6b6d0de09619..cef25efbdff9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -92,19 +92,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw, u32 prio, u32 queue) { void __iomem *ioaddr = hw->pcsr; - u32 base_register; - u32 value; + u32 clear_mask = 0; + u32 ctrl2, ctrl3; + int i; - base_register = (queue < 4) ? 
GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3; - if (queue >= 4) - queue -= 4; + ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2); + ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3); - value = readl(ioaddr + base_register); + /* The software must ensure that the same priority + * is not mapped to multiple Rx queues + */ + for (i = 0; i < 4; i++) + clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) & + GMAC_RXQCTRL_PSRQX_MASK(i)); + + ctrl2 &= ~clear_mask; + ctrl3 &= ~clear_mask; + + /* First assign new priorities to a queue, then + * clear them from others queues + */ + if (queue < 4) { + ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + GMAC_RXQCTRL_PSRQX_MASK(queue); - value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue); - value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); + writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); + } else { + queue -= 4; + + ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & GMAC_RXQCTRL_PSRQX_MASK(queue); - writel(value, ioaddr + base_register); + + writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); + writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); + } } static void dwmac4_tx_queue_priority(struct mac_device_info *hw, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 1af2f89a0504..e841e312077e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -105,17 +105,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio, u32 queue) { void __iomem *ioaddr = hw->pcsr; - u32 value, reg; + u32 clear_mask = 0; + u32 ctrl2, ctrl3; + int i; - reg = (queue < 4) ? 
XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3; - if (queue >= 4) + ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2); + ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3); + + /* The software must ensure that the same priority + * is not mapped to multiple Rx queues + */ + for (i = 0; i < 4; i++) + clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) & + XGMAC_PSRQ(i)); + + ctrl2 &= ~clear_mask; + ctrl3 &= ~clear_mask; + + /* First assign new priorities to a queue, then + * clear them from others queues + */ + if (queue < 4) { + ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & + XGMAC_PSRQ(queue); + + writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); + writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); + } else { queue -= 4; - value = readl(ioaddr + reg); - value &= ~XGMAC_PSRQ(queue); - value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue); + ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & + XGMAC_PSRQ(queue); - writel(value, ioaddr + reg); + writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); + writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); + } } static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 5b5d5e4310d1..2fa511227eac 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -20,6 +20,8 @@ #include "txgbe_phy.h" #include "txgbe_hw.h" +#define TXGBE_I2C_CLK_DEV_NAME "i2c_dw" + static int txgbe_swnodes_register(struct txgbe *txgbe) { struct txgbe_nodes *nodes = &txgbe->nodes; @@ -571,8 +573,8 @@ static int txgbe_clock_register(struct txgbe *txgbe) char clk_name[32]; struct clk *clk; - snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d", - pci_dev_id(pdev)); + snprintf(clk_name, sizeof(clk_name), "%s.%d", + TXGBE_I2C_CLK_DEV_NAME, pci_dev_id(pdev)); clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000); if (IS_ERR(clk)) @@ -634,7 +636,7 @@ static int txgbe_i2c_register(struct txgbe *txgbe) info.parent = &pdev->dev; info.fwnode = 
software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]); - info.name = "i2c_designware"; + info.name = TXGBE_I2C_CLK_DEV_NAME; info.id = pci_dev_id(pdev); info.res = &DEFINE_RES_IRQ(pdev->irq); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 9df39cf8b097..1072e2210aed 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1443,7 +1443,7 @@ static int temac_probe(struct platform_device *pdev) } /* map device registers */ - lp->regs = devm_platform_ioremap_resource_byname(pdev, 0); + lp->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(lp->regs)) { dev_err(&pdev->dev, "could not map TEMAC registers\n"); return -ENOMEM; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 8b8634600c51..ddb50a0e2bc8 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2431,6 +2431,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct lan8814_ptp_rx_ts *rx_ts, *tmp; int txcfg = 0, rxcfg = 0; int pkt_ts_enable; + int tx_mod; ptp_priv->hwts_tx_type = config->tx_type; ptp_priv->rx_filter = config->rx_filter; @@ -2477,9 +2478,14 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable); lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable); - if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) + tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD); + if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) { lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, - PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) { + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, + tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + } if (config->rx_filter != HWTSTAMP_FILTER_NONE) lan8814_config_ts_intr(ptp_priv->phydev, 
true); @@ -2537,7 +2543,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts, } } -static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) +static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) { struct ptp_header *ptp_header; u32 type; @@ -2547,7 +2553,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) ptp_header = ptp_parse_header(skb, type); skb_pull_inline(skb, ETH_HLEN); + if (!ptp_header) + return false; + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); + return true; } static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv, @@ -2559,7 +2569,8 @@ static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv, bool ret = false; u16 skb_sig; - lan8814_get_sig_rx(skb, &skb_sig); + if (!lan8814_get_sig_rx(skb, &skb_sig)) + return ret; /* Iterate over all RX timestamps and match it with the received skbs */ spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags); @@ -2834,7 +2845,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm) return 0; } -static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) +static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) { struct ptp_header *ptp_header; u32 type; @@ -2842,7 +2853,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) type = ptp_classify_raw(skb); ptp_header = ptp_parse_header(skb, type); + if (!ptp_header) + return false; + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); + return true; } static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv, @@ -2856,7 +2871,8 @@ static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv, spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags); skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) { - lan8814_get_sig_tx(skb, &skb_sig); + if (!lan8814_get_sig_tx(skb, &skb_sig)) + continue; if (memcmp(&skb_sig, &seq_id, sizeof(seq_id))) continue; @@ -2910,7 +2926,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv, 
spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags); skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) { - lan8814_get_sig_rx(skb, &skb_sig); + if (!lan8814_get_sig_rx(skb, &skb_sig)) + continue; if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id))) continue; diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c index 4717c59d51d0..e79657f76bea 100644 --- a/drivers/net/phy/qcom/at803x.c +++ b/drivers/net/phy/qcom/at803x.c @@ -797,7 +797,7 @@ static int at8031_parse_dt(struct phy_device *phydev) static int at8031_probe(struct phy_device *phydev) { - struct at803x_priv *priv = phydev->priv; + struct at803x_priv *priv; int mode_cfg; int ccr; int ret; @@ -806,6 +806,8 @@ static int at8031_probe(struct phy_device *phydev) if (ret) return ret; + priv = phydev->priv; + /* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping * options. */ diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 88e084534853..a9c418890a1c 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1273,6 +1273,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev) if (is_valid_ether_addr(mac)) { eth_hw_addr_set(dev->net, mac); + if (!is_local_ether_addr(mac)) + dev->net->addr_assign_type = NET_ADDR_PERM; } else { netdev_info(dev->net, "invalid MAC address, using random\n"); eth_hw_addr_random(dev->net); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index db6d7013df66..c3bdf433d8f7 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -3081,8 +3081,6 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx) struct iwl_fw_dbg_params params = {0}; struct iwl_fwrt_dump_data *dump_data = &fwrt->dump.wks[wk_idx].dump_data; - u32 policy; - u32 time_point; if (!test_bit(wk_idx, &fwrt->dump.active_wks)) return; @@ -3113,13 +3111,16 @@ static void 
iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx) iwl_fw_dbg_stop_restart_recording(fwrt, &params, false); - policy = le32_to_cpu(dump_data->trig->apply_policy); - time_point = le32_to_cpu(dump_data->trig->time_point); + if (iwl_trans_dbg_ini_valid(fwrt->trans)) { + u32 policy = le32_to_cpu(dump_data->trig->apply_policy); + u32 time_point = le32_to_cpu(dump_data->trig->time_point); - if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) { - IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n"); - iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0); + if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) { + IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n"); + iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0); + } } + if (fwrt->trans->dbg.last_tp_resetfw == IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY) iwl_force_nmi(fwrt->trans); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 553c6fffc7c6..52518a47554e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1260,15 +1260,15 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, if (IS_ERR_OR_NULL(vif)) return 1; - if (ieee80211_vif_is_mld(vif) && vif->cfg.assoc) { + if (hweight16(vif->active_links) > 1) { /* - * Select the 'best' link. May need to revisit, it seems - * better to not optimize for throughput but rather range, - * reliability and power here - and select 2.4 GHz ... + * Select the 'best' link. + * May need to revisit, it seems better to not optimize + * for throughput but rather range, reliability and + * power here - and select 2.4 GHz ... 
*/ - primary_link = - iwl_mvm_mld_get_primary_link(mvm, vif, - vif->active_links); + primary_link = iwl_mvm_mld_get_primary_link(mvm, vif, + vif->active_links); if (WARN_ONCE(primary_link < 0, "no primary link in 0x%x\n", vif->active_links)) @@ -1277,6 +1277,8 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, ret = ieee80211_set_active_links(vif, BIT(primary_link)); if (ret) return ret; + } else if (vif->active_links) { + primary_link = __ffs(vif->active_links); } else { primary_link = 0; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 51b01f7528be..7fe57ecd0682 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -748,7 +748,9 @@ void iwl_mvm_vif_dbgfs_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { struct dentry *dbgfs_dir = vif->debugfs_dir; struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - char buf[100]; + char buf[3 * 3 + 11 + (NL80211_WIPHY_NAME_MAXLEN + 1) + + (7 + IFNAMSIZ + 1) + 6 + 1]; + char name[7 + IFNAMSIZ + 1]; /* this will happen in monitor mode */ if (!dbgfs_dir) @@ -761,10 +763,11 @@ void iwl_mvm_vif_dbgfs_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * find * netdev:wlan0 -> ../../../ieee80211/phy0/netdev:wlan0/iwlmvm/ */ - snprintf(buf, 100, "../../../%pd3/iwlmvm", dbgfs_dir); + snprintf(name, sizeof(name), "%pd", dbgfs_dir); + snprintf(buf, sizeof(buf), "../../../%pd3/iwlmvm", dbgfs_dir); - mvmvif->dbgfs_slink = debugfs_create_symlink(dbgfs_dir->d_name.name, - mvm->debugfs_dir, buf); + mvmvif->dbgfs_slink = + debugfs_create_symlink(name, mvm->debugfs_dir, buf); } void iwl_mvm_vif_dbgfs_rm_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/link.c b/drivers/net/wireless/intel/iwlwifi/mvm/link.c index f13f13e6b71a..9f69e04594e4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/link.c +++ 
b/drivers/net/wireless/intel/iwlwifi/mvm/link.c @@ -46,6 +46,27 @@ static int iwl_mvm_link_cmd_send(struct iwl_mvm *mvm, return ret; } +int iwl_mvm_set_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mvm_vif_link_info *link_info = + mvmvif->link[link_conf->link_id]; + + if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) { + link_info->fw_link_id = iwl_mvm_get_free_fw_link_id(mvm, + mvmvif); + if (link_info->fw_link_id >= + ARRAY_SIZE(mvm->link_id_to_link_conf)) + return -EINVAL; + + rcu_assign_pointer(mvm->link_id_to_link_conf[link_info->fw_link_id], + link_conf); + } + + return 0; +} + int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf) { @@ -55,19 +76,14 @@ int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_link_config_cmd cmd = {}; unsigned int cmd_id = WIDE_ID(MAC_CONF_GROUP, LINK_CONFIG_CMD); u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id, 1); + int ret; if (WARN_ON_ONCE(!link_info)) return -EINVAL; - if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) { - link_info->fw_link_id = iwl_mvm_get_free_fw_link_id(mvm, - mvmvif); - if (link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf)) - return -EINVAL; - - rcu_assign_pointer(mvm->link_id_to_link_conf[link_info->fw_link_id], - link_conf); - } + ret = iwl_mvm_set_link_mapping(mvm, vif, link_conf); + if (ret) + return ret; /* Update SF - Disable if needed. if this fails, SF might still be on * while many macs are bound, which is forbidden - so fail the binding. 
@@ -248,6 +264,24 @@ send_cmd: return ret; } +int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mvm_vif_link_info *link_info = + mvmvif->link[link_conf->link_id]; + + /* mac80211 thought we have the link, but it was never configured */ + if (WARN_ON(!link_info || + link_info->fw_link_id >= + ARRAY_SIZE(mvm->link_id_to_link_conf))) + return -EINVAL; + + RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id], + NULL); + return 0; +} + int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf) { @@ -257,13 +291,10 @@ int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_link_config_cmd cmd = {}; int ret; - /* mac80211 thought we have the link, but it was never configured */ - if (WARN_ON(!link_info || - link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf))) + ret = iwl_mvm_unset_link_mapping(mvm, vif, link_conf); + if (ret) return 0; - RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id], - NULL); cmd.link_id = cpu_to_le32(link_info->fw_link_id); iwl_mvm_release_fw_link_id(mvm, link_info->fw_link_id); link_info->fw_link_id = IWL_MVM_FW_LINK_ID_INVALID; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 1935630d3def..8f4b063d6243 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -360,7 +360,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) if (mvm->mld_api_is_used && mvm->nvm_data->sku_cap_11be_enable && !iwlwifi_mod_params.disable_11ax && !iwlwifi_mod_params.disable_11be) - hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_MLO; + hw->wiphy->flags |= WIPHY_FLAG_DISABLE_WEXT; /* With MLD FW API, it tracks timing by itself, * no need for any timing from the host @@ -1577,8 
+1577,14 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, mvmvif->mvm = mvm; /* the first link always points to the default one */ + mvmvif->deflink.fw_link_id = IWL_MVM_FW_LINK_ID_INVALID; + mvmvif->deflink.active = 0; mvmvif->link[0] = &mvmvif->deflink; + ret = iwl_mvm_set_link_mapping(mvm, vif, &vif->bss_conf); + if (ret) + goto out; + /* * Not much to do here. The stack will not allow interface * types or combinations that we didn't advertise, so we @@ -1783,6 +1789,7 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, mvm->p2p_device_vif = NULL; } + iwl_mvm_unset_link_mapping(mvm, vif, &vif->bss_conf); iwl_mvm_mac_ctxt_remove(mvm, vif); RCU_INIT_POINTER(mvm->vif_id_to_mac[mvmvif->id], NULL); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c index 1628bf55458f..23e64a757cfe 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c @@ -855,10 +855,15 @@ int iwl_mvm_mld_rm_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, int iwl_mvm_mld_rm_sta_id(struct iwl_mvm *mvm, u8 sta_id) { - int ret = iwl_mvm_mld_rm_sta_from_fw(mvm, sta_id); + int ret; lockdep_assert_held(&mvm->mutex); + if (WARN_ON(sta_id == IWL_MVM_INVALID_STA)) + return 0; + + ret = iwl_mvm_mld_rm_sta_from_fw(mvm, sta_id); + RCU_INIT_POINTER(mvm->fw_id_to_mac_id[sta_id], NULL); RCU_INIT_POINTER(mvm->fw_id_to_link_sta[sta_id], NULL); return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 44571114fb15..f0b24f00938b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1916,11 +1916,15 @@ int iwl_mvm_binding_remove_vif(struct iwl_mvm *mvm, struct ieee80211_vif *vif); u32 iwl_mvm_get_lmac_id(struct iwl_mvm *mvm, enum nl80211_band band); /* Links */ +int iwl_mvm_set_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + 
struct ieee80211_bss_conf *link_conf); int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf); int iwl_mvm_link_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf, u32 changes, bool active); +int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf); int iwl_mvm_disable_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c index 2ecd32bed752..045c862a8fc4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c @@ -132,14 +132,18 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm) if (ret) return ERR_PTR(ret); - if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size)) + if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != + resp_size)) { + iwl_free_resp(&cmd); return ERR_PTR(-EIO); + } resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL); + iwl_free_resp(&cmd); + if (!resp) return ERR_PTR(-ENOMEM); - iwl_free_resp(&cmd); return resp; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 1484eaedf452..ce8d83c771a7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -236,21 +236,13 @@ static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm, struct napi_struct *napi, struct sk_buff *skb, int queue, - struct ieee80211_sta *sta, - struct ieee80211_link_sta *link_sta) + struct ieee80211_sta *sta) { if (unlikely(iwl_mvm_check_pn(mvm, skb, queue, sta))) { kfree_skb(skb); return; } - if (sta && sta->valid_links 
&& link_sta) { - struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); - - rx_status->link_valid = 1; - rx_status->link_id = link_sta->link_id; - } - ieee80211_rx_napi(mvm->hw, sta, skb, napi); } @@ -588,7 +580,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, while ((skb = __skb_dequeue(skb_list))) { iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, reorder_buf->queue, - sta, NULL /* FIXME */); + sta); reorder_buf->num_stored--; } } @@ -2213,6 +2205,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, if (IS_ERR(sta)) sta = NULL; link_sta = rcu_dereference(mvm->fw_id_to_link_sta[id]); + + if (sta && sta->valid_links && link_sta) { + rx_status->link_valid = 1; + rx_status->link_id = link_sta->link_id; + } } } else if (!is_multicast_ether_addr(hdr->addr2)) { /* @@ -2356,8 +2353,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, !(desc->amsdu_info & IWL_RX_MPDU_AMSDU_LAST_SUBFRAME)) rx_status->flag |= RX_FLAG_AMSDU_MORE; - iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta, - link_sta); + iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta); } out: rcu_read_unlock(); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index a59d264a11c5..ad960faceb0d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -879,9 +879,8 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm, struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_mvm_session_prot_notif *notif = (void *)pkt->data; unsigned int ver = - iwl_fw_lookup_cmd_ver(mvm->fw, - WIDE_ID(MAC_CONF_GROUP, - SESSION_PROTECTION_CMD), 2); + iwl_fw_lookup_notif_ver(mvm->fw, MAC_CONF_GROUP, + SESSION_PROTECTION_NOTIF, 2); int id = le32_to_cpu(notif->mac_link_id); struct ieee80211_vif *vif; struct iwl_mvm_vif *mvmvif; diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c 
b/drivers/net/wireless/intel/iwlwifi/queue/tx.c index 33973a60d0bf..6229c785c845 100644 --- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c @@ -1589,9 +1589,9 @@ void iwl_txq_reclaim(struct iwl_trans *trans, int txq_id, int ssn, return; tfd_num = iwl_txq_get_cmd_index(txq, ssn); - read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr); spin_lock_bh(&txq->lock); + read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr); if (!test_bit(txq_id, trans->txqs.queue_used)) { IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n", diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c index 367459bd1345..708132d5be2a 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -2233,7 +2233,7 @@ static void rtw8922a_btc_init_cfg(struct rtw89_dev *rtwdev) * Shared-Ant && BTG-path:WL mask(0x55f), others:WL THRU(0x5ff) */ if (btc->ant_type == BTC_ANT_SHARED && btc->btg_pos == path) - rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff); + rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x55f); else rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff); diff --git a/drivers/net/wwan/t7xx/t7xx_cldma.c b/drivers/net/wwan/t7xx/t7xx_cldma.c index 9f43f256db1d..f0a4783baf1f 100644 --- a/drivers/net/wwan/t7xx/t7xx_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_cldma.c @@ -106,7 +106,7 @@ bool t7xx_cldma_tx_addr_is_set(struct t7xx_cldma_hw *hw_info, unsigned int qno) { u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE; - return ioread64(hw_info->ap_pdn_base + offset); + return ioread64_lo_hi(hw_info->ap_pdn_base + offset); } void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address, @@ -117,7 +117,7 @@ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qn reg = tx_rx == MTK_RX ? 
hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 : hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0; - iowrite64(address, reg + offset); + iowrite64_lo_hi(address, reg + offset); } void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno, diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c index abc41a7089fa..97163e1e5783 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c @@ -137,8 +137,9 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool return -ENODEV; } - gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 + - queue->index * sizeof(u64)); + gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base + + REG_CLDMA_DL_CURRENT_ADDRL_0 + + queue->index * sizeof(u64)); if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100) return 0; @@ -316,8 +317,8 @@ static void t7xx_cldma_txq_empty_hndl(struct cldma_queue *queue) struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info; /* Check current processing TGPD, 64-bit address is in a table by Q index */ - ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + - queue->index * sizeof(u64)); + ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + + queue->index * sizeof(u64)); if (req->gpd_addr != ul_curr_addr) { spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags); dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n", diff --git a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c index 76da4c15e3de..f071ec7ff23d 100644 --- a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c +++ b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c @@ -75,7 +75,7 @@ static void t7xx_pcie_mac_atr_tables_dis(void __iomem *pbase, enum t7xx_atr_src_ for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) { offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i; reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; - iowrite64(0, reg); + 
iowrite64_lo_hi(0, reg); } } @@ -112,17 +112,17 @@ static int t7xx_pcie_mac_atr_cfg(struct t7xx_pci_dev *t7xx_dev, struct t7xx_atr_ reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset; value = cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT; - iowrite64(value, reg); + iowrite64_lo_hi(value, reg); reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset; iowrite32(cfg->trsl_id, reg); reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0); - iowrite64(value, reg); + iowrite64_lo_hi(value, reg); /* Ensure ATR is set */ - ioread64(reg); + ioread64_lo_hi(reg); return 0; } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index ad29f370034e..8d2aee88526c 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) return NULL; } skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); + skb_mark_for_recycle(skb); /* Align ip header to a 16 bytes boundary */ skb_reserve(skb, NET_IP_ALIGN); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 943d72bdd794..27281a9a8951 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2076,6 +2076,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT; struct queue_limits lim; struct nvme_id_ns_nvm *nvm = NULL; + struct nvme_zone_info zi = {}; struct nvme_id_ns *id; sector_t capacity; unsigned lbaf; @@ -2088,9 +2089,10 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (id->ncap == 0) { /* namespace not allocated or attached */ info->is_removed = true; - ret = -ENODEV; + ret = -ENXIO; goto out; } + lbaf = nvme_lbaf_index(id->flbas); if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) { ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm); @@ -2098,8 +2100,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, goto out; } + if 
(IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + ns->head->ids.csi == NVME_CSI_ZNS) { + ret = nvme_query_zone_info(ns, lbaf, &zi); + if (ret < 0) + goto out; + } + blk_mq_freeze_queue(ns->disk->queue); - lbaf = nvme_lbaf_index(id->flbas); ns->head->lba_shift = id->lbaf[lbaf].ds; ns->head->nuse = le64_to_cpu(id->nuse); capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); @@ -2112,13 +2120,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, capacity = 0; nvme_config_discard(ns, &lim); if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && - ns->head->ids.csi == NVME_CSI_ZNS) { - ret = nvme_update_zone_info(ns, lbaf, &lim); - if (ret) { - blk_mq_unfreeze_queue(ns->disk->queue); - goto out; - } - } + ns->head->ids.csi == NVME_CSI_ZNS) + nvme_update_zone_info(ns, &lim, &zi); ret = queue_limits_commit_update(ns->disk->queue, &lim); if (ret) { blk_mq_unfreeze_queue(ns->disk->queue); @@ -2201,6 +2204,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) } if (!ret && nvme_ns_head_multipath(ns->head)) { + struct queue_limits *ns_lim = &ns->disk->queue->limits; struct queue_limits lim; blk_mq_freeze_queue(ns->head->disk->queue); @@ -2212,7 +2216,26 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); nvme_mpath_revalidate_paths(ns); + /* + * queue_limits mixes values that are the hardware limitations + * for bio splitting with what is the device configuration. + * + * For NVMe the device configuration can change after e.g. a + * Format command, and we really want to pick up the new format + * value here. But we must still stack the queue limits to the + * least common denominator for multipathing to split the bios + * properly. + * + * To work around this, we explicitly set the device + * configuration to those that we just queried, but only stack + * the splitting limits in to make sure we still obey possibly + * lower limitations of other controllers. 
+ */ lim = queue_limits_start_update(ns->head->disk->queue); + lim.logical_block_size = ns_lim->logical_block_size; + lim.physical_block_size = ns_lim->physical_block_size; + lim.io_min = ns_lim->io_min; + lim.io_opt = ns_lim->io_opt; queue_limits_stack_bdev(&lim, ns->disk->part0, 0, ns->head->disk->disk_name); ret = queue_limits_commit_update(ns->head->disk->queue, &lim); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 68a5d971657b..a5b29e9ad342 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2428,7 +2428,7 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) * controller. Called after last nvme_put_ctrl() call */ static void -nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) +nvme_fc_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); @@ -3384,7 +3384,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, - .free_ctrl = nvme_fc_nvme_ctrl_freed, + .free_ctrl = nvme_fc_free_ctrl, .submit_async_event = nvme_fc_submit_async_event, .delete_ctrl = nvme_fc_delete_ctrl, .get_address = nvmf_get_address, diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 24193fcb8bd5..d0ed64dc7380 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1036,10 +1036,18 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk) } #endif /* CONFIG_NVME_MULTIPATH */ +struct nvme_zone_info { + u64 zone_size; + unsigned int max_open_zones; + unsigned int max_active_zones; +}; + int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); -int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf, - struct queue_limits *lim); +int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf, + struct nvme_zone_info *zi); +void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim, + struct nvme_zone_info *zi); #ifdef 
CONFIG_BLK_DEV_ZONED blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd, diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 722384bcc765..77aa0f440a6d 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -35,8 +35,8 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl) return 0; } -int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf, - struct queue_limits *lim) +int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf, + struct nvme_zone_info *zi) { struct nvme_effects_log *log = ns->head->effects; struct nvme_command c = { }; @@ -89,27 +89,34 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf, goto free_data; } - ns->head->zsze = - nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze)); - if (!is_power_of_2(ns->head->zsze)) { + zi->zone_size = le64_to_cpu(id->lbafe[lbaf].zsze); + if (!is_power_of_2(zi->zone_size)) { dev_warn(ns->ctrl->device, - "invalid zone size:%llu for namespace:%u\n", - ns->head->zsze, ns->head->ns_id); + "invalid zone size: %llu for namespace: %u\n", + zi->zone_size, ns->head->ns_id); status = -ENODEV; goto free_data; } + zi->max_open_zones = le32_to_cpu(id->mor) + 1; + zi->max_active_zones = le32_to_cpu(id->mar) + 1; - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue); - lim->zoned = 1; - lim->max_open_zones = le32_to_cpu(id->mor) + 1; - lim->max_active_zones = le32_to_cpu(id->mar) + 1; - lim->chunk_sectors = ns->head->zsze; - lim->max_zone_append_sectors = ns->ctrl->max_zone_append; free_data: kfree(id); return status; } +void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim, + struct nvme_zone_info *zi) +{ + lim->zoned = 1; + lim->max_open_zones = zi->max_open_zones; + lim->max_active_zones = zi->max_active_zones; + lim->max_zone_append_sectors = ns->ctrl->max_zone_append; + lim->chunk_sectors = ns->head->zsze = + nvme_lba_to_sect(ns->head, zi->zone_size); + 
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue); +} + static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns, unsigned int nr_zones, size_t *buflen) { diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 77a6e817b315..a2325330bf22 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1613,6 +1613,11 @@ static struct config_group *nvmet_subsys_make(struct config_group *group, return ERR_PTR(-EINVAL); } + if (sysfs_streq(name, nvmet_disc_subsys->subsysnqn)) { + pr_err("can't create subsystem using unique discovery NQN\n"); + return ERR_PTR(-EINVAL); + } + subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME); if (IS_ERR(subsys)) return ERR_CAST(subsys); @@ -2159,7 +2164,49 @@ static const struct config_item_type nvmet_hosts_type = { static struct config_group nvmet_hosts_group; +static ssize_t nvmet_root_discovery_nqn_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%s\n", nvmet_disc_subsys->subsysnqn); +} + +static ssize_t nvmet_root_discovery_nqn_store(struct config_item *item, + const char *page, size_t count) +{ + struct list_head *entry; + size_t len; + + len = strcspn(page, "\n"); + if (!len || len > NVMF_NQN_FIELD_LEN - 1) + return -EINVAL; + + down_write(&nvmet_config_sem); + list_for_each(entry, &nvmet_subsystems_group.cg_children) { + struct config_item *item = + container_of(entry, struct config_item, ci_entry); + + if (!strncmp(config_item_name(item), page, len)) { + pr_err("duplicate NQN %s\n", config_item_name(item)); + up_write(&nvmet_config_sem); + return -EINVAL; + } + } + memset(nvmet_disc_subsys->subsysnqn, 0, NVMF_NQN_FIELD_LEN); + memcpy(nvmet_disc_subsys->subsysnqn, page, len); + up_write(&nvmet_config_sem); + + return len; +} + +CONFIGFS_ATTR(nvmet_root_, discovery_nqn); + +static struct configfs_attribute *nvmet_root_attrs[] = { + &nvmet_root_attr_discovery_nqn, + NULL, +}; + static const struct config_item_type nvmet_root_type = { + 
.ct_attrs = nvmet_root_attrs, .ct_owner = THIS_MODULE, }; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6bbe4df0166c..8860a3eb71ec 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1541,6 +1541,13 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, } down_read(&nvmet_config_sem); + if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn, + NVMF_NQN_SIZE)) { + if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) { + up_read(&nvmet_config_sem); + return nvmet_disc_subsys; + } + } list_for_each_entry(p, &port->subsystems, entry) { if (!strncmp(p->subsys->subsysnqn, subsysnqn, NVMF_NQN_SIZE)) { diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index fd229f310c93..337ee1cb09ae 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1115,16 +1115,21 @@ nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) } static bool -nvmet_fc_assoc_exits(struct nvmet_fc_tgtport *tgtport, u64 association_id) +nvmet_fc_assoc_exists(struct nvmet_fc_tgtport *tgtport, u64 association_id) { struct nvmet_fc_tgt_assoc *a; + bool found = false; + rcu_read_lock(); list_for_each_entry_rcu(a, &tgtport->assoc_list, a_list) { - if (association_id == a->association_id) - return true; + if (association_id == a->association_id) { + found = true; + break; + } } + rcu_read_unlock(); - return false; + return found; } static struct nvmet_fc_tgt_assoc * @@ -1164,13 +1169,11 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) ran = ran << BYTES_FOR_QID_SHIFT; spin_lock_irqsave(&tgtport->lock, flags); - rcu_read_lock(); - if (!nvmet_fc_assoc_exits(tgtport, ran)) { + if (!nvmet_fc_assoc_exists(tgtport, ran)) { assoc->association_id = ran; list_add_tail_rcu(&assoc->a_list, &tgtport->assoc_list); done = true; } - rcu_read_unlock(); spin_unlock_irqrestore(&tgtport->lock, flags); } while (!done); diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 
3bf27052832f..4d57a4e34105 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "OF: " fmt +#include <linux/device.h> #include <linux/of.h> #include <linux/spinlock.h> #include <linux/slab.h> @@ -667,6 +668,17 @@ void of_changeset_destroy(struct of_changeset *ocs) { struct of_changeset_entry *ce, *cen; + /* + * When a device is deleted, the device links to/from it are also queued + * for deletion. Until these device links are freed, the devices + * themselves aren't freed. If the device being deleted is due to an + * overlay change, this device might be holding a reference to a device + * node that will be freed. So, wait until all already pending device + * links are deleted before freeing a device node. This ensures we don't + * free any device node that has a non-zero reference count. + */ + device_link_wait_removal(); + list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node) __of_changeset_entry_destroy(ce); } diff --git a/drivers/of/module.c b/drivers/of/module.c index 0e8aa974f0f2..f58e624953a2 100644 --- a/drivers/of/module.c +++ b/drivers/of/module.c @@ -16,6 +16,14 @@ ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len) ssize_t csize; ssize_t tsize; + /* + * Prevent a kernel oops in vsnprintf() -- it only allows passing a + * NULL ptr when the length is also 0. Also filter out the negative + * lengths... 
+ */ + if ((len > 0 && !str) || len < 0) + return -EINVAL; + /* Name & Type */ /* %p eats all alphanum characters, so %c must be used here */ csize = snprintf(str, len, "of:N%pOFn%c%s", np, 'T', diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index c78a6fd6c57f..b4efdddb2ad9 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -313,6 +313,10 @@ static int riscv_pmu_event_init(struct perf_event *event) u64 event_config = 0; uint64_t cmask; + /* driver does not support branch stack sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + hwc->flags = 0; mapped_event = rvpmu->event_map(event, &event_config); if (mapped_event < 0) { diff --git a/drivers/pinctrl/aspeed/Makefile b/drivers/pinctrl/aspeed/Makefile index 489ea1778353..db2a7600ae2b 100644 --- a/drivers/pinctrl/aspeed/Makefile +++ b/drivers/pinctrl/aspeed/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only # Aspeed pinctrl support -ccflags-y += $(call cc-option,-Woverride-init) +ccflags-y += -Woverride-init obj-$(CONFIG_PINCTRL_ASPEED) += pinctrl-aspeed.o pinmux-aspeed.o obj-$(CONFIG_PINCTRL_ASPEED_G4) += pinctrl-aspeed-g4.o obj-$(CONFIG_PINCTRL_ASPEED_G5) += pinctrl-aspeed-g5.o diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 49f89b70dcec..7f66ec73199a 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -1159,7 +1159,7 @@ static int amd_gpio_probe(struct platform_device *pdev) } ret = devm_request_irq(&pdev->dev, gpio_dev->irq, amd_gpio_irq_handler, - IRQF_SHARED | IRQF_ONESHOT, KBUILD_MODNAME, gpio_dev); + IRQF_SHARED | IRQF_COND_ONESHOT, KBUILD_MODNAME, gpio_dev); if (ret) goto out2; diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index d70f793ce4b3..403525cc1783 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -443,7 +443,7 @@ of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args) if (IS_ERR(pwm)) return pwm; - if (args->args_count > 1) + if 
(args->args_count > 0) pwm->args.period = args->args[0]; pwm->args.polarity = PWM_POLARITY_NORMAL; diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index d79a96679a26..d6596583ed4e 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -284,9 +284,9 @@ static int img_pwm_probe(struct platform_device *pdev) return PTR_ERR(imgchip->sys_clk); } - imgchip->pwm_clk = devm_clk_get(&pdev->dev, "imgchip"); + imgchip->pwm_clk = devm_clk_get(&pdev->dev, "pwm"); if (IS_ERR(imgchip->pwm_clk)) { - dev_err(&pdev->dev, "failed to get imgchip clock\n"); + dev_err(&pdev->dev, "failed to get pwm clock\n"); return PTR_ERR(imgchip->pwm_clk); } diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 2f4ac9591c8f..271dfad05d68 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -150,6 +150,8 @@ static unsigned int max_nr_fru; /* Total length of record including headers and list of descriptor entries. */ static size_t max_rec_len; +#define FMPM_MAX_REC_LEN (sizeof(struct fru_rec) + (sizeof(struct cper_fru_poison_desc) * 255)) + /* Total number of SPA entries across all FRUs. */ static unsigned int spa_nr_entries; @@ -475,6 +477,16 @@ static void set_rec_fields(struct fru_rec *rec) struct cper_section_descriptor *sec_desc = &rec->sec_desc; struct cper_record_header *hdr = &rec->hdr; + /* + * This is a saved record created with fewer max_nr_entries. + * Update the record lengths and keep everything else as-is. 
+ */ + if (hdr->record_length && hdr->record_length < max_rec_len) { + pr_debug("Growing record 0x%016llx from %u to %zu bytes\n", + hdr->record_id, hdr->record_length, max_rec_len); + goto update_lengths; + } + memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE); hdr->revision = CPER_RECORD_REV; hdr->signature_end = CPER_SIG_END; @@ -489,19 +501,21 @@ static void set_rec_fields(struct fru_rec *rec) hdr->error_severity = CPER_SEV_RECOVERABLE; hdr->validation_bits = 0; - hdr->record_length = max_rec_len; hdr->creator_id = CPER_CREATOR_FMP; hdr->notification_type = CPER_NOTIFY_MCE; hdr->record_id = cper_next_record_id(); hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR; sec_desc->section_offset = sizeof(struct cper_record_header); - sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header); sec_desc->revision = CPER_SEC_REV; sec_desc->validation_bits = 0; sec_desc->flags = CPER_SEC_PRIMARY; sec_desc->section_type = CPER_SECTION_TYPE_FMP; sec_desc->section_severity = CPER_SEV_RECOVERABLE; + +update_lengths: + hdr->record_length = max_rec_len; + sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header); } static int save_new_records(void) @@ -512,16 +526,18 @@ static int save_new_records(void) int ret = 0; for_each_fru(i, rec) { - if (rec->hdr.record_length) + /* No need to update saved records that match the current record size. */ + if (rec->hdr.record_length == max_rec_len) continue; + if (!rec->hdr.record_length) + set_bit(i, new_records); + set_rec_fields(rec); ret = update_record_on_storage(rec); if (ret) goto out_clear; - - set_bit(i, new_records); } return ret; @@ -641,12 +657,7 @@ static int get_saved_records(void) int ret, pos; ssize_t len; - /* - * Assume saved records match current max size. - * - * However, this may not be true depending on module parameters. 
- */ - old = kmalloc(max_rec_len, GFP_KERNEL); + old = kmalloc(FMPM_MAX_REC_LEN, GFP_KERNEL); if (!old) { ret = -ENOMEM; goto out; @@ -663,21 +674,31 @@ static int get_saved_records(void) * Make sure to clear temporary buffer between reads to avoid * leftover data from records of various sizes. */ - memset(old, 0, max_rec_len); + memset(old, 0, FMPM_MAX_REC_LEN); - len = erst_read_record(record_id, &old->hdr, max_rec_len, + len = erst_read_record(record_id, &old->hdr, FMPM_MAX_REC_LEN, sizeof(struct fru_rec), &CPER_CREATOR_FMP); if (len < 0) continue; - if (len > max_rec_len) { - pr_debug("Found record larger than max_rec_len\n"); + new = get_valid_record(old); + if (!new) { + erst_clear(record_id); continue; } - new = get_valid_record(old); - if (!new) - erst_clear(record_id); + if (len > max_rec_len) { + unsigned int saved_nr_entries; + + saved_nr_entries = len - sizeof(struct fru_rec); + saved_nr_entries /= sizeof(struct cper_fru_poison_desc); + + pr_warn("Saved record found with %u entries.\n", saved_nr_entries); + pr_warn("Please increase max_nr_entries to %u.\n", saved_nr_entries); + + ret = -EINVAL; + goto out_end; + } /* Restore the record */ memcpy(new, old, len); diff --git a/drivers/ras/debugfs.h b/drivers/ras/debugfs.h index 4749ccdeeba1..5a2f48439258 100644 --- a/drivers/ras/debugfs.h +++ b/drivers/ras/debugfs.h @@ -4,6 +4,10 @@ #include <linux/debugfs.h> +#if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *ras_get_debugfs_root(void); +#else +static inline struct dentry *ras_get_debugfs_root(void) { return NULL; } +#endif /* DEBUG_FS */ #endif /* __RAS_DEBUGFS_H__ */ diff --git a/drivers/regulator/tps65132-regulator.c b/drivers/regulator/tps65132-regulator.c index a06f5f2d7932..9c2f0dd42613 100644 --- a/drivers/regulator/tps65132-regulator.c +++ b/drivers/regulator/tps65132-regulator.c @@ -267,10 +267,17 @@ static const struct i2c_device_id tps65132_id[] = { }; MODULE_DEVICE_TABLE(i2c, tps65132_id); +static const struct of_device_id __maybe_unused 
tps65132_of_match[] = { + { .compatible = "ti,tps65132" }, + {}, +}; +MODULE_DEVICE_TABLE(of, tps65132_of_match); + static struct i2c_driver tps65132_i2c_driver = { .driver = { .name = "tps65132", .probe_type = PROBE_PREFER_ASYNCHRONOUS, + .of_match_table = of_match_ptr(tps65132_of_match), }, .probe = tps65132_probe, .id_table = tps65132_id, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index a0cce6872075..f0b8b709649f 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1179,6 +1179,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev, } } +/** + * qeth_irq() - qeth interrupt handler + * @cdev: ccw device + * @intparm: expect pointer to iob + * @irb: Interruption Response Block + * + * In the good path: + * corresponding qeth channel is locked with last used iob as active_cmd. + * But this function is also called for error interrupts. + * + * Caller ensures that: + * Interrupts are disabled; ccw device lock is held; + * + */ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) { @@ -1220,11 +1234,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, iob = (struct qeth_cmd_buffer *) (addr_t)intparm; } - qeth_unlock_channel(card, channel); - rc = qeth_check_irb_error(card, cdev, irb); if (rc) { /* IO was terminated, free its resources. */ + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); return; @@ -1268,6 +1281,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, rc = qeth_get_problem(card, cdev, irb); if (rc) { card->read_or_write_problem = 1; + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); qeth_clear_ipacmd_list(card); @@ -1276,6 +1290,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, } } + if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) { + /* channel command hasn't started: retry. 
+ * active_cmd is still set to last iob + */ + QETH_CARD_TEXT(card, 2, "irqcc1"); + rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob), + (addr_t)iob, 0, 0, iob->timeout); + if (rc) { + QETH_DBF_MESSAGE(2, + "ccw retry on %x failed, rc = %i\n", + CARD_DEVID(card), rc); + QETH_CARD_TEXT_(card, 2, " err%d", rc); + qeth_unlock_channel(card, channel); + qeth_cancel_cmd(iob, rc); + } + return; + } + + qeth_unlock_channel(card, channel); + if (iob) { /* sanity check: */ if (irb->scsw.cmd.count > iob->length) { diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c index 2c246e80c1c4..d91659811eb3 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c +++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c @@ -833,7 +833,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba, BNX2FC_TGT_DBG(tgt, "Freeing up session resources\n"); - spin_lock_bh(&tgt->cq_lock); ctx_base_ptr = tgt->ctx_base; tgt->ctx_base = NULL; @@ -889,7 +888,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba, tgt->sq, tgt->sq_dma); tgt->sq = NULL; } - spin_unlock_bh(&tgt->cq_lock); if (ctx_base_ptr) iounmap(ctx_base_ptr); diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c index 1befcd5b2a0f..fa07a6f54003 100644 --- a/drivers/scsi/ch.c +++ b/drivers/scsi/ch.c @@ -102,7 +102,9 @@ do { \ #define MAX_RETRIES 1 -static struct class * ch_sysfs_class; +static const struct class ch_sysfs_class = { + .name = "scsi_changer", +}; typedef struct { struct kref ref; @@ -930,7 +932,7 @@ static int ch_probe(struct device *dev) mutex_init(&ch->lock); kref_init(&ch->ref); ch->device = sd; - class_dev = device_create(ch_sysfs_class, dev, + class_dev = device_create(&ch_sysfs_class, dev, MKDEV(SCSI_CHANGER_MAJOR, ch->minor), ch, "s%s", ch->name); if (IS_ERR(class_dev)) { @@ -955,7 +957,7 @@ static int ch_probe(struct device *dev) return 0; destroy_dev: - device_destroy(ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor)); + device_destroy(&ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor)); 
put_device: scsi_device_put(sd); remove_idr: @@ -974,7 +976,7 @@ static int ch_remove(struct device *dev) dev_set_drvdata(dev, NULL); spin_unlock(&ch_index_lock); - device_destroy(ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR,ch->minor)); + device_destroy(&ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor)); scsi_device_put(ch->device); kref_put(&ch->ref, ch_destroy); return 0; @@ -1003,11 +1005,9 @@ static int __init init_ch_module(void) int rc; printk(KERN_INFO "SCSI Media Changer driver v" VERSION " \n"); - ch_sysfs_class = class_create("scsi_changer"); - if (IS_ERR(ch_sysfs_class)) { - rc = PTR_ERR(ch_sysfs_class); + rc = class_register(&ch_sysfs_class); + if (rc) return rc; - } rc = register_chrdev(SCSI_CHANGER_MAJOR,"ch",&changer_fops); if (rc < 0) { printk("Unable to get major %d for SCSI-Changer\n", @@ -1022,7 +1022,7 @@ static int __init init_ch_module(void) fail2: unregister_chrdev(SCSI_CHANGER_MAJOR, "ch"); fail1: - class_destroy(ch_sysfs_class); + class_unregister(&ch_sysfs_class); return rc; } @@ -1030,7 +1030,7 @@ static void __exit exit_ch_module(void) { scsi_unregister_driver(&ch_template.gendrv); unregister_chrdev(SCSI_CHANGER_MAJOR, "ch"); - class_destroy(ch_sysfs_class); + class_unregister(&ch_sysfs_class); idr_destroy(&ch_index_idr); } diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index debd36974119..e8382cc5cf23 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -28,7 +28,12 @@ MODULE_AUTHOR("Manoj N. Kumar <manoj@linux.vnet.ibm.com>"); MODULE_AUTHOR("Matthew R. 
Ochs <mrochs@linux.vnet.ibm.com>"); MODULE_LICENSE("GPL"); -static struct class *cxlflash_class; +static char *cxlflash_devnode(const struct device *dev, umode_t *mode); +static const struct class cxlflash_class = { + .name = "cxlflash", + .devnode = cxlflash_devnode, +}; + static u32 cxlflash_major; static DECLARE_BITMAP(cxlflash_minor, CXLFLASH_MAX_ADAPTERS); @@ -3602,7 +3607,7 @@ static int init_chrdev(struct cxlflash_cfg *cfg) goto err1; } - char_dev = device_create(cxlflash_class, NULL, devno, + char_dev = device_create(&cxlflash_class, NULL, devno, NULL, "cxlflash%d", minor); if (IS_ERR(char_dev)) { rc = PTR_ERR(char_dev); @@ -3880,14 +3885,12 @@ static int cxlflash_class_init(void) cxlflash_major = MAJOR(devno); - cxlflash_class = class_create("cxlflash"); - if (IS_ERR(cxlflash_class)) { - rc = PTR_ERR(cxlflash_class); + rc = class_register(&cxlflash_class); + if (rc) { pr_err("%s: class_create failed rc=%d\n", __func__, rc); goto err; } - cxlflash_class->devnode = cxlflash_devnode; out: pr_debug("%s: returning rc=%d\n", __func__, rc); return rc; @@ -3903,7 +3906,7 @@ static void cxlflash_class_exit(void) { dev_t devno = MKDEV(cxlflash_major, 0); - class_destroy(cxlflash_class); + class_unregister(&cxlflash_class); unregister_chrdev_region(devno, CXLFLASH_MAX_ADAPTERS); } diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 4f495a41ec4a..2d92549e5243 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -353,12 +353,13 @@ static void scsi_host_dev_release(struct device *dev) if (shost->shost_state == SHOST_CREATED) { /* - * Free the shost_dev device name here if scsi_host_alloc() - * and scsi_host_put() have been called but neither + * Free the shost_dev device name and remove the proc host dir + * here if scsi_host_{alloc,put}() have been called but neither * scsi_host_add() nor scsi_remove_host() has been called. * This avoids that the memory allocated for the shost_dev - * name is leaked. 
+ * name as well as the proc dir structure are leaked. */ + scsi_proc_hostdir_rm(shost->hostt); kfree(dev_name(&shost->shost_dev)); } diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index a2204674b680..f6e6db8b8aba 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -135,7 +135,7 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size, static inline void *alloc_smp_req(int size) { - u8 *p = kzalloc(size, GFP_KERNEL); + u8 *p = kzalloc(ALIGN(size, ARCH_DMA_MINALIGN), GFP_KERNEL); if (p) p[0] = SMP_REQUEST; return p; @@ -1621,6 +1621,16 @@ out_err: /* ---------- Domain revalidation ---------- */ +static void sas_get_sas_addr_and_dev_type(struct smp_disc_resp *disc_resp, + u8 *sas_addr, + enum sas_device_type *type) +{ + memcpy(sas_addr, disc_resp->disc.attached_sas_addr, SAS_ADDR_SIZE); + *type = to_dev_type(&disc_resp->disc); + if (*type == SAS_PHY_UNUSED) + memset(sas_addr, 0, SAS_ADDR_SIZE); +} + static int sas_get_phy_discover(struct domain_device *dev, int phy_id, struct smp_disc_resp *disc_resp) { @@ -1674,13 +1684,8 @@ int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id, return -ENOMEM; res = sas_get_phy_discover(dev, phy_id, disc_resp); - if (res == 0) { - memcpy(sas_addr, disc_resp->disc.attached_sas_addr, - SAS_ADDR_SIZE); - *type = to_dev_type(&disc_resp->disc); - if (*type == 0) - memset(sas_addr, 0, SAS_ADDR_SIZE); - } + if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, type); kfree(disc_resp); return res; } @@ -1940,6 +1945,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, struct expander_device *ex = &dev->ex_dev; struct ex_phy *phy = &ex->ex_phy[phy_id]; enum sas_device_type type = SAS_PHY_UNUSED; + struct smp_disc_resp *disc_resp; u8 sas_addr[SAS_ADDR_SIZE]; char msg[80] = ""; int res; @@ -1951,33 +1957,41 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, 
SAS_ADDR(dev->sas_addr), phy_id, msg); memset(sas_addr, 0, SAS_ADDR_SIZE); - res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type); + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); + if (!disc_resp) + return -ENOMEM; + + res = sas_get_phy_discover(dev, phy_id, disc_resp); switch (res) { case SMP_RESP_NO_PHY: phy->phy_state = PHY_NOT_PRESENT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_PHY_VACANT: phy->phy_state = PHY_VACANT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_FUNC_ACC: break; case -ECOMM: break; default: - return res; + goto out_free_resp; } + if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, &type); + if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) { phy->phy_state = PHY_EMPTY; sas_unregister_devs_sas_addr(dev, phy_id, last); /* - * Even though the PHY is empty, for convenience we discover - * the PHY to update the PHY info, like negotiated linkrate. + * Even though the PHY is empty, for convenience we update + * the PHY info, like negotiated linkrate. 
*/ - sas_ex_phy_discover(dev, phy_id); - return res; + if (res == 0) + sas_set_ex_phy(dev, phy_id, disc_resp); + goto out_free_resp; } else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) && dev_type_flutter(type, phy->attached_dev_type)) { struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id); @@ -1989,7 +2003,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, action = ", needs recovery"; pr_debug("ex %016llx phy%02d broadcast flutter%s\n", SAS_ADDR(dev->sas_addr), phy_id, action); - return res; + goto out_free_resp; } /* we always have to delete the old device when we went here */ @@ -1998,7 +2012,10 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(phy->attached_sas_addr)); sas_unregister_devs_sas_addr(dev, phy_id, last); - return sas_discover_new(dev, phy_id); + res = sas_discover_new(dev, phy_id); +out_free_resp: + kfree(disc_resp); + return res; } /** diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 30d20d37554f..98ca7df003ef 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1333,7 +1333,6 @@ struct lpfc_hba { struct timer_list fabric_block_timer; unsigned long bit_flags; atomic_t num_rsrc_err; - atomic_t num_cmd_success; unsigned long last_rsrc_error_time; unsigned long last_ramp_down_time; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -1438,6 +1437,7 @@ struct lpfc_hba { struct timer_list inactive_vmid_poll; /* RAS Support */ + spinlock_t ras_fwlog_lock; /* do not take while holding another lock */ struct lpfc_ras_fwlog ras_fwlog; uint32_t iocb_cnt; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 365c7e96070b..3c534b3cfe91 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -5865,9 +5865,9 @@ lpfc_ras_fwlog_buffsize_set(struct lpfc_hba *phba, uint val) if (phba->cfg_ras_fwlog_func != PCI_FUNC(phba->pcidev->devfn)) return -EINVAL; - spin_lock_irq(&phba->hbalock); + 
spin_lock_irq(&phba->ras_fwlog_lock); state = phba->ras_fwlog.state; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); if (state == REG_INPROGRESS) { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "6147 RAS Logging " diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index d80e6e81053b..529df1768fa8 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -2513,7 +2513,7 @@ static int lpfcdiag_loop_self_reg(struct lpfc_hba *phba, uint16_t *rpi) return -ENOMEM; } - dmabuff = (struct lpfc_dmabuf *)mbox->ctx_buf; + dmabuff = mbox->ctx_buf; mbox->ctx_buf = NULL; mbox->ctx_ndlp = NULL; status = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO); @@ -3169,10 +3169,10 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job) } cmdwqe = &cmdiocbq->wqe; - memset(cmdwqe, 0, sizeof(union lpfc_wqe)); + memset(cmdwqe, 0, sizeof(*cmdwqe)); if (phba->sli_rev < LPFC_SLI_REV4) { rspwqe = &rspiocbq->wqe; - memset(rspwqe, 0, sizeof(union lpfc_wqe)); + memset(rspwqe, 0, sizeof(*rspwqe)); } INIT_LIST_HEAD(&head); @@ -3376,7 +3376,7 @@ lpfc_bsg_issue_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) unsigned long flags; uint8_t *pmb, *pmb_buf; - dd_data = pmboxq->ctx_ndlp; + dd_data = pmboxq->ctx_u.dd_data; /* * The outgoing buffer is readily referred from the dma buffer, @@ -3553,7 +3553,7 @@ lpfc_bsg_issue_mbox_ext_handle_job(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) struct lpfc_sli_config_mbox *sli_cfg_mbx; uint8_t *pmbx; - dd_data = pmboxq->ctx_buf; + dd_data = pmboxq->ctx_u.dd_data; /* Determine if job has been aborted */ spin_lock_irqsave(&phba->ct_ev_lock, flags); @@ -3940,7 +3940,7 @@ lpfc_bsg_sli_cfg_read_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_read_mbox_ext_cmpl; /* context fields to callback function */ - pmboxq->ctx_buf = dd_data; + pmboxq->ctx_u.dd_data = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ 
-4112,7 +4112,7 @@ lpfc_bsg_sli_cfg_write_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl; /* context fields to callback function */ - pmboxq->ctx_buf = dd_data; + pmboxq->ctx_u.dd_data = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ -4460,7 +4460,7 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl; /* context fields to callback function */ - pmboxq->ctx_buf = dd_data; + pmboxq->ctx_u.dd_data = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ -4747,7 +4747,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job, if (mbox_req->inExtWLen || mbox_req->outExtWLen) { from = pmbx; ext = from + sizeof(MAILBOX_t); - pmboxq->ctx_buf = ext; + pmboxq->ext_buf = ext; pmboxq->in_ext_byte_len = mbox_req->inExtWLen * sizeof(uint32_t); pmboxq->out_ext_byte_len = @@ -4875,7 +4875,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_mbox_cmpl; /* setup context field to pass wait_queue pointer to wake function */ - pmboxq->ctx_ndlp = dd_data; + pmboxq->ctx_u.dd_data = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ -5070,12 +5070,12 @@ lpfc_bsg_get_ras_config(struct bsg_job *job) bsg_reply->reply_data.vendor_reply.vendor_rsp; /* Current logging state */ - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (ras_fwlog->state == ACTIVE) ras_reply->state = LPFC_RASLOG_STATE_RUNNING; else ras_reply->state = LPFC_RASLOG_STATE_STOPPED; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); ras_reply->log_level = phba->ras_fwlog.fw_loglevel; ras_reply->log_buff_sz = phba->cfg_ras_fwlog_buffsize; @@ -5132,13 +5132,13 @@ lpfc_bsg_set_ras_config(struct bsg_job *job) if (action == 
LPFC_RASACTION_STOP_LOGGING) { /* Check if already disabled */ - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (ras_fwlog->state != ACTIVE) { - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); rc = -ESRCH; goto ras_job_error; } - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); /* Disable logging */ lpfc_ras_stop_fwlog(phba); @@ -5149,10 +5149,10 @@ lpfc_bsg_set_ras_config(struct bsg_job *job) * FW-logging with new log-level. Return status * "Logging already Running" to caller. **/ - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (ras_fwlog->state != INACTIVE) action_status = -EINPROGRESS; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); /* Enable logging */ rc = lpfc_sli4_ras_fwlog_init(phba, log_level, @@ -5268,13 +5268,13 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job) goto ras_job_error; /* Logging to be stopped before reading */ - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (ras_fwlog->state == ACTIVE) { - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); rc = -EINPROGRESS; goto ras_job_error; } - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); if (job->request_len < sizeof(struct fc_bsg_request) + diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index ab5af10c8a16..a2d2b02b3418 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2194,12 +2194,12 @@ static int lpfc_debugfs_ras_log_data(struct lpfc_hba *phba, memset(buffer, 0, size); - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (phba->ras_fwlog.state != ACTIVE) { - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); return -EINVAL; } - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); list_for_each_entry_safe(dmabuf, next, 
&phba->ras_fwlog.fwlog_buff_list, list) { @@ -2250,13 +2250,13 @@ lpfc_debugfs_ras_log_open(struct inode *inode, struct file *file) int size; int rc = -ENOMEM; - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); if (phba->ras_fwlog.state != ACTIVE) { - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); rc = -EINVAL; goto out; } - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); if (check_mul_overflow(LPFC_RAS_MIN_BUFF_POST_SIZE, phba->cfg_ras_fwlog_buffsize, &size)) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 28e56542e072..f7c28dc73bf6 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -4437,23 +4437,23 @@ lpfc_els_retry_delay(struct timer_list *t) unsigned long flags; struct lpfc_work_evt *evtp = &ndlp->els_retry_evt; + /* Hold a node reference for outstanding queued work */ + if (!lpfc_nlp_get(ndlp)) + return; + spin_lock_irqsave(&phba->hbalock, flags); if (!list_empty(&evtp->evt_listp)) { spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_nlp_put(ndlp); return; } - /* We need to hold the node by incrementing the reference - * count until the queued work is done - */ - evtp->evt_arg1 = lpfc_nlp_get(ndlp); - if (evtp->evt_arg1) { - evtp->evt = LPFC_EVT_ELS_RETRY; - list_add_tail(&evtp->evt_listp, &phba->work_list); - lpfc_worker_wake_up(phba); - } + evtp->evt_arg1 = ndlp; + evtp->evt = LPFC_EVT_ELS_RETRY; + list_add_tail(&evtp->evt_listp, &phba->work_list); spin_unlock_irqrestore(&phba->hbalock, flags); - return; + + lpfc_worker_wake_up(phba); } /** @@ -7238,7 +7238,7 @@ lpfc_get_rdp_info(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context) goto rdp_fail; mbox->vport = rdp_context->ndlp->vport; mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a0; - mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; + mbox->ctx_u.rdp = rdp_context; rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) { 
lpfc_mbox_rsrc_cleanup(phba, mbox, MBOX_THD_UNLOCKED); @@ -7290,7 +7290,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, mbox->in_ext_byte_len = DMP_SFF_PAGE_A0_SIZE; mbox->out_ext_byte_len = DMP_SFF_PAGE_A0_SIZE; mbox->mbox_offset_word = 5; - mbox->ctx_buf = virt; + mbox->ext_buf = virt; } else { bf_set(lpfc_mbx_memory_dump_type3_length, &mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A0_SIZE); @@ -7298,7 +7298,6 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys); } mbox->vport = phba->pport; - mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30); if (rc == MBX_NOT_FINISHED) { @@ -7307,7 +7306,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, } if (phba->sli_rev == LPFC_SLI_REV4) - mp = (struct lpfc_dmabuf *)(mbox->ctx_buf); + mp = mbox->ctx_buf; else mp = mpsave; @@ -7350,7 +7349,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, mbox->in_ext_byte_len = DMP_SFF_PAGE_A2_SIZE; mbox->out_ext_byte_len = DMP_SFF_PAGE_A2_SIZE; mbox->mbox_offset_word = 5; - mbox->ctx_buf = virt; + mbox->ext_buf = virt; } else { bf_set(lpfc_mbx_memory_dump_type3_length, &mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A2_SIZE); @@ -7358,7 +7357,6 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys); } - mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30); if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) { rc = 1; @@ -7500,9 +7498,9 @@ lpfc_els_lcb_rsp(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) int rc; mb = &pmb->u.mb; - lcb_context = (struct lpfc_lcb_context *)pmb->ctx_ndlp; + lcb_context = pmb->ctx_u.lcb; ndlp = lcb_context->ndlp; - pmb->ctx_ndlp = NULL; + memset(&pmb->ctx_u, 0, sizeof(pmb->ctx_u)); pmb->ctx_buf = NULL; shdr = (union lpfc_sli4_cfg_shdr *) @@ -7642,7 +7640,7 @@ lpfc_sli4_set_beacon(struct lpfc_vport *vport, lpfc_sli4_config(phba, mbox, 
LPFC_MBOX_SUBSYSTEM_COMMON, LPFC_MBOX_OPCODE_SET_BEACON_CONFIG, len, LPFC_SLI4_MBX_EMBED); - mbox->ctx_ndlp = (void *)lcb_context; + mbox->ctx_u.lcb = lcb_context; mbox->vport = phba->pport; mbox->mbox_cmpl = lpfc_els_lcb_rsp; bf_set(lpfc_mbx_set_beacon_port_num, &mbox->u.mqe.un.beacon_config, @@ -8639,9 +8637,9 @@ lpfc_els_rsp_rls_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) mb = &pmb->u.mb; ndlp = pmb->ctx_ndlp; - rxid = (uint16_t)((unsigned long)(pmb->ctx_buf) & 0xffff); - oxid = (uint16_t)(((unsigned long)(pmb->ctx_buf) >> 16) & 0xffff); - pmb->ctx_buf = NULL; + rxid = (uint16_t)(pmb->ctx_u.ox_rx_id & 0xffff); + oxid = (uint16_t)((pmb->ctx_u.ox_rx_id >> 16) & 0xffff); + memset(&pmb->ctx_u, 0, sizeof(pmb->ctx_u)); pmb->ctx_ndlp = NULL; if (mb->mbxStatus) { @@ -8745,8 +8743,7 @@ lpfc_els_rcv_rls(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC); if (mbox) { lpfc_read_lnk_stat(phba, mbox); - mbox->ctx_buf = (void *)((unsigned long) - (ox_id << 16 | ctx)); + mbox->ctx_u.ox_rx_id = ox_id << 16 | ctx; mbox->ctx_ndlp = lpfc_nlp_get(ndlp); if (!mbox->ctx_ndlp) goto node_err; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index a7a2309a629f..e42fa9c822b5 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -257,7 +257,9 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) if (evtp->evt_arg1) { evtp->evt = LPFC_EVT_DEV_LOSS; list_add_tail(&evtp->evt_listp, &phba->work_list); + spin_unlock_irqrestore(&phba->hbalock, iflags); lpfc_worker_wake_up(phba); + return; } spin_unlock_irqrestore(&phba->hbalock, iflags); } else { @@ -275,10 +277,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM); } - } - - return; } /** @@ -3429,7 +3428,7 @@ static void lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_dmabuf *mp = (struct 
lpfc_dmabuf *)pmb->ctx_buf; + struct lpfc_dmabuf *mp = pmb->ctx_buf; struct lpfc_vport *vport = pmb->vport; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct serv_parm *sp = &vport->fc_sparam; @@ -3737,7 +3736,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_mbx_read_top *la; struct lpfc_sli_ring *pring; MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); + struct lpfc_dmabuf *mp = pmb->ctx_buf; uint8_t attn_type; /* Unblock ELS traffic */ @@ -3851,8 +3850,8 @@ void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_vport *vport = pmb->vport; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)pmb->ctx_buf; - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + struct lpfc_dmabuf *mp = pmb->ctx_buf; + struct lpfc_nodelist *ndlp = pmb->ctx_ndlp; /* The driver calls the state machine with the pmb pointer * but wants to make sure a stale ctx_buf isn't acted on. @@ -4066,7 +4065,7 @@ lpfc_create_static_vport(struct lpfc_hba *phba) * the dump routine is a single-use construct. 
*/ if (pmb->ctx_buf) { - mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + mp = pmb->ctx_buf; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); pmb->ctx_buf = NULL; @@ -4089,7 +4088,7 @@ lpfc_create_static_vport(struct lpfc_hba *phba) if (phba->sli_rev == LPFC_SLI_REV4) { byte_count = pmb->u.mqe.un.mb_words[5]; - mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + mp = pmb->ctx_buf; if (byte_count > sizeof(struct static_vport_info) - offset) byte_count = sizeof(struct static_vport_info) @@ -4169,7 +4168,7 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_vport *vport = pmb->vport; MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + struct lpfc_nodelist *ndlp = pmb->ctx_ndlp; pmb->ctx_ndlp = NULL; @@ -4307,7 +4306,7 @@ void lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + struct lpfc_nodelist *ndlp = pmb->ctx_ndlp; struct lpfc_vport *vport = pmb->vport; int rc; @@ -4431,7 +4430,7 @@ lpfc_mbx_cmpl_fc_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_vport *vport = pmb->vport; MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + struct lpfc_nodelist *ndlp = pmb->ctx_ndlp; pmb->ctx_ndlp = NULL; if (mb->mbxStatus) { @@ -5174,7 +5173,7 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_vport *vport = pmb->vport; struct lpfc_nodelist *ndlp; - ndlp = (struct lpfc_nodelist *)(pmb->ctx_ndlp); + ndlp = pmb->ctx_ndlp; if (!ndlp) return; lpfc_issue_els_logo(vport, ndlp, 0); @@ -5496,7 +5495,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if ((mb = phba->sli.mbox_active)) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && !(mb->mbox_flag & LPFC_MBX_IMED_UNREG) && - (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { + (ndlp == mb->ctx_ndlp)) { mb->ctx_ndlp = NULL; 
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; } @@ -5507,7 +5506,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) list_for_each_entry(mb, &phba->sli.mboxq_cmpl, list) { if ((mb->u.mb.mbxCommand != MBX_REG_LOGIN64) || (mb->mbox_flag & LPFC_MBX_IMED_UNREG) || - (ndlp != (struct lpfc_nodelist *)mb->ctx_ndlp)) + (ndlp != mb->ctx_ndlp)) continue; mb->ctx_ndlp = NULL; @@ -5517,7 +5516,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && !(mb->mbox_flag & LPFC_MBX_IMED_UNREG) && - (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { + (ndlp == mb->ctx_ndlp)) { list_del(&mb->list); lpfc_mbox_rsrc_cleanup(phba, mb, MBOX_THD_LOCKED); @@ -6357,7 +6356,7 @@ void lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + struct lpfc_nodelist *ndlp = pmb->ctx_ndlp; struct lpfc_vport *vport = pmb->vport; pmb->ctx_ndlp = NULL; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 88b2e57d90c2..f7a0aa3625f4 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -460,7 +460,7 @@ lpfc_config_port_post(struct lpfc_hba *phba) return -EIO; } - mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + mp = pmb->ctx_buf; /* This dmabuf was allocated by lpfc_read_sparam. The dmabuf is no * longer needed. 
Prevent unintended ctx_buf access as the mbox is @@ -2217,7 +2217,7 @@ lpfc_handle_latt(struct lpfc_hba *phba) /* Cleanup any outstanding ELS commands */ lpfc_els_flush_all_cmd(phba); psli->slistat.link_event++; - lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf); + lpfc_read_topology(phba, pmb, pmb->ctx_buf); pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology; pmb->vport = vport; /* Block ELS IOCBs until we have processed this mbox command */ @@ -5454,7 +5454,7 @@ lpfc_sli4_async_link_evt(struct lpfc_hba *phba, phba->sli.slistat.link_event++; /* Create lpfc_handle_latt mailbox command from link ACQE */ - lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf); + lpfc_read_topology(phba, pmb, pmb->ctx_buf); pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology; pmb->vport = phba->pport; @@ -6347,7 +6347,7 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) phba->sli.slistat.link_event++; /* Create lpfc_handle_latt mailbox command from link ACQE */ - lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf); + lpfc_read_topology(phba, pmb, pmb->ctx_buf); pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology; pmb->vport = phba->pport; @@ -7705,6 +7705,9 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) "NVME" : " "), (phba->nvmet_support ? 
"NVMET" : " ")); + /* ras_fwlog state */ + spin_lock_init(&phba->ras_fwlog_lock); + /* Initialize the IO buffer list used by driver for SLI3 SCSI */ spin_lock_init(&phba->scsi_buf_list_get_lock); INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get); @@ -13055,7 +13058,7 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) rc = request_threaded_irq(eqhdl->irq, &lpfc_sli4_hba_intr_handler, &lpfc_sli4_hba_intr_handler_th, - IRQF_ONESHOT, name, eqhdl); + 0, name, eqhdl); if (rc) { lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "0486 MSI-X fast-path (%d) " diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index f7c41958036b..e98f1c2b2220 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -102,7 +102,7 @@ lpfc_mbox_rsrc_cleanup(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox, { struct lpfc_dmabuf *mp; - mp = (struct lpfc_dmabuf *)mbox->ctx_buf; + mp = mbox->ctx_buf; mbox->ctx_buf = NULL; /* Release the generic BPL buffer memory. */ @@ -204,10 +204,8 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset, uint16_t region_id) { MAILBOX_t *mb; - void *ctx; mb = &pmb->u.mb; - ctx = pmb->ctx_buf; /* Setup to dump VPD region */ memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); @@ -219,7 +217,6 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset, mb->un.varDmp.word_cnt = (DMP_RSP_SIZE / sizeof (uint32_t)); mb->un.varDmp.co = 0; mb->un.varDmp.resp_offset = 0; - pmb->ctx_buf = ctx; mb->mbxOwner = OWN_HOST; return; } @@ -236,11 +233,8 @@ void lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { MAILBOX_t *mb; - void *ctx; mb = &pmb->u.mb; - /* Save context so that we can restore after memset */ - ctx = pmb->ctx_buf; /* Setup to dump VPD region */ memset(pmb, 0, sizeof(LPFC_MBOXQ_t)); @@ -254,7 +248,6 @@ lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) mb->un.varDmp.word_cnt = WAKE_UP_PARMS_WORD_SIZE; mb->un.varDmp.co = 0; mb->un.varDmp.resp_offset = 0; - pmb->ctx_buf = ctx; return; } 
@@ -372,7 +365,7 @@ lpfc_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, /* Save address for later completion and set the owner to host so that * the FW knows this mailbox is available for processing. */ - pmb->ctx_buf = (uint8_t *)mp; + pmb->ctx_buf = mp; mb->mbxOwner = OWN_HOST; return (0); } @@ -1816,7 +1809,7 @@ lpfc_sli4_mbox_cmd_free(struct lpfc_hba *phba, struct lpfcMboxq *mbox) } /* Reinitialize the context pointers to avoid stale usage. */ mbox->ctx_buf = NULL; - mbox->context3 = NULL; + memset(&mbox->ctx_u, 0, sizeof(mbox->ctx_u)); kfree(mbox->sge_array); /* Finally, free the mailbox command itself */ mempool_free(mbox, phba->mbox_mem_pool); @@ -2366,8 +2359,7 @@ lpfc_mbx_cmpl_rdp_link_stat(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) { MAILBOX_t *mb; int rc = FAILURE; - struct lpfc_rdp_context *rdp_context = - (struct lpfc_rdp_context *)(mboxq->ctx_ndlp); + struct lpfc_rdp_context *rdp_context = mboxq->ctx_u.rdp; mb = &mboxq->u.mb; if (mb->mbxStatus) @@ -2385,9 +2377,8 @@ mbx_failed: static void lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) { - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)mbox->ctx_buf; - struct lpfc_rdp_context *rdp_context = - (struct lpfc_rdp_context *)(mbox->ctx_ndlp); + struct lpfc_dmabuf *mp = mbox->ctx_buf; + struct lpfc_rdp_context *rdp_context = mbox->ctx_u.rdp; if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) goto error_mbox_free; @@ -2401,7 +2392,7 @@ lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) /* Save the dma buffer for cleanup in the final completion. 
*/ mbox->ctx_buf = mp; mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_link_stat; - mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; + mbox->ctx_u.rdp = rdp_context; if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) == MBX_NOT_FINISHED) goto error_mbox_free; @@ -2416,9 +2407,8 @@ void lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) { int rc; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(mbox->ctx_buf); - struct lpfc_rdp_context *rdp_context = - (struct lpfc_rdp_context *)(mbox->ctx_ndlp); + struct lpfc_dmabuf *mp = mbox->ctx_buf; + struct lpfc_rdp_context *rdp_context = mbox->ctx_u.rdp; if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) goto error; @@ -2448,7 +2438,7 @@ lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys); mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a2; - mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; + mbox->ctx_u.rdp = rdp_context; rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) goto error; diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 8e425be7c7c9..c4172791c267 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -300,7 +300,7 @@ lpfc_defer_plogi_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *login_mbox) int rc; ndlp = login_mbox->ctx_ndlp; - save_iocb = login_mbox->context3; + save_iocb = login_mbox->ctx_u.save_iocb; if (mb->mbxStatus == MBX_SUCCESS) { /* Now that REG_RPI completed successfully, @@ -640,7 +640,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, if (!login_mbox->ctx_ndlp) goto out; - login_mbox->context3 = save_iocb; /* For PLOGI ACC */ + login_mbox->ctx_u.save_iocb = save_iocb; /* For PLOGI ACC */ spin_lock_irq(&ndlp->lock); ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI); @@ -682,8 +682,8 @@ lpfc_mbx_cmpl_resume_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) struct lpfc_nodelist *ndlp; uint32_t 
cmd; - elsiocb = (struct lpfc_iocbq *)mboxq->ctx_buf; - ndlp = (struct lpfc_nodelist *)mboxq->ctx_ndlp; + elsiocb = mboxq->ctx_u.save_iocb; + ndlp = mboxq->ctx_ndlp; vport = mboxq->vport; cmd = elsiocb->drvrTimeout; @@ -1875,7 +1875,7 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport, /* cleanup any ndlp on mbox q waiting for reglogin cmpl */ if ((mb = phba->sli.mbox_active)) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && - (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { + (ndlp == mb->ctx_ndlp)) { ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND; lpfc_nlp_put(ndlp); mb->ctx_ndlp = NULL; @@ -1886,7 +1886,7 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport, spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && - (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { + (ndlp == mb->ctx_ndlp)) { ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND; lpfc_nlp_put(ndlp); list_del(&mb->list); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 09c53b85bcb8..c5792eaf3f64 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2616,9 +2616,9 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) /* No concern about the role change on the nvme remoteport. * The transport will update it. */ - spin_lock_irq(&vport->phba->hbalock); + spin_lock_irq(&ndlp->lock); ndlp->fc4_xpt_flags |= NVME_XPT_UNREG_WAIT; - spin_unlock_irq(&vport->phba->hbalock); + spin_unlock_irq(&ndlp->lock); /* Don't let the host nvme transport keep sending keep-alives * on this remoteport. Vport is unloading, no recovery. 
The diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 8258b771bd00..561ced5503c6 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1586,7 +1586,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) wqe = &nvmewqe->wqe; /* Initialize WQE */ - memset(wqe, 0, sizeof(union lpfc_wqe)); + memset(wqe, 0, sizeof(*wqe)); ctx_buf->iocbq->cmd_dmabuf = NULL; spin_lock(&phba->sli4_hba.sgl_list_lock); diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index c0038eaae7b0..4a6e5223a224 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -167,11 +167,10 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba) struct Scsi_Host *shost; struct scsi_device *sdev; unsigned long new_queue_depth; - unsigned long num_rsrc_err, num_cmd_success; + unsigned long num_rsrc_err; int i; num_rsrc_err = atomic_read(&phba->num_rsrc_err); - num_cmd_success = atomic_read(&phba->num_cmd_success); /* * The error and success command counters are global per @@ -186,20 +185,16 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba) for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { shost = lpfc_shost_from_vport(vports[i]); shost_for_each_device(sdev, shost) { - new_queue_depth = - sdev->queue_depth * num_rsrc_err / - (num_rsrc_err + num_cmd_success); - if (!new_queue_depth) - new_queue_depth = sdev->queue_depth - 1; + if (num_rsrc_err >= sdev->queue_depth) + new_queue_depth = 1; else new_queue_depth = sdev->queue_depth - - new_queue_depth; + num_rsrc_err; scsi_change_queue_depth(sdev, new_queue_depth); } } lpfc_destroy_vport_work_array(phba, vports); atomic_set(&phba->num_rsrc_err, 0); - atomic_set(&phba->num_cmd_success, 0); } /** @@ -5336,16 +5331,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) } err = lpfc_bg_scsi_prep_dma_buf(phba, lpfc_cmd); } else { - if (vport->phba->cfg_enable_bg) { - lpfc_printf_vlog(vport, - KERN_INFO, LOG_SCSI_CMD, - "9038 
BLKGRD: rcvd PROT_NORMAL cmd: " - "x%x reftag x%x cnt %u pt %x\n", - cmnd->cmnd[0], - scsi_prot_ref_tag(cmnd), - scsi_logical_block_count(cmnd), - (cmnd->cmnd[1]>>5)); - } err = lpfc_scsi_prep_dma_buf(phba, lpfc_cmd); } diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 1f8a9b5945cb..a028e008dd1e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1217,9 +1217,9 @@ lpfc_set_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, empty = list_empty(&phba->active_rrq_list); list_add_tail(&rrq->list, &phba->active_rrq_list); phba->hba_flag |= HBA_RRQ_ACTIVE; + spin_unlock_irqrestore(&phba->hbalock, iflags); if (empty) lpfc_worker_wake_up(phba); - spin_unlock_irqrestore(&phba->hbalock, iflags); return 0; out: spin_unlock_irqrestore(&phba->hbalock, iflags); @@ -2830,7 +2830,7 @@ lpfc_sli_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) */ pmboxq->mbox_flag |= LPFC_MBX_WAKE; spin_lock_irqsave(&phba->hbalock, drvr_flag); - pmbox_done = (struct completion *)pmboxq->context3; + pmbox_done = pmboxq->ctx_u.mbox_wait; if (pmbox_done) complete(pmbox_done); spin_unlock_irqrestore(&phba->hbalock, drvr_flag); @@ -2885,7 +2885,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if (!test_bit(FC_UNLOADING, &phba->pport->load_flag) && pmb->u.mb.mbxCommand == MBX_REG_LOGIN64 && !pmb->u.mb.mbxStatus) { - mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + mp = pmb->ctx_buf; if (mp) { pmb->ctx_buf = NULL; lpfc_mbuf_free(phba, mp->virt, mp->phys); @@ -2914,12 +2914,12 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } if (pmb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + ndlp = pmb->ctx_ndlp; lpfc_nlp_put(ndlp); } if (pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) { - ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + ndlp = pmb->ctx_ndlp; /* Check to see if there are any deferred events to process */ if (ndlp) { @@ -2952,7 +2952,7 @@ 
lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) /* This nlp_put pairs with lpfc_sli4_resume_rpi */ if (pmb->u.mb.mbxCommand == MBX_RESUME_RPI) { - ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + ndlp = pmb->ctx_ndlp; lpfc_nlp_put(ndlp); } @@ -5819,7 +5819,7 @@ lpfc_sli4_read_fcoe_params(struct lpfc_hba *phba) goto out_free_mboxq; } - mp = (struct lpfc_dmabuf *)mboxq->ctx_buf; + mp = mboxq->ctx_buf; rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI, @@ -6849,9 +6849,9 @@ lpfc_ras_stop_fwlog(struct lpfc_hba *phba) { struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog; - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); ras_fwlog->state = INACTIVE; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); /* Disable FW logging to host memory */ writel(LPFC_CTL_PDEV_CTL_DDL_RAS, @@ -6894,9 +6894,9 @@ lpfc_sli4_ras_dma_free(struct lpfc_hba *phba) ras_fwlog->lwpd.virt = NULL; } - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); ras_fwlog->state = INACTIVE; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); } /** @@ -6998,9 +6998,9 @@ lpfc_sli4_ras_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) goto disable_ras; } - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); ras_fwlog->state = ACTIVE; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); mempool_free(pmb, phba->mbox_mem_pool); return; @@ -7032,9 +7032,9 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba, uint32_t len = 0, fwlog_buffsize, fwlog_entry_count; int rc = 0; - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); ras_fwlog->state = INACTIVE; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); fwlog_buffsize = (LPFC_RAS_MIN_BUFF_POST_SIZE * phba->cfg_ras_fwlog_buffsize); @@ -7095,9 +7095,9 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba, 
mbx_fwlog->u.request.lwpd.addr_lo = putPaddrLow(ras_fwlog->lwpd.phys); mbx_fwlog->u.request.lwpd.addr_hi = putPaddrHigh(ras_fwlog->lwpd.phys); - spin_lock_irq(&phba->hbalock); + spin_lock_irq(&phba->ras_fwlog_lock); ras_fwlog->state = REG_INPROGRESS; - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->ras_fwlog_lock); mbox->vport = phba->pport; mbox->mbox_cmpl = lpfc_sli4_ras_mbox_cmpl; @@ -8766,7 +8766,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) mboxq->vport = vport; rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); - mp = (struct lpfc_dmabuf *)mboxq->ctx_buf; + mp = mboxq->ctx_buf; if (rc == MBX_SUCCESS) { memcpy(&vport->fc_sparam, mp->virt, sizeof(struct serv_parm)); rc = 0; @@ -9548,8 +9548,8 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, } /* Copy the mailbox extension data */ - if (pmbox->in_ext_byte_len && pmbox->ctx_buf) { - lpfc_sli_pcimem_bcopy(pmbox->ctx_buf, + if (pmbox->in_ext_byte_len && pmbox->ext_buf) { + lpfc_sli_pcimem_bcopy(pmbox->ext_buf, (uint8_t *)phba->mbox_ext, pmbox->in_ext_byte_len); } @@ -9562,10 +9562,10 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, = MAILBOX_HBA_EXT_OFFSET; /* Copy the mailbox extension data */ - if (pmbox->in_ext_byte_len && pmbox->ctx_buf) + if (pmbox->in_ext_byte_len && pmbox->ext_buf) lpfc_memcpy_to_slim(phba->MBslimaddr + MAILBOX_HBA_EXT_OFFSET, - pmbox->ctx_buf, pmbox->in_ext_byte_len); + pmbox->ext_buf, pmbox->in_ext_byte_len); if (mbx->mbxCommand == MBX_CONFIG_PORT) /* copy command data into host mbox for cmpl */ @@ -9688,9 +9688,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, lpfc_sli_pcimem_bcopy(phba->mbox, mbx, MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ - if (pmbox->out_ext_byte_len && pmbox->ctx_buf) { + if (pmbox->out_ext_byte_len && pmbox->ext_buf) { lpfc_sli_pcimem_bcopy(phba->mbox_ext, - pmbox->ctx_buf, + pmbox->ext_buf, pmbox->out_ext_byte_len); } } else { @@ -9698,9 +9698,9 @@ lpfc_sli_issue_mbox_s3(struct 
lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, lpfc_memcpy_from_slim(mbx, phba->MBslimaddr, MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ - if (pmbox->out_ext_byte_len && pmbox->ctx_buf) { + if (pmbox->out_ext_byte_len && pmbox->ext_buf) { lpfc_memcpy_from_slim( - pmbox->ctx_buf, + pmbox->ext_buf, phba->MBslimaddr + MAILBOX_HBA_EXT_OFFSET, pmbox->out_ext_byte_len); @@ -11373,18 +11373,18 @@ lpfc_sli_post_recovery_event(struct lpfc_hba *phba, unsigned long iflags; struct lpfc_work_evt *evtp = &ndlp->recovery_evt; + /* Hold a node reference for outstanding queued work */ + if (!lpfc_nlp_get(ndlp)) + return; + spin_lock_irqsave(&phba->hbalock, iflags); if (!list_empty(&evtp->evt_listp)) { spin_unlock_irqrestore(&phba->hbalock, iflags); + lpfc_nlp_put(ndlp); return; } - /* Incrementing the reference count until the queued work is done. */ - evtp->evt_arg1 = lpfc_nlp_get(ndlp); - if (!evtp->evt_arg1) { - spin_unlock_irqrestore(&phba->hbalock, iflags); - return; - } + evtp->evt_arg1 = ndlp; evtp->evt = LPFC_EVT_RECOVER_PORT; list_add_tail(&evtp->evt_listp, &phba->work_list); spin_unlock_irqrestore(&phba->hbalock, iflags); @@ -13262,9 +13262,9 @@ lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq, /* setup wake call as IOCB callback */ pmboxq->mbox_cmpl = lpfc_sli_wake_mbox_wait; - /* setup context3 field to pass wait_queue pointer to wake function */ + /* setup ctx_u field to pass wait_queue pointer to wake function */ init_completion(&mbox_done); - pmboxq->context3 = &mbox_done; + pmboxq->ctx_u.mbox_wait = &mbox_done; /* now issue the command */ retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT); if (retval == MBX_BUSY || retval == MBX_SUCCESS) { @@ -13272,7 +13272,7 @@ lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq, msecs_to_jiffies(timeout * 1000)); spin_lock_irqsave(&phba->hbalock, flag); - pmboxq->context3 = NULL; + pmboxq->ctx_u.mbox_wait = NULL; /* * if LPFC_MBX_WAKE flag is set the mailbox is completed * else do not 
free the resources. @@ -13813,10 +13813,10 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id) lpfc_sli_pcimem_bcopy(mbox, pmbox, MAILBOX_CMD_SIZE); if (pmb->out_ext_byte_len && - pmb->ctx_buf) + pmb->ext_buf) lpfc_sli_pcimem_bcopy( phba->mbox_ext, - pmb->ctx_buf, + pmb->ext_buf, pmb->out_ext_byte_len); } if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) { @@ -13830,10 +13830,8 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id) pmbox->un.varWords[0], 0); if (!pmbox->mbxStatus) { - mp = (struct lpfc_dmabuf *) - (pmb->ctx_buf); - ndlp = (struct lpfc_nodelist *) - pmb->ctx_ndlp; + mp = pmb->ctx_buf; + ndlp = pmb->ctx_ndlp; /* Reg_LOGIN of dflt RPI was * successful. new lets get @@ -14340,8 +14338,8 @@ lpfc_sli4_sp_handle_mbox_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe) mcqe_status, pmbox->un.varWords[0], 0); if (mcqe_status == MB_CQE_STATUS_SUCCESS) { - mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); - ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + mp = pmb->ctx_buf; + ndlp = pmb->ctx_ndlp; /* Reg_LOGIN of dflt RPI was successful. Mark the * node as having an UNREG_LOGIN in progress to stop @@ -19823,14 +19821,15 @@ lpfc_sli4_remove_rpis(struct lpfc_hba *phba) * lpfc_sli4_resume_rpi - Remove the rpi bitmask region * @ndlp: pointer to lpfc nodelist data structure. * @cmpl: completion call-back. - * @arg: data to load as MBox 'caller buffer information' + * @iocbq: data to load as mbox ctx_u information * * This routine is invoked to remove the memory region that * provided rpi via a bitmask. 
**/ int lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp, - void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *arg) + void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *), + struct lpfc_iocbq *iocbq) { LPFC_MBOXQ_t *mboxq; struct lpfc_hba *phba = ndlp->phba; @@ -19859,7 +19858,7 @@ lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp, lpfc_resume_rpi(mboxq, ndlp); if (cmpl) { mboxq->mbox_cmpl = cmpl; - mboxq->ctx_buf = arg; + mboxq->ctx_u.save_iocb = iocbq; } else mboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; mboxq->ctx_ndlp = ndlp; @@ -20676,7 +20675,7 @@ lpfc_sli4_get_config_region23(struct lpfc_hba *phba, char *rgn23_data) if (lpfc_sli4_dump_cfg_rg23(phba, mboxq)) goto out; mqe = &mboxq->u.mqe; - mp = (struct lpfc_dmabuf *)mboxq->ctx_buf; + mp = mboxq->ctx_buf; rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); if (rc) goto out; @@ -21035,7 +21034,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport) (mb->u.mb.mbxCommand == MBX_REG_VPI)) mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - act_mbx_ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp; + act_mbx_ndlp = mb->ctx_ndlp; /* This reference is local to this routine. The * reference is removed at routine exit. 
@@ -21064,7 +21063,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport) mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp; + ndlp = mb->ctx_ndlp; /* Unregister the RPI when mailbox complete */ mb->mbox_flag |= LPFC_MBX_IMED_UNREG; restart_loop = 1; @@ -21084,7 +21083,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport) while (!list_empty(&mbox_cmd_list)) { list_remove_head(&mbox_cmd_list, mb, LPFC_MBOXQ_t, list); if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp; + ndlp = mb->ctx_ndlp; mb->ctx_ndlp = NULL; if (ndlp) { spin_lock(&ndlp->lock); diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index c911a39cb46b..cf7c42ec0306 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term * + * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term * * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * @@ -182,11 +182,29 @@ typedef struct lpfcMboxq { struct lpfc_mqe mqe; } u; struct lpfc_vport *vport; /* virtual port pointer */ - void *ctx_ndlp; /* an lpfc_nodelist pointer */ - void *ctx_buf; /* an lpfc_dmabuf pointer */ - void *context3; /* a generic pointer. Code must - * accommodate the actual datatype. - */ + struct lpfc_nodelist *ctx_ndlp; /* caller ndlp pointer */ + struct lpfc_dmabuf *ctx_buf; /* caller buffer information */ + void *ext_buf; /* extended buffer for extended mbox + * cmds. Not a generic pointer. + * Use for storing virtual address. 
+ */ + + /* Pointers that are seldom used during mbox execution, but require + * a saved context. + */ + union { + unsigned long ox_rx_id; /* Used in els_rsp_rls_acc */ + struct lpfc_rdp_context *rdp; /* Used in get_rdp_info */ + struct lpfc_lcb_context *lcb; /* Used in set_beacon */ + struct completion *mbox_wait; /* Used in issue_mbox_wait */ + struct bsg_job_data *dd_data; /* Used in bsg_issue_mbox_cmpl + * and + * bsg_issue_mbox_ext_handle_job + */ + struct lpfc_iocbq *save_iocb; /* Used in defer_plogi_acc and + * lpfc_mbx_cmpl_resume_rpi + */ + } ctx_u; void (*mbox_cmpl) (struct lpfc_hba *, struct lpfcMboxq *); uint8_t mbox_flag; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 2541a8fba093..c1e9ec0243ba 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term * + * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term * * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Copyright (C) 2009-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* @@ -1118,8 +1118,9 @@ void lpfc_sli4_free_rpi(struct lpfc_hba *, int); void lpfc_sli4_remove_rpis(struct lpfc_hba *); void lpfc_sli4_async_event_proc(struct lpfc_hba *); void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *); -int lpfc_sli4_resume_rpi(struct lpfc_nodelist *, - void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *); +int lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp, + void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *), + struct lpfc_iocbq *iocbq); void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba); void lpfc_sli4_nvme_pci_offline_aborted(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd); diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 56f5889dbaf9..915f2f11fb55 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "14.4.0.0" +#define LPFC_DRIVER_VERSION "14.4.0.1" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 0f79840b9498..4439167a5188 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -166,7 +166,7 @@ lpfc_vport_sparm(struct lpfc_hba *phba, struct lpfc_vport *vport) } } - mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + mp = pmb->ctx_buf; memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm)); memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName, sizeof (struct lpfc_name)); @@ -674,10 +674,6 @@ lpfc_vport_delete(struct fc_vport *fc_vport) lpfc_free_sysfs_attr(vport); lpfc_debugfs_terminate(vport); - /* Remove FC host to break driver binding. */ - fc_remove_host(shost); - scsi_remove_host(shost); - /* Send the DA_ID and Fabric LOGO to cleanup Nameserver entries. 
*/ ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) @@ -721,6 +717,10 @@ lpfc_vport_delete(struct fc_vport *fc_vport) skip_logo: + /* Remove FC host to break driver binding. */ + fc_remove_host(shost); + scsi_remove_host(shost); + lpfc_cleanup(vport); /* Remove scsi host now. The nodes are cleaned up. */ diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 0380996b5ad2..55d590b91947 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -1644,7 +1644,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) if ((mpirep_offset != 0xFF) && drv_bufs[mpirep_offset].bsg_buf_len) { drv_buf_iter = &drv_bufs[mpirep_offset]; - drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) - 1 + + drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) + mrioc->reply_sz); bsg_reply_buf = kzalloc(drv_buf_iter->kern_buf_len, GFP_KERNEL); diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index ca2e932dd9b7..f684eb5e0489 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1775,9 +1775,9 @@ static ssize_t raid_state_show(struct device *dev, name = myrb_devstate_name(ldev_info->state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->state); } else { struct myrb_pdev_state *pdev_info = sdev->hostdata; @@ -1796,9 +1796,9 @@ static ssize_t raid_state_show(struct device *dev, else name = myrb_devstate_name(pdev_info->state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", pdev_info->state); } return ret; @@ -1886,11 +1886,11 @@ static ssize_t raid_level_show(struct device *dev, name = myrb_raidlevel_name(ldev_info->raid_level); if (!name) - return snprintf(buf, 32, "Invalid (%02X)\n", + return snprintf(buf, 64, 
"Invalid (%02X)\n", ldev_info->state); - return snprintf(buf, 32, "%s\n", name); + return snprintf(buf, 64, "%s\n", name); } - return snprintf(buf, 32, "Physical Drive\n"); + return snprintf(buf, 64, "Physical Drive\n"); } static DEVICE_ATTR_RO(raid_level); @@ -1903,15 +1903,15 @@ static ssize_t rebuild_show(struct device *dev, unsigned char status; if (sdev->channel < myrb_logical_channel(sdev->host)) - return snprintf(buf, 32, "physical device - not rebuilding\n"); + return snprintf(buf, 64, "physical device - not rebuilding\n"); status = myrb_get_rbld_progress(cb, &rbld_buf); if (rbld_buf.ldev_num != sdev->id || status != MYRB_STATUS_SUCCESS) - return snprintf(buf, 32, "not rebuilding\n"); + return snprintf(buf, 64, "not rebuilding\n"); - return snprintf(buf, 32, "rebuilding block %u of %u\n", + return snprintf(buf, 64, "rebuilding block %u of %u\n", rbld_buf.ldev_size - rbld_buf.blocks_left, rbld_buf.ldev_size); } diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index a1eec65a9713..e824be9d9bbb 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -947,9 +947,9 @@ static ssize_t raid_state_show(struct device *dev, name = myrs_devstate_name(ldev_info->dev_state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->dev_state); } else { struct myrs_pdev_info *pdev_info; @@ -958,9 +958,9 @@ static ssize_t raid_state_show(struct device *dev, pdev_info = sdev->hostdata; name = myrs_devstate_name(pdev_info->dev_state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", pdev_info->dev_state); } return ret; @@ -1066,13 +1066,13 @@ static ssize_t raid_level_show(struct device *dev, ldev_info = sdev->hostdata; name = myrs_raid_level_name(ldev_info->raid_level); if (!name) - 
return snprintf(buf, 32, "Invalid (%02X)\n", + return snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->dev_state); } else name = myrs_raid_level_name(MYRS_RAID_PHYSICAL); - return snprintf(buf, 32, "%s\n", name); + return snprintf(buf, 64, "%s\n", name); } static DEVICE_ATTR_RO(raid_level); @@ -1086,7 +1086,7 @@ static ssize_t rebuild_show(struct device *dev, unsigned char status; if (sdev->channel < cs->ctlr_info->physchan_present) - return snprintf(buf, 32, "physical device - not rebuilding\n"); + return snprintf(buf, 64, "physical device - not rebuilding\n"); ldev_info = sdev->hostdata; ldev_num = ldev_info->ldev_num; @@ -1098,11 +1098,11 @@ static ssize_t rebuild_show(struct device *dev, return -EIO; } if (ldev_info->rbld_active) { - return snprintf(buf, 32, "rebuilding block %zu of %zu\n", + return snprintf(buf, 64, "rebuilding block %zu of %zu\n", (size_t)ldev_info->rbld_lba, (size_t)ldev_info->cfg_devsize); } else - return snprintf(buf, 32, "not rebuilding\n"); + return snprintf(buf, 64, "not rebuilding\n"); } static ssize_t rebuild_store(struct device *dev, @@ -1190,7 +1190,7 @@ static ssize_t consistency_check_show(struct device *dev, unsigned short ldev_num; if (sdev->channel < cs->ctlr_info->physchan_present) - return snprintf(buf, 32, "physical device - not checking\n"); + return snprintf(buf, 64, "physical device - not checking\n"); ldev_info = sdev->hostdata; if (!ldev_info) @@ -1198,11 +1198,11 @@ static ssize_t consistency_check_show(struct device *dev, ldev_num = ldev_info->ldev_num; myrs_get_ldev_info(cs, ldev_num, ldev_info); if (ldev_info->cc_active) - return snprintf(buf, 32, "checking block %zu of %zu\n", + return snprintf(buf, 64, "checking block %zu of %zu\n", (size_t)ldev_info->cc_lba, (size_t)ldev_info->cfg_devsize); else - return snprintf(buf, 32, "not checking\n"); + return snprintf(buf, 64, "not checking\n"); } static ssize_t consistency_check_store(struct device *dev, diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 
e8bcc3a88732..0614b7e366b7 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -61,7 +61,9 @@ static atomic_t pmcraid_adapter_count = ATOMIC_INIT(0); * pmcraid_minor - minor number(s) to use */ static unsigned int pmcraid_major; -static struct class *pmcraid_class; +static const struct class pmcraid_class = { + .name = PMCRAID_DEVFILE, +}; static DECLARE_BITMAP(pmcraid_minor, PMCRAID_MAX_ADAPTERS); /* @@ -4723,7 +4725,7 @@ static int pmcraid_setup_chrdev(struct pmcraid_instance *pinstance) if (error) pmcraid_release_minor(minor); else - device_create(pmcraid_class, NULL, MKDEV(pmcraid_major, minor), + device_create(&pmcraid_class, NULL, MKDEV(pmcraid_major, minor), NULL, "%s%u", PMCRAID_DEVFILE, minor); return error; } @@ -4739,7 +4741,7 @@ static int pmcraid_setup_chrdev(struct pmcraid_instance *pinstance) static void pmcraid_release_chrdev(struct pmcraid_instance *pinstance) { pmcraid_release_minor(MINOR(pinstance->cdev.dev)); - device_destroy(pmcraid_class, + device_destroy(&pmcraid_class, MKDEV(pmcraid_major, MINOR(pinstance->cdev.dev))); cdev_del(&pinstance->cdev); } @@ -5390,10 +5392,10 @@ static int __init pmcraid_init(void) } pmcraid_major = MAJOR(dev); - pmcraid_class = class_create(PMCRAID_DEVFILE); - if (IS_ERR(pmcraid_class)) { - error = PTR_ERR(pmcraid_class); + error = class_register(&pmcraid_class); + + if (error) { pmcraid_err("failed to register with sysfs, error = %x\n", error); goto out_unreg_chrdev; @@ -5402,7 +5404,7 @@ static int __init pmcraid_init(void) error = pmcraid_netlink_init(); if (error) { - class_destroy(pmcraid_class); + class_unregister(&pmcraid_class); goto out_unreg_chrdev; } @@ -5413,7 +5415,7 @@ static int __init pmcraid_init(void) pmcraid_err("failed to register pmcraid driver, error = %x\n", error); - class_destroy(pmcraid_class); + class_unregister(&pmcraid_class); pmcraid_netlink_release(); out_unreg_chrdev: @@ -5432,7 +5434,7 @@ static void __exit pmcraid_exit(void) 
unregister_chrdev_region(MKDEV(pmcraid_major, 0), PMCRAID_MAX_ADAPTERS); pci_unregister_driver(&pmcraid_driver); - class_destroy(pmcraid_class); + class_unregister(&pmcraid_class); } module_init(pmcraid_init); diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 44449c70a375..76eeba435fd0 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2741,7 +2741,13 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport) return; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900c, + "%s pci offline detected (id %06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(fcport->vha); + qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, + 0, WAIT_TARGET); return; } } @@ -2763,7 +2769,11 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) vha = fcport->vha; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900b, + "%s pci offline detected (id %06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(vha); qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, 0, WAIT_TARGET); return; diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index deb642607deb..2f49baf131e2 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -82,7 +82,7 @@ typedef union { #include "qla_nvme.h" #define QLA2XXX_DRIVER_NAME "qla2xxx" #define QLA2XXX_APIDEV "ql2xapidev" -#define QLA2XXX_MANUFACTURER "Marvell Semiconductor, Inc." 
+#define QLA2XXX_MANUFACTURER "Marvell" /* * We have MAILBOX_REGISTER_COUNT sized arrays in a few places, diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 09cb9413670a..7309310d2ab9 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -44,7 +44,7 @@ extern int qla2x00_fabric_login(scsi_qla_host_t *, fc_port_t *, uint16_t *); extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *); extern int qla24xx_els_dcmd_iocb(scsi_qla_host_t *, int, port_id_t); -extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *, bool); +extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *); extern void qla2x00_els_dcmd2_free(scsi_qla_host_t *vha, struct els_plogi *els_plogi); diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index a314cfc5b263..8377624d76c9 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1193,8 +1193,12 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - /* ref: INIT */ - kref_put(&sp->cmd_kref, qla2x00_sp_release); + /* + * use qla24xx_async_gnl_sp_done to purge all pending gnl request. + * kref_put is call behind the scene. 
+ */ + sp->u.iocb_cmd.u.mbx.in_mb[0] = MBS_COMMAND_ERROR; + qla24xx_async_gnl_sp_done(sp, QLA_COMMAND_ERROR); fcport->flags &= ~(FCF_ASYNC_SENT); done: fcport->flags &= ~(FCF_ASYNC_ACTIVE); @@ -2665,6 +2669,40 @@ exit: return rval; } +static void qla_enable_fce_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->fce) { + ha->flags.fce_enabled = 1; + memset(ha->fce, 0, fce_calc_size(ha->fce_bufs)); + rval = qla2x00_enable_fce_trace(vha, + ha->fce_dma, ha->fce_bufs, ha->fce_mb, &ha->fce_bufs); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8033, + "Unable to reinitialize FCE (%d).\n", rval); + ha->flags.fce_enabled = 0; + } + } +} + +static void qla_enable_eft_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->eft) { + memset(ha->eft, 0, EFT_SIZE); + rval = qla2x00_enable_eft_trace(vha, ha->eft_dma, EFT_NUM_BUFFERS); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8034, + "Unable to reinitialize EFT (%d).\n", rval); + } + } +} /* * qla2x00_initialize_adapter * Initialize board. 
@@ -3668,9 +3706,8 @@ qla24xx_chip_diag(scsi_qla_host_t *vha) } static void -qla2x00_init_fce_trace(scsi_qla_host_t *vha) +qla2x00_alloc_fce_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3699,27 +3736,17 @@ qla2x00_init_fce_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS, - ha->fce_mb, &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x00bf, - "Unable to initialize FCE (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c0, "Allocated (%d KB) for FCE...\n", FCE_SIZE / 1024); - ha->flags.fce_enabled = 1; ha->fce_dma = tc_dma; ha->fce = tc; + ha->fce_bufs = FCE_NUM_BUFFERS; } static void -qla2x00_init_eft_trace(scsi_qla_host_t *vha) +qla2x00_alloc_eft_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3744,14 +3771,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x00c2, - "Unable to initialize EFT (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c3, "Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024); @@ -3759,13 +3778,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) ha->eft = tc; } -static void -qla2x00_alloc_offload_mem(scsi_qla_host_t *vha) -{ - qla2x00_init_fce_trace(vha); - qla2x00_init_eft_trace(vha); -} - void qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) { @@ -3820,10 +3832,10 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) if (ha->tgt.atio_ring) mq_size += ha->tgt.atio_q_length * sizeof(request_t); - qla2x00_init_fce_trace(vha); + qla2x00_alloc_fce_trace(vha); if (ha->fce) fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE; - qla2x00_init_eft_trace(vha); + qla2x00_alloc_eft_trace(vha); if (ha->eft) eft_size = EFT_SIZE; } @@ 
-4253,7 +4265,6 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) struct qla_hw_data *ha = vha->hw; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; unsigned long flags; - uint16_t fw_major_version; int done_once = 0; if (IS_P3P_TYPE(ha)) { @@ -4320,7 +4331,6 @@ execute_fw_with_lr: goto failed; enable_82xx_npiv: - fw_major_version = ha->fw_major_version; if (IS_P3P_TYPE(ha)) qla82xx_check_md_needed(vha); else @@ -4349,12 +4359,11 @@ enable_82xx_npiv: if (rval != QLA_SUCCESS) goto failed; - if (!fw_major_version && !(IS_P3P_TYPE(ha))) - qla2x00_alloc_offload_mem(vha); - if (ql2xallocfwdump && !(IS_P3P_TYPE(ha))) qla2x00_alloc_fw_dump(vha); + qla_enable_fce_trace(vha); + qla_enable_eft_trace(vha); } else { goto failed; } @@ -7487,12 +7496,12 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) int qla2x00_abort_isp(scsi_qla_host_t *vha) { - int rval; uint8_t status = 0; struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *vp, *tvp; struct req_que *req = ha->req_q_map[0]; unsigned long flags; + fc_port_t *fcport; if (vha->flags.online) { qla2x00_abort_isp_cleanup(vha); @@ -7561,6 +7570,15 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) "ISP Abort - ISP reg disconnect post nvmram config, exiting.\n"); return status; } + + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + if (!qla2x00_restart_isp(vha)) { clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags); @@ -7581,31 +7599,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) if (IS_QLA81XX(ha) || IS_QLA8031(ha)) qla2x00_get_fw_version(vha); - if (ha->fce) { - ha->flags.fce_enabled = 1; - memset(ha->fce, 0, - fce_calc_size(ha->fce_bufs)); - rval = qla2x00_enable_fce_trace(vha, - ha->fce_dma, ha->fce_bufs, ha->fce_mb, - &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x8033, - "Unable to reinitialize FCE " - "(%d).\n", rval); - ha->flags.fce_enabled = 0; - } - } - if 
(ha->eft) { - memset(ha->eft, 0, EFT_SIZE); - rval = qla2x00_enable_eft_trace(vha, - ha->eft_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x8034, - "Unable to reinitialize EFT " - "(%d).\n", rval); - } - } } else { /* failed the ISP abort */ vha->flags.online = 1; if (test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { @@ -7655,6 +7649,14 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) atomic_inc(&vp->vref_count); spin_unlock_irqrestore(&ha->vport_slock, flags); + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vp->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + qla2x00_vp_abort_isp(vp); spin_lock_irqsave(&ha->vport_slock, flags); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index df90169f8244..0b41e8a06602 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2587,6 +2587,33 @@ void qla2x00_sp_release(struct kref *kref) { struct srb *sp = container_of(kref, struct srb, cmd_kref); + struct scsi_qla_host *vha = sp->vha; + + switch (sp->type) { + case SRB_CT_PTHRU_CMD: + /* GPSC & GFPNID use fcport->ct_desc.ct_sns for both req & rsp */ + if (sp->u.iocb_cmd.u.ctarg.req && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.req != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.req_allocated_size, + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.rsp != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.rsp_allocated_size, + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + break; + default: + break; + } sp->free(sp); } @@ -2610,7 +2637,8 @@ static void qla2x00_els_dcmd_sp_free(srb_t *sp) { struct srb_iocb *elsio 
= &sp->u.iocb_cmd; - kfree(sp->fcport); + if (sp->fcport) + qla2x00_free_fcport(sp->fcport); if (elsio->u.els_logo.els_logo_pyld) dma_free_coherent(&sp->vha->hw->pdev->dev, DMA_POOL_SIZE, @@ -2692,7 +2720,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) { - kfree(fcport); + qla2x00_free_fcport(fcport); ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); return -ENOMEM; @@ -2723,6 +2751,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_logo.els_logo_pyld) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -2747,6 +2776,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (rval != QLA_SUCCESS) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -3012,7 +3042,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, - fc_port_t *fcport, bool wait) + fc_port_t *fcport) { srb_t *sp; struct srb_iocb *elsio = NULL; @@ -3027,8 +3057,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!sp) { ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); - fcport->flags &= ~FCF_ASYNC_ACTIVE; - return -ENOMEM; + goto done; } fcport->flags |= FCF_ASYNC_SENT; @@ -3037,9 +3066,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, ql_dbg(ql_dbg_io, vha, 0x3073, "%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24); - if (wait) - sp->flags = SRB_WAKEUP_ON_COMP; - sp->type = SRB_ELS_DCMD; sp->name = "ELS_DCMD"; sp->fcport = fcport; @@ -3055,7 +3081,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_plogi_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } resp_ptr = elsio->u.els_plogi.els_resp_pyld = @@ -3064,7 +3090,7 @@ 
qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_resp_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } ql_dbg(ql_dbg_io, vha, 0x3073, "PLOGI %p %p\n", ptr, resp_ptr); @@ -3080,7 +3106,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) { struct fc_els_flogi *p = ptr; - p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC); } @@ -3089,10 +3114,11 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, (uint8_t *)elsio->u.els_plogi.els_plogi_pyld, sizeof(*elsio->u.els_plogi.els_plogi_pyld)); - init_completion(&elsio->u.els_plogi.comp); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - rval = QLA_FUNCTION_FAILED; + fcport->flags |= FCF_LOGIN_NEEDED; + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + goto done_free_sp; } else { ql_dbg(ql_dbg_disc, vha, 0x3074, "%s PLOGI sent, hdl=%x, loopid=%x, to port_id %06x from port_id %06x\n", @@ -3100,21 +3126,15 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, fcport->d_id.b24, vha->d_id.b24); } - if (wait) { - wait_for_completion(&elsio->u.els_plogi.comp); - - if (elsio->u.els_plogi.comp_status != CS_COMPLETE) - rval = QLA_FUNCTION_FAILED; - } else { - goto done; - } + return rval; -out: - fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); +done_free_sp: qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi); /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); done: + fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); + qla2x00_set_fcport_disc_state(fcport, DSC_DELETED); return rval; } @@ -3918,7 +3938,7 @@ qla2x00_start_sp(srb_t *sp) return -EAGAIN; } - pkt = __qla2x00_alloc_iocbs(sp->qpair, sp); + pkt = qla2x00_alloc_iocbs_ready(sp->qpair, sp); if (!pkt) { rval = -EAGAIN; ql_log(ql_log_warn, vha, 0x700c, diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 21ec32b4fb28..0cd6f3e14882 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ 
b/drivers/scsi/qla2xxx/qla_mbx.c @@ -194,7 +194,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (ha->flags.purge_mbox || chip_reset != ha->chip_reset || ha->flags.eeh_busy) { ql_log(ql_log_warn, vha, 0xd035, - "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", + "Purge mbox: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]); rval = QLA_ABORTED; goto premature_exit; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index dd674378f2f3..1e2f52210f60 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4602,6 +4602,7 @@ fail_free_init_cb: ha->init_cb_dma = 0; fail_free_vp_map: kfree(ha->vp_map); + ha->vp_map = NULL; fail: ql_log(ql_log_fatal, NULL, 0x0030, "Memory allocation failure.\n"); @@ -5583,7 +5584,7 @@ qla2x00_do_work(struct scsi_qla_host *vha) break; case QLA_EVT_ELS_PLOGI: qla24xx_els_dcmd2_iocb(vha, ELS_DCMD_PLOGI, - e->u.fcport.fcport, false); + e->u.fcport.fcport); break; case QLA_EVT_SA_REPLACE: rc = qla24xx_issue_sa_replace_iocb(vha, e); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 2ef2dbac0db2..d7551b1443e4 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1062,6 +1062,16 @@ void qlt_free_session_done(struct work_struct *work) "%s: sess %p logout completed\n", __func__, sess); } + /* check for any straggling io left behind */ + if (!(sess->flags & FCF_FCP2_DEVICE) && + qla2x00_eh_wait_for_pending_commands(sess->vha, sess->d_id.b24, 0, WAIT_TARGET)) { + ql_log(ql_log_warn, vha, 0x3027, + "IO not return. 
Resetting.\n"); + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + qla2x00_wait_for_chip_reset(vha); + } + if (sess->logo_ack_needed) { sess->logo_ack_needed = 0; qla24xx_async_notify_ack(vha, sess, diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index d903563e969e..7627fd807bc3 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -6,9 +6,9 @@ /* * Driver version */ -#define QLA2XXX_VERSION "10.02.09.100-k" +#define QLA2XXX_VERSION "10.02.09.200-k" #define QLA_DRIVER_MAJOR_VER 10 #define QLA_DRIVER_MINOR_VER 2 #define QLA_DRIVER_PATCH_VER 9 -#define QLA_DRIVER_BETA_VER 100 +#define QLA_DRIVER_BETA_VER 200 diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 8d06475de17a..ffd7e7e72933 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1642,6 +1642,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, } EXPORT_SYMBOL(scsi_add_device); +int scsi_resume_device(struct scsi_device *sdev) +{ + struct device *dev = &sdev->sdev_gendev; + int ret = 0; + + device_lock(dev); + + /* + * Bail out if the device or its queue are not running. Otherwise, + * the rescan may block waiting for commands to be executed, with us + * holding the device lock. This can result in a potential deadlock + * in the power management core code when system resume is on-going. 
+ */ + if (sdev->sdev_state != SDEV_RUNNING || + blk_queue_pm_only(sdev->request_queue)) { + ret = -EWOULDBLOCK; + goto unlock; + } + + if (dev->driver && try_module_get(dev->driver->owner)) { + struct scsi_driver *drv = to_scsi_driver(dev->driver); + + if (drv->resume) + ret = drv->resume(dev); + module_put(dev->driver->owner); + } + +unlock: + device_unlock(dev); + + return ret; +} +EXPORT_SYMBOL(scsi_resume_device); + int scsi_rescan_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ccff8f2e2e75..58fdf679341d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3920,7 +3920,7 @@ static int sd_probe(struct device *dev) error = device_add_disk(dev, gd, NULL); if (error) { - put_device(&sdkp->disk_dev); + device_unregister(&sdkp->disk_dev); put_disk(gd); goto out; } @@ -4108,7 +4108,21 @@ static int sd_suspend_runtime(struct device *dev) return sd_suspend_common(dev, true); } -static int sd_resume(struct device *dev, bool runtime) +static int sd_resume(struct device *dev) +{ + struct scsi_disk *sdkp = dev_get_drvdata(dev); + + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + + if (opal_unlock_from_suspend(sdkp->opal_dev)) { + sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); + return -EIO; + } + + return 0; +} + +static int sd_resume_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret; @@ -4124,7 +4138,7 @@ static int sd_resume(struct device *dev, bool runtime) sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { - opal_unlock_from_suspend(sdkp->opal_dev); + sd_resume(dev); sdkp->suspended = false; } @@ -4143,7 +4157,7 @@ static int sd_resume_system(struct device *dev) return 0; } - return sd_resume(dev, false); + return sd_resume_common(dev, false); } static int sd_resume_runtime(struct device *dev) @@ -4170,7 +4184,7 @@ static int sd_resume_runtime(struct device *dev) "Failed to 
clear sense data\n"); } - return sd_resume(dev, true); + return sd_resume_common(dev, true); } static const struct dev_pm_ops sd_pm_ops = { @@ -4193,6 +4207,7 @@ static struct scsi_driver sd_template = { .pm = &sd_pm_ops, }, .rescan = sd_rescan, + .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, .done = sd_done, diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 86210e4dd0d3..386981c6976a 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1424,7 +1424,9 @@ static const struct file_operations sg_fops = { .llseek = no_llseek, }; -static struct class *sg_sysfs_class; +static const struct class sg_sysfs_class = { + .name = "scsi_generic" +}; static int sg_sysfs_valid = 0; @@ -1526,7 +1528,7 @@ sg_add_device(struct device *cl_dev) if (sg_sysfs_valid) { struct device *sg_class_member; - sg_class_member = device_create(sg_sysfs_class, cl_dev->parent, + sg_class_member = device_create(&sg_sysfs_class, cl_dev->parent, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), sdp, "%s", sdp->name); @@ -1616,7 +1618,7 @@ sg_remove_device(struct device *cl_dev) read_unlock_irqrestore(&sdp->sfd_lock, iflags); sysfs_remove_link(&scsidp->sdev_gendev.kobj, "generic"); - device_destroy(sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index)); + device_destroy(&sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index)); cdev_del(sdp->cdev); sdp->cdev = NULL; @@ -1687,11 +1689,9 @@ init_sg(void) SG_MAX_DEVS, "sg"); if (rc) return rc; - sg_sysfs_class = class_create("scsi_generic"); - if ( IS_ERR(sg_sysfs_class) ) { - rc = PTR_ERR(sg_sysfs_class); + rc = class_register(&sg_sysfs_class); + if (rc) goto err_out; - } sg_sysfs_valid = 1; rc = scsi_register_interface(&sg_interface); if (0 == rc) { @@ -1700,7 +1700,7 @@ init_sg(void) #endif /* CONFIG_SCSI_PROC_FS */ return 0; } - class_destroy(sg_sysfs_class); + class_unregister(&sg_sysfs_class); register_sg_sysctls(); err_out: unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS); @@ -1715,7 
+1715,7 @@ exit_sg(void) remove_proc_subtree("scsi/sg", NULL); #endif /* CONFIG_SCSI_PROC_FS */ scsi_unregister_interface(&sg_interface); - class_destroy(sg_sysfs_class); + class_unregister(&sg_sysfs_class); sg_sysfs_valid = 0; unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS); @@ -2207,6 +2207,7 @@ sg_remove_sfp_usercontext(struct work_struct *work) { struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work); struct sg_device *sdp = sfp->parentdp; + struct scsi_device *device = sdp->device; Sg_request *srp; unsigned long iflags; @@ -2232,8 +2233,9 @@ sg_remove_sfp_usercontext(struct work_struct *work) "sg_remove_sfp: sfp=0x%p\n", sfp)); kfree(sfp); - scsi_device_put(sdp->device); + WARN_ON_ONCE(kref_read(&sdp->d_ref) != 1); kref_put(&sdp->d_ref, sg_device_destroy); + scsi_device_put(device); module_put(THIS_MODULE); } diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 338aa8c42968..5a9bcf8e0792 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -87,7 +87,7 @@ static int try_rdio = 1; static int try_wdio = 1; static int debug_flag; -static struct class st_sysfs_class; +static const struct class st_sysfs_class; static const struct attribute_group *st_dev_groups[]; static const struct attribute_group *st_drv_groups[]; @@ -4438,7 +4438,7 @@ static void scsi_tape_release(struct kref *kref) return; } -static struct class st_sysfs_class = { +static const struct class st_sysfs_class = { .name = "scsi_tape", .dev_groups = st_dev_groups, }; diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 079035db7dd8..92a662d1b55c 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -852,39 +852,39 @@ static int fsl_lpspi_probe(struct platform_device *pdev) fsl_lpspi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(fsl_lpspi->base)) { ret = PTR_ERR(fsl_lpspi->base); - goto out_controller_put; + return ret; } fsl_lpspi->base_phys = res->start; irq = platform_get_irq(pdev, 0); if 
(irq < 0) { ret = irq; - goto out_controller_put; + return ret; } ret = devm_request_irq(&pdev->dev, irq, fsl_lpspi_isr, 0, dev_name(&pdev->dev), fsl_lpspi); if (ret) { dev_err(&pdev->dev, "can't get irq%d: %d\n", irq, ret); - goto out_controller_put; + return ret; } fsl_lpspi->clk_per = devm_clk_get(&pdev->dev, "per"); if (IS_ERR(fsl_lpspi->clk_per)) { ret = PTR_ERR(fsl_lpspi->clk_per); - goto out_controller_put; + return ret; } fsl_lpspi->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); if (IS_ERR(fsl_lpspi->clk_ipg)) { ret = PTR_ERR(fsl_lpspi->clk_ipg); - goto out_controller_put; + return ret; } /* enable the clock */ ret = fsl_lpspi_init_rpm(fsl_lpspi); if (ret) - goto out_controller_put; + return ret; ret = pm_runtime_get_sync(fsl_lpspi->dev); if (ret < 0) { @@ -945,8 +945,6 @@ out_pm_get: pm_runtime_dont_use_autosuspend(fsl_lpspi->dev); pm_runtime_put_sync(fsl_lpspi->dev); pm_runtime_disable(fsl_lpspi->dev); -out_controller_put: - spi_controller_put(controller); return ret; } diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c index 969965d7bc98..cc18d320370f 100644 --- a/drivers/spi/spi-pci1xxxx.c +++ b/drivers/spi/spi-pci1xxxx.c @@ -725,6 +725,8 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id * spi_bus->spi_int[iter] = devm_kzalloc(&pdev->dev, sizeof(struct pci1xxxx_spi_internal), GFP_KERNEL); + if (!spi_bus->spi_int[iter]) + return -ENOMEM; spi_sub_ptr = spi_bus->spi_int[iter]; spi_sub_ptr->spi_host = devm_spi_alloc_host(dev, sizeof(struct spi_controller)); if (!spi_sub_ptr->spi_host) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 9fcbe040cb2f..f726d8670428 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -430,7 +430,7 @@ static bool s3c64xx_spi_can_dma(struct spi_controller *host, struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host); if (sdd->rx_dma.ch && sdd->tx_dma.ch) - return xfer->len > sdd->fifo_depth; + return xfer->len >= 
sdd->fifo_depth; return false; } @@ -826,10 +826,9 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host, return status; } - if (!is_polling(sdd) && (xfer->len > fifo_len) && + if (!is_polling(sdd) && xfer->len >= fifo_len && sdd->rx_dma.ch && sdd->tx_dma.ch) { use_dma = 1; - } else if (xfer->len >= fifo_len) { tx_buf = xfer->tx_buf; rx_buf = xfer->rx_buf; diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c index 258aa0e37f55..4c3684dd902e 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c @@ -937,8 +937,9 @@ static int create_component(struct vchiq_mmal_instance *instance, /* build component create message */ m.h.type = MMAL_MSG_TYPE_COMPONENT_CREATE; m.u.component_create.client_component = component->client_component; - strncpy(m.u.component_create.name, name, - sizeof(m.u.component_create.name)); + strscpy_pad(m.u.component_create.name, name, + sizeof(m.u.component_create.name)); + m.u.component_create.pid = 0; ret = send_synchronous_mmal_msg(instance, &m, sizeof(m.u.component_create), diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index 679720021183..d9a6242264b7 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -583,7 +583,7 @@ int iscsit_dataout_datapduinorder_no_fbit( struct iscsi_pdu *pdu) { int i, send_recovery_r2t = 0, recovery = 0; - u32 length = 0, offset = 0, pdu_count = 0, xfer_len = 0; + u32 length = 0, offset = 0, pdu_count = 0; struct iscsit_conn *conn = cmd->conn; struct iscsi_pdu *first_pdu = NULL; @@ -596,7 +596,6 @@ int iscsit_dataout_datapduinorder_no_fbit( if (cmd->pdu_list[i].seq_no == pdu->seq_no) { if (!first_pdu) first_pdu = &cmd->pdu_list[i]; - xfer_len += cmd->pdu_list[i].length; pdu_count++; } else if (pdu_count) break; diff --git a/drivers/thermal/devfreq_cooling.c 
b/drivers/thermal/devfreq_cooling.c index 50dec24e967a..8fd7cf1932cd 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -214,7 +214,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd res = dfc->power_ops->get_real_power(df, power, freq, voltage); if (!res) { - state = dfc->capped_state; + state = dfc->max_state - dfc->capped_state; /* Convert EM power into milli-Watts first */ rcu_read_lock(); diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 1b17dc4c219c..e25e48d76aa7 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -606,7 +606,7 @@ static int allocate_actors_buffer(struct power_allocator_params *params, /* There might be no cooling devices yet. */ if (!num_actors) { - ret = -EINVAL; + ret = 0; goto clean_state; } @@ -679,11 +679,6 @@ static int power_allocator_bind(struct thermal_zone_device *tz) return -ENOMEM; get_governor_trips(tz, params); - if (!params->trip_max) { - dev_warn(&tz->device, "power_allocator: missing trip_max\n"); - kfree(params); - return -EINVAL; - } ret = check_power_actors(tz, params); if (ret < 0) { @@ -714,9 +709,10 @@ static int power_allocator_bind(struct thermal_zone_device *tz) else params->sustainable_power = tz->tzp->sustainable_power; - estimate_pid_constants(tz, tz->tzp->sustainable_power, - params->trip_switch_on, - params->trip_max->temperature); + if (params->trip_max) + estimate_pid_constants(tz, tz->tzp->sustainable_power, + params->trip_switch_on, + params->trip_max->temperature); reset_pid_controller(params); diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c index 09f6050dd041..497abf0d47ca 100644 --- a/drivers/thermal/thermal_trip.c +++ b/drivers/thermal/thermal_trip.c @@ -65,7 +65,6 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz) { const struct thermal_trip *trip; int low = -INT_MAX, high = INT_MAX; - bool 
same_trip = false; int ret; lockdep_assert_held(&tz->lock); @@ -74,36 +73,22 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz) return; for_each_trip(tz, trip) { - bool low_set = false; int trip_low; trip_low = trip->temperature - trip->hysteresis; - if (trip_low < tz->temperature && trip_low > low) { + if (trip_low < tz->temperature && trip_low > low) low = trip_low; - low_set = true; - same_trip = false; - } if (trip->temperature > tz->temperature && - trip->temperature < high) { + trip->temperature < high) high = trip->temperature; - same_trip = low_set; - } } /* No need to change trip points */ if (tz->prev_low_trip == low && tz->prev_high_trip == high) return; - /* - * If "high" and "low" are the same, skip the change unless this is the - * first time. - */ - if (same_trip && (tz->prev_low_trip != -INT_MAX || - tz->prev_high_trip != INT_MAX)) - return; - tz->prev_low_trip = low; tz->prev_high_trip = high; diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 8db81f1a12d5..768bf87cd80d 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -94,7 +94,7 @@ void ufshcd_mcq_config_mac(struct ufs_hba *hba, u32 max_active_cmds) val = ufshcd_readl(hba, REG_UFS_MCQ_CFG); val &= ~MCQ_CFG_MAC_MASK; - val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds); + val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds - 1); ufshcd_writel(hba, val, REG_UFS_MCQ_CFG); } EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index e30fd125988d..a0f8e930167d 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -3217,7 +3217,9 @@ retry: /* MCQ mode */ if (is_mcq_enabled(hba)) { - err = ufshcd_clear_cmd(hba, lrbp->task_tag); + /* successfully cleared the command, retry if needed */ + if (ufshcd_clear_cmd(hba, lrbp->task_tag) == 0) + err = -EAGAIN; hba->dev_cmd.complete = NULL; return err; } @@ -9791,7 +9793,10 @@ static int __ufshcd_wl_suspend(struct ufs_hba 
*hba, enum ufs_pm_op pm_op) /* UFS device & link must be active before we enter in this function */ if (!ufshcd_is_ufs_dev_active(hba) || !ufshcd_is_link_active(hba)) { - ret = -EINVAL; + /* Wait err handler finish or trigger err recovery */ + if (!ufshcd_eh_in_progress(hba)) + ufshcd_force_error_recovery(hba); + ret = -EBUSY; goto enable_scaling; } diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 8d68bd21ae73..06859e17b67b 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1210,8 +1210,10 @@ static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up) list_for_each_entry(clki, head, list) { if (!IS_ERR_OR_NULL(clki->clk) && - !strcmp(clki->name, "core_clk_unipro")) { - if (is_scale_up) + !strcmp(clki->name, "core_clk_unipro")) { + if (!clki->max_freq) + cycles_in_1us = 150; /* default for backwards compatibility */ + else if (is_scale_up) cycles_in_1us = ceil(clki->max_freq, (1000 * 1000)); else cycles_in_1us = ceil(clk_get_rate(clki->clk), (1000 * 1000)); diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index bb77de6fa067..009158fef2a8 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -792,7 +792,7 @@ static int uio_mmap_dma_coherent(struct vm_area_struct *vma) */ vma->vm_pgoff = 0; - addr = (void *)mem->addr; + addr = (void *)(uintptr_t)mem->addr; ret = dma_mmap_coherent(mem->dma_device, vma, addr, diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c index d5f9384df125..13cc35ab5d29 100644 --- a/drivers/uio/uio_dmem_genirq.c +++ b/drivers/uio/uio_dmem_genirq.c @@ -60,7 +60,7 @@ static int uio_dmem_genirq_open(struct uio_info *info, struct inode *inode) addr = dma_alloc_coherent(&priv->pdev->dev, uiomem->size, &uiomem->dma_addr, GFP_KERNEL); - uiomem->addr = addr ? (phys_addr_t) addr : DMEM_MAP_ERROR; + uiomem->addr = addr ? 
(uintptr_t) addr : DMEM_MAP_ERROR; ++uiomem; } priv->refcnt++; @@ -89,7 +89,7 @@ static int uio_dmem_genirq_release(struct uio_info *info, struct inode *inode) break; if (uiomem->addr) { dma_free_coherent(uiomem->dma_device, uiomem->size, - (void *) uiomem->addr, + (void *) (uintptr_t) uiomem->addr, uiomem->dma_addr); } uiomem->addr = DMEM_MAP_ERROR; diff --git a/drivers/uio/uio_pruss.c b/drivers/uio/uio_pruss.c index 72b33f7d4c40..f67881cba645 100644 --- a/drivers/uio/uio_pruss.c +++ b/drivers/uio/uio_pruss.c @@ -191,7 +191,7 @@ static int pruss_probe(struct platform_device *pdev) p->mem[1].size = sram_pool_sz; p->mem[1].memtype = UIO_MEM_PHYS; - p->mem[2].addr = (phys_addr_t) gdev->ddr_vaddr; + p->mem[2].addr = (uintptr_t) gdev->ddr_vaddr; p->mem[2].dma_addr = gdev->ddr_paddr; p->mem[2].size = extram_pool_sz; p->mem[2].memtype = UIO_MEM_DMA_COHERENT; diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index c553decb5461..c8262e2f2917 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -485,6 +485,7 @@ out_free_mem: static int service_outstanding_interrupt(struct wdm_device *desc) { int rv = 0; + int used; /* submit read urb only if the device is waiting for it */ if (!desc->resp_count || !--desc->resp_count) @@ -499,7 +500,10 @@ static int service_outstanding_interrupt(struct wdm_device *desc) goto out; } - set_bit(WDM_RESPONDING, &desc->flags); + used = test_and_set_bit(WDM_RESPONDING, &desc->flags); + if (used) + goto out; + spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3ee8455585b6..9446660e231b 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -130,7 +130,6 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); #define HUB_DEBOUNCE_STEP 25 #define HUB_DEBOUNCE_STABLE 100 -static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device 
*udev); static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, @@ -720,14 +719,14 @@ static void kick_hub_wq(struct usb_hub *hub) */ intf = to_usb_interface(hub->intfdev); usb_autopm_get_interface_no_resume(intf); - kref_get(&hub->kref); + hub_get(hub); if (queue_work(hub_wq, &hub->events)) return; /* the work has already been scheduled */ usb_autopm_put_interface_async(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); } void usb_kick_hub_wq(struct usb_device *hdev) @@ -1095,7 +1094,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) goto init2; goto init3; } - kref_get(&hub->kref); + hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits @@ -1343,7 +1342,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_unlock(&hdev->dev); } - kref_put(&hub->kref, hub_release); + hub_put(hub); } /* Implement the continuations for the delays above */ @@ -1759,6 +1758,16 @@ static void hub_release(struct kref *kref) kfree(hub); } +void hub_get(struct usb_hub *hub) +{ + kref_get(&hub->kref); +} + +void hub_put(struct usb_hub *hub) +{ + kref_put(&hub->kref, hub_release); +} + static unsigned highspeed_hubs; static void hub_disconnect(struct usb_interface *intf) @@ -1807,7 +1816,7 @@ static void hub_disconnect(struct usb_interface *intf) onboard_hub_destroy_pdevs(&hub->onboard_hub_devs); - kref_put(&hub->kref, hub_release); + hub_put(hub); } static bool hub_descriptor_is_sane(struct usb_host_interface *desc) @@ -5934,7 +5943,7 @@ out_hdev_lock: /* Balance the stuff in kick_hub_wq() and allow autosuspend */ usb_autopm_put_interface(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); kcov_remote_stop(); } diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 43ce21c96a51..183b69dc2955 100644 --- 
a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -129,6 +129,8 @@ extern void usb_hub_remove_port_device(struct usb_hub *hub, extern int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub, int port1, bool set); extern struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev); +extern void hub_get(struct usb_hub *hub); +extern void hub_put(struct usb_hub *hub); extern int hub_port_debounce(struct usb_hub *hub, int port1, bool must_be_connected); extern int usb_clear_port_feature(struct usb_device *hdev, diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 5b5e613a11e5..686c01af03e6 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -56,11 +56,22 @@ static ssize_t disable_show(struct device *dev, u16 portstatus, unused; bool disabled; int rc; + struct kernfs_node *kn; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. 
+ */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -70,9 +81,13 @@ static ssize_t disable_show(struct device *dev, usb_hub_port_status(hub, port1, &portstatus, &unused); disabled = !usb_port_is_power_on(hub, portstatus); -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); if (rc) return rc; @@ -90,15 +105,26 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, int port1 = port_dev->portnum; bool disabled; int rc; + struct kernfs_node *kn; rc = kstrtobool(buf, &disabled); if (rc) return rc; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -119,9 +145,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, if (!rc) rc = count; -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); return rc; } diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index f98263e21c2a..d83231d6736a 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -1217,14 +1217,24 @@ static ssize_t interface_authorized_store(struct device *dev, { struct usb_interface *intf = to_usb_interface(dev); bool val; + struct kernfs_node *kn; if (kstrtobool(buf, &val) != 0) return -EINVAL; - if (val) + if (val) { usb_authorize_interface(intf); - else - usb_deauthorize_interface(intf); + } else { + /* + * Prevent deadlock if another 
process is concurrently + * trying to unregister intf. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (kn) { + usb_deauthorize_interface(intf); + sysfs_unbreak_active_protection(kn); + } + } return count; } diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index c92a1da46a01..a141f83aba0c 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -729,8 +729,14 @@ struct dwc2_dregs_backup { * struct dwc2_hregs_backup - Holds host registers state before * entering partial power down * @hcfg: Backup of HCFG register + * @hflbaddr: Backup of HFLBADDR register * @haintmsk: Backup of HAINTMSK register + * @hcchar: Backup of HCCHAR register + * @hcsplt: Backup of HCSPLT register * @hcintmsk: Backup of HCINTMSK register + * @hctsiz: Backup of HCTSIZ register + * @hdma: Backup of HCDMA register + * @hcdmab: Backup of HCDMAB register * @hprt0: Backup of HPTR0 register * @hfir: Backup of HFIR register * @hptxfsiz: Backup of HPTXFSIZ register @@ -738,8 +744,14 @@ struct dwc2_dregs_backup { */ struct dwc2_hregs_backup { u32 hcfg; + u32 hflbaddr; u32 haintmsk; + u32 hcchar[MAX_EPS_CHANNELS]; + u32 hcsplt[MAX_EPS_CHANNELS]; u32 hcintmsk[MAX_EPS_CHANNELS]; + u32 hctsiz[MAX_EPS_CHANNELS]; + u32 hcidma[MAX_EPS_CHANNELS]; + u32 hcidmab[MAX_EPS_CHANNELS]; u32 hprt0; u32 hfir; u32 hptxfsiz; @@ -1086,6 +1098,7 @@ struct dwc2_hsotg { bool needs_byte_swap; /* DWC OTG HW Release versions */ +#define DWC2_CORE_REV_4_30a 0x4f54430a #define DWC2_CORE_REV_2_71a 0x4f54271a #define DWC2_CORE_REV_2_72a 0x4f54272a #define DWC2_CORE_REV_2_80a 0x4f54280a @@ -1323,6 +1336,7 @@ int dwc2_backup_global_registers(struct dwc2_hsotg *hsotg); int dwc2_restore_global_registers(struct dwc2_hsotg *hsotg); void dwc2_enable_acg(struct dwc2_hsotg *hsotg); +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup); /* This function should be called on every hardware interrupt. 
*/ irqreturn_t dwc2_handle_common_intr(int irq, void *dev); diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index 158ede753854..26d752a4c3ca 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -297,7 +297,8 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. */ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } @@ -322,10 +323,11 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) * @hsotg: Programming view of DWC_otg controller * */ -static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup) { u32 glpmcfg; - u32 i = 0; + u32 pcgctl; + u32 dctl; if (hsotg->lx_state != DWC2_L1) { dev_err(hsotg->dev, "Core isn't in DWC2_L1 state\n"); @@ -334,37 +336,57 @@ static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) glpmcfg = dwc2_readl(hsotg, GLPMCFG); if (dwc2_is_device_mode(hsotg)) { - dev_dbg(hsotg->dev, "Exit from L1 state\n"); + dev_dbg(hsotg->dev, "Exit from L1 state, remotewakeup=%d\n", remotewakeup); glpmcfg &= ~GLPMCFG_ENBLSLPM; - glpmcfg &= ~GLPMCFG_HIRD_THRES_EN; + glpmcfg &= ~GLPMCFG_HIRD_THRES_MASK; dwc2_writel(hsotg, glpmcfg, GLPMCFG); - do { - glpmcfg = dwc2_readl(hsotg, GLPMCFG); + pcgctl = dwc2_readl(hsotg, PCGCTL); + pcgctl &= ~PCGCTL_ENBL_SLEEP_GATING; + dwc2_writel(hsotg, pcgctl, PCGCTL); - if (!(glpmcfg & (GLPMCFG_COREL1RES_MASK | - GLPMCFG_L1RESUMEOK | GLPMCFG_SLPSTS))) - break; + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_ENBESL) { + glpmcfg |= GLPMCFG_RSTRSLPSTS; + dwc2_writel(hsotg, glpmcfg, GLPMCFG); + } + + if (remotewakeup) { + if (dwc2_hsotg_wait_bit_set(hsotg, GLPMCFG, GLPMCFG_L1RESUMEOK, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GLPMCFG_L1RESUMEOK\n", 
__func__); + goto fail; + return; + } + + dctl = dwc2_readl(hsotg, DCTL); + dctl |= DCTL_RMTWKUPSIG; + dwc2_writel(hsotg, dctl, DCTL); - udelay(1); - } while (++i < 200); + if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_WKUPINT, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GINTSTS_WKUPINT\n", __func__); + goto fail; + return; + } + } - if (i == 200) { - dev_err(hsotg->dev, "Failed to exit L1 sleep state in 200us.\n"); + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_COREL1RES_MASK || glpmcfg & GLPMCFG_SLPSTS || + glpmcfg & GLPMCFG_L1RESUMEOK) { + goto fail; return; } - dwc2_gadget_init_lpm(hsotg); + + /* Inform gadget to exit from L1 */ + call_gadget(hsotg, resume); + /* Change to L0 state */ + hsotg->lx_state = DWC2_L0; + hsotg->bus_suspended = false; +fail: dwc2_gadget_init_lpm(hsotg); } else { /* TODO */ dev_err(hsotg->dev, "Host side LPM is not supported.\n"); return; } - - /* Change to L0 state */ - hsotg->lx_state = DWC2_L0; - - /* Inform gadget to exit from L1 */ - call_gadget(hsotg, resume); } /* @@ -385,7 +407,7 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) dev_dbg(hsotg->dev, "%s lxstate = %d\n", __func__, hsotg->lx_state); if (hsotg->lx_state == DWC2_L1) { - dwc2_wakeup_from_lpm_l1(hsotg); + dwc2_wakeup_from_lpm_l1(hsotg, false); return; } @@ -408,7 +430,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. 
*/ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } else { /* Change to L0 state */ @@ -425,7 +448,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) } if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_host_exit_clock_gating(hsotg, 1); /* diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index b517a7216de2..b2f6da5b65cc 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1415,6 +1415,10 @@ static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req, ep->name, req, req->length, req->buf, req->no_interrupt, req->zero, req->short_not_ok); + if (hs->lx_state == DWC2_L1) { + dwc2_wakeup_from_lpm_l1(hs, true); + } + /* Prevent new request submission when controller is suspended */ if (hs->lx_state != DWC2_L0) { dev_dbg(hs->dev, "%s: submit request only in active state\n", @@ -3727,6 +3731,12 @@ irq_retry: if (hsotg->in_ppd && hsotg->lx_state == DWC2_L2) dwc2_exit_partial_power_down(hsotg, 0, true); + /* Exit gadget mode clock gating. 
*/ + if (hsotg->params.power_down == + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) + dwc2_gadget_exit_clock_gating(hsotg, 0); + hsotg->lx_state = DWC2_L0; } diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 35c7a4df8e71..dd5b1c5691e1 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -2701,8 +2701,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } qh = list_entry(qh_ptr, struct dwc2_qh, qh_list_entry); - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the periodic ready schedule to the @@ -2735,8 +2738,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the non-periodic inactive schedule to the @@ -4143,6 +4149,8 @@ void dwc2_host_complete(struct dwc2_hsotg *hsotg, struct dwc2_qtd *qtd, urb->actual_length); if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) { + if (!hsotg->params.dma_desc_enable) + urb->start_frame = qtd->qh->start_active_frame; urb->error_count = dwc2_hcd_urb_get_error_count(qtd->urb); for (i = 0; i < urb->number_of_packets; ++i) { urb->iso_frame_desc[i].actual_length = @@ -4649,7 +4657,7 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, } if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else @@ -5406,9 +5414,16 @@ int dwc2_backup_host_registers(struct dwc2_hsotg *hsotg) /* Backup Host regs */ hr = &hsotg->hr_backup; hr->hcfg = dwc2_readl(hsotg, HCFG); + 
hr->hflbaddr = dwc2_readl(hsotg, HFLBADDR); hr->haintmsk = dwc2_readl(hsotg, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + hr->hcchar[i] = dwc2_readl(hsotg, HCCHAR(i)); + hr->hcsplt[i] = dwc2_readl(hsotg, HCSPLT(i)); hr->hcintmsk[i] = dwc2_readl(hsotg, HCINTMSK(i)); + hr->hctsiz[i] = dwc2_readl(hsotg, HCTSIZ(i)); + hr->hcidma[i] = dwc2_readl(hsotg, HCDMA(i)); + hr->hcidmab[i] = dwc2_readl(hsotg, HCDMAB(i)); + } hr->hprt0 = dwc2_read_hprt0(hsotg); hr->hfir = dwc2_readl(hsotg, HFIR); @@ -5442,10 +5457,17 @@ int dwc2_restore_host_registers(struct dwc2_hsotg *hsotg) hr->valid = false; dwc2_writel(hsotg, hr->hcfg, HCFG); + dwc2_writel(hsotg, hr->hflbaddr, HFLBADDR); dwc2_writel(hsotg, hr->haintmsk, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + dwc2_writel(hsotg, hr->hcchar[i], HCCHAR(i)); + dwc2_writel(hsotg, hr->hcsplt[i], HCSPLT(i)); dwc2_writel(hsotg, hr->hcintmsk[i], HCINTMSK(i)); + dwc2_writel(hsotg, hr->hctsiz[i], HCTSIZ(i)); + dwc2_writel(hsotg, hr->hcidma[i], HCDMA(i)); + dwc2_writel(hsotg, hr->hcidmab[i], HCDMAB(i)); + } dwc2_writel(hsotg, hr->hprt0, HPRT0); dwc2_writel(hsotg, hr->hfir, HFIR); @@ -5610,10 +5632,12 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, dwc2_writel(hsotg, hr->hcfg, HCFG); /* De-assert Wakeup Logic */ - gpwrdn = dwc2_readl(hsotg, GPWRDN); - gpwrdn &= ~GPWRDN_PMUACTV; - dwc2_writel(hsotg, gpwrdn, GPWRDN); - udelay(10); + if (!(rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } hprt0 = hr->hprt0; hprt0 |= HPRT0_PWR; @@ -5638,6 +5662,13 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, hprt0 |= HPRT0_RES; dwc2_writel(hsotg, hprt0, HPRT0); + /* De-assert Wakeup Logic */ + if ((rem_wakeup && 
hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } /* Wait for Resume time and then program HPRT again */ mdelay(100); hprt0 &= ~HPRT0_RES; diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c index 6b4d825e97a2..79582b102c7e 100644 --- a/drivers/usb/dwc2/hcd_ddma.c +++ b/drivers/usb/dwc2/hcd_ddma.c @@ -559,7 +559,7 @@ static void dwc2_init_isoc_dma_desc(struct dwc2_hsotg *hsotg, idx = qh->td_last; inc = qh->host_interval; hsotg->frame_number = dwc2_hcd_get_frame_number(hsotg); - cur_idx = dwc2_frame_list_idx(hsotg->frame_number); + cur_idx = idx; next_idx = dwc2_desclist_idx_inc(qh->td_last, inc, qh->dev_speed); /* @@ -866,6 +866,8 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, { struct dwc2_dma_desc *dma_desc; struct dwc2_hcd_iso_packet_desc *frame_desc; + u16 frame_desc_idx; + struct urb *usb_urb = qtd->urb->priv; u16 remain = 0; int rc = 0; @@ -878,8 +880,11 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, DMA_FROM_DEVICE); dma_desc = &qh->desc_list[idx]; + frame_desc_idx = (idx - qtd->isoc_td_first) & (usb_urb->number_of_packets - 1); - frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index_last]; + frame_desc = &qtd->urb->iso_descs[frame_desc_idx]; + if (idx == qtd->isoc_td_first) + usb_urb->start_frame = dwc2_hcd_get_frame_number(hsotg); dma_desc->buf = (u32)(qtd->urb->dma + frame_desc->offset); if (chan->ep_is_in) remain = (dma_desc->status & HOST_DMA_ISOC_NBYTES_MASK) >> @@ -900,7 +905,7 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, frame_desc->status = 0; } - if (++qtd->isoc_frame_index == qtd->urb->packet_count) { + if (++qtd->isoc_frame_index == usb_urb->number_of_packets) { /* * urb->status is not used for isoc transfers here. The * individual frame_desc status are used instead. 
@@ -1005,11 +1010,11 @@ static void dwc2_complete_isoc_xfer_ddma(struct dwc2_hsotg *hsotg, return; idx = dwc2_desclist_idx_inc(idx, qh->host_interval, chan->speed); - if (!rc) + if (rc == 0) continue; - if (rc == DWC2_CMPL_DONE) - break; + if (rc == DWC2_CMPL_DONE || rc == DWC2_CMPL_STOP) + goto stop_scan; /* rc == DWC2_CMPL_STOP */ diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h index 13abdd5f6752..12f8c7f86dc9 100644 --- a/drivers/usb/dwc2/hw.h +++ b/drivers/usb/dwc2/hw.h @@ -698,7 +698,7 @@ #define TXSTS_QTOP_TOKEN_MASK (0x3 << 25) #define TXSTS_QTOP_TOKEN_SHIFT 25 #define TXSTS_QTOP_TERMINATE BIT(24) -#define TXSTS_QSPCAVAIL_MASK (0xff << 16) +#define TXSTS_QSPCAVAIL_MASK (0x7f << 16) #define TXSTS_QSPCAVAIL_SHIFT 16 #define TXSTS_FSPCAVAIL_MASK (0xffff << 0) #define TXSTS_FSPCAVAIL_SHIFT 0 diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index b1d48019e944..7b84416dfc2b 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -331,7 +331,7 @@ static void dwc2_driver_remove(struct platform_device *dev) /* Exit clock gating when driver is removed. 
*/ if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 3e55838c0001..31684cdaaae3 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1519,6 +1519,8 @@ static void dwc3_get_properties(struct dwc3 *dwc) else dwc->sysdev = dwc->dev; + dwc->sys_wakeup = device_may_wakeup(dwc->sysdev); + ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name); if (ret >= 0) { dwc->usb_psy = power_supply_get_by_name(usb_psy_name); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index c07edfc954f7..7e80dd3d466b 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1133,6 +1133,7 @@ struct dwc3_scratchpad_array { * 3 - Reserved * @dis_metastability_quirk: set to disable metastability quirk. * @dis_split_quirk: set to disable split boundary. + * @sys_wakeup: set if the device may do system wakeup. * @wakeup_configured: set if the device is configured for remote wakeup. * @suspended: set to track suspend event due to U3/L2. 
* @imod_interval: set the interrupt moderation interval in 250ns @@ -1357,6 +1358,7 @@ struct dwc3 { unsigned dis_split_quirk:1; unsigned async_callbacks:1; + unsigned sys_wakeup:1; unsigned wakeup_configured:1; unsigned suspended:1; diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 39564e17f3b0..497deed38c0c 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -51,7 +51,6 @@ #define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 #define PCI_DEVICE_ID_INTEL_MTLS 0x7f6f #define PCI_DEVICE_ID_INTEL_MTL 0x7e7e -#define PCI_DEVICE_ID_INTEL_ARLH 0x7ec1 #define PCI_DEVICE_ID_INTEL_ARLH_PCH 0x777e #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 #define PCI_DEVICE_ID_AMD_MR 0x163a @@ -423,7 +422,6 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, MTLP, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, MTL, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) }, - { PCI_DEVICE_DATA(INTEL, ARLH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) }, diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 40c52dbc28d3..4df2661f6675 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2955,6 +2955,9 @@ static int dwc3_gadget_start(struct usb_gadget *g, dwc->gadget_driver = driver; spin_unlock_irqrestore(&dwc->lock, flags); + if (dwc->sys_wakeup) + device_wakeup_enable(dwc->sysdev); + return 0; } @@ -2970,6 +2973,9 @@ static int dwc3_gadget_stop(struct usb_gadget *g) struct dwc3 *dwc = gadget_to_dwc(g); unsigned long flags; + if (dwc->sys_wakeup) + device_wakeup_disable(dwc->sysdev); + spin_lock_irqsave(&dwc->lock, flags); dwc->gadget_driver = NULL; dwc->max_cfg_eps = 0; @@ -4651,6 +4657,10 @@ int dwc3_gadget_init(struct dwc3 *dwc) else dwc3_gadget_set_speed(dwc->gadget, dwc->maximum_speed); + /* No system wakeup if no gadget driver bound */ + if 
(dwc->sys_wakeup) + device_wakeup_disable(dwc->sysdev); + return 0; err5: diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index 5a5cb6ce9946..0204787df81d 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -173,6 +173,14 @@ int dwc3_host_init(struct dwc3 *dwc) goto err; } + if (dwc->sys_wakeup) { + /* Restore wakeup setting if switched from device */ + device_wakeup_enable(dwc->sysdev); + + /* Pass on wakeup setting to the new xhci platform device */ + device_init_wakeup(&xhci->dev, true); + } + return 0; err: platform_device_put(xhci); @@ -181,6 +189,9 @@ err: void dwc3_host_exit(struct dwc3 *dwc) { + if (dwc->sys_wakeup) + device_init_wakeup(&dwc->xhci->dev, false); + platform_device_unregister(dwc->xhci); dwc->xhci = NULL; } diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 9d4150124fdb..b3a9d18a8dcd 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -292,7 +292,9 @@ int usb_ep_queue(struct usb_ep *ep, { int ret = 0; - if (WARN_ON_ONCE(!ep->enabled && ep->address)) { + if (!ep->enabled && ep->address) { + pr_debug("USB gadget: queue request to disabled ep 0x%x (%s)\n", + ep->address, ep->name); ret = -ESHUTDOWN; goto out; } diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c index 35770e608c64..2d30fc1be306 100644 --- a/drivers/usb/misc/usb-ljca.c +++ b/drivers/usb/misc/usb-ljca.c @@ -518,8 +518,10 @@ static int ljca_new_client_device(struct ljca_adapter *adap, u8 type, u8 id, int ret; client = kzalloc(sizeof *client, GFP_KERNEL); - if (!client) + if (!client) { + kfree(data); return -ENOMEM; + } client->type = type; client->id = id; @@ -535,8 +537,10 @@ static int ljca_new_client_device(struct ljca_adapter *adap, u8 type, u8 id, auxdev->dev.release = ljca_auxdev_release; ret = auxiliary_device_init(auxdev); - if (ret) + if (ret) { + kfree(data); goto err_free; + } ljca_auxdev_acpi_bind(adap, auxdev, adr, id); @@ -590,12 +594,8 @@ static int 
ljca_enumerate_gpio(struct ljca_adapter *adap) valid_pin[i] = get_unaligned_le32(&desc->bank_desc[i].valid_pins); bitmap_from_arr32(gpio_info->valid_pin_map, valid_pin, gpio_num); - ret = ljca_new_client_device(adap, LJCA_CLIENT_GPIO, 0, "ljca-gpio", + return ljca_new_client_device(adap, LJCA_CLIENT_GPIO, 0, "ljca-gpio", gpio_info, LJCA_GPIO_ACPI_ADR); - if (ret) - kfree(gpio_info); - - return ret; } static int ljca_enumerate_i2c(struct ljca_adapter *adap) @@ -629,10 +629,8 @@ static int ljca_enumerate_i2c(struct ljca_adapter *adap) ret = ljca_new_client_device(adap, LJCA_CLIENT_I2C, i, "ljca-i2c", i2c_info, LJCA_I2C1_ACPI_ADR + i); - if (ret) { - kfree(i2c_info); + if (ret) return ret; - } } return 0; @@ -669,10 +667,8 @@ static int ljca_enumerate_spi(struct ljca_adapter *adap) ret = ljca_new_client_device(adap, LJCA_CLIENT_SPI, i, "ljca-spi", spi_info, LJCA_SPI1_ACPI_ADR + i); - if (ret) { - kfree(spi_info); + if (ret) return ret; - } } return 0; diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 8f735a86cd19..fdcffebf415c 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -262,13 +262,6 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) return dev_err_probe(dev, PTR_ERR(nop->vbus_draw), "could not get vbus regulator\n"); - nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); - if (PTR_ERR(nop->vbus_draw) == -ENODEV) - nop->vbus_draw = NULL; - if (IS_ERR(nop->vbus_draw)) - return dev_err_probe(dev, PTR_ERR(nop->vbus_draw), - "could not get vbus regulator\n"); - nop->dev = dev; nop->phy.dev = nop->dev; nop->phy.label = "nop-xceiv"; diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 71ace274761f..08953f0d4532 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp, * daft to me. 
*/ -static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) +static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) { struct uas_dev_info *devinfo = cmnd->device->hostdata; struct urb *urb; @@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) urb = uas_alloc_sense_urb(devinfo, gfp, cmnd); if (!urb) - return NULL; + return -ENOMEM; usb_anchor_urb(urb, &devinfo->sense_urbs); err = usb_submit_urb(urb, gfp); if (err) { usb_unanchor_urb(urb); uas_log_cmd_state(cmnd, "sense submit err", err); usb_free_urb(urb); - return NULL; } - return urb; + return err; } static int uas_submit_urbs(struct scsi_cmnd *cmnd, struct uas_dev_info *devinfo) { struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); - struct urb *urb; int err; lockdep_assert_held(&devinfo->lock); if (cmdinfo->state & SUBMIT_STATUS_URB) { - urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC); - if (!urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + err = uas_submit_sense_urb(cmnd, GFP_ATOMIC); + if (err) + return err; cmdinfo->state &= ~SUBMIT_STATUS_URB; } @@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_FROM_DEVICE); if (!cmdinfo->data_in_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_IN_URB; } @@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_in_urb); uas_log_cmd_state(cmnd, "data in submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_IN_URB; cmdinfo->state |= DATA_IN_URB_INFLIGHT; @@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_TO_DEVICE); if (!cmdinfo->data_out_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_OUT_URB; } @@ -602,7 +600,7 @@ static int uas_submit_urbs(struct 
scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_out_urb); uas_log_cmd_state(cmnd, "data out submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_OUT_URB; cmdinfo->state |= DATA_OUT_URB_INFLIGHT; @@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (cmdinfo->state & ALLOC_CMD_URB) { cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd); if (!cmdinfo->cmd_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_CMD_URB; } @@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->cmd_urb); uas_log_cmd_state(cmnd, "cmd submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->cmd_urb = NULL; cmdinfo->state &= ~SUBMIT_CMD_URB; @@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd) * of queueing, no matter how fatal the error */ if (err == -ENODEV) { - set_host_byte(cmnd, DID_ERROR); + set_host_byte(cmnd, DID_NO_CONNECT); scsi_done(cmnd); goto zombie; } diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 389c7f0b8d93..9610e647a8d4 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1310,6 +1310,7 @@ static ssize_t select_usb_power_delivery_store(struct device *dev, { struct typec_port *port = to_typec_port(dev); struct usb_power_delivery *pd; + int ret; if (!port->ops || !port->ops->pd_set) return -EOPNOTSUPP; @@ -1318,7 +1319,11 @@ static ssize_t select_usb_power_delivery_store(struct device *dev, if (!pd) return -EINVAL; - return port->ops->pd_set(port, pd); + ret = port->ops->pd_set(port, pd); + if (ret) + return ret; + + return size; } static ssize_t select_usb_power_delivery_show(struct device *dev, diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index ae2b6c94482d..c26fb70c3ec6 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -6861,7 +6861,7 @@ static int 
tcpm_pd_set(struct typec_port *p, struct usb_power_delivery *pd) if (data->source_desc.pdo[0]) { for (i = 0; i < PDO_MAX_OBJECTS && data->source_desc.pdo[i]; i++) - port->snk_pdo[i] = data->source_desc.pdo[i]; + port->src_pdo[i] = data->source_desc.pdo[i]; port->nr_src_pdo = i + 1; } @@ -6910,7 +6910,9 @@ static int tcpm_pd_set(struct typec_port *p, struct usb_power_delivery *pd) port->port_source_caps = data->source_cap; port->port_sink_caps = data->sink_cap; + typec_port_set_usb_power_delivery(p, NULL); port->selected_pd = pd; + typec_port_set_usb_power_delivery(p, port->selected_pd); unlock: mutex_unlock(&port->lock); return ret; @@ -6943,9 +6945,7 @@ static void tcpm_port_unregister_pd(struct tcpm_port *port) port->port_source_caps = NULL; for (i = 0; i < port->pd_count; i++) { usb_power_delivery_unregister_capabilities(port->pd_list[i]->sink_cap); - kfree(port->pd_list[i]->sink_cap); usb_power_delivery_unregister_capabilities(port->pd_list[i]->source_cap); - kfree(port->pd_list[i]->source_cap); devm_kfree(port->dev, port->pd_list[i]); port->pd_list[i] = NULL; usb_power_delivery_unregister(port->pds[i]); diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index cf52cb34d285..31d8a46ae5e7 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -151,8 +151,12 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd) if (!(cci & UCSI_CCI_COMMAND_COMPLETE)) return -EIO; - if (cci & UCSI_CCI_NOT_SUPPORTED) + if (cci & UCSI_CCI_NOT_SUPPORTED) { + if (ucsi_acknowledge_command(ucsi) < 0) + dev_err(ucsi->dev, + "ACK of unsupported command failed\n"); return -EOPNOTSUPP; + } if (cci & UCSI_CCI_ERROR) { if (cmd == UCSI_GET_ERROR_STATUS) @@ -1133,17 +1137,21 @@ static int ucsi_check_cable(struct ucsi_connector *con) if (ret < 0) return ret; - ret = ucsi_get_cable_identity(con); - if (ret < 0) - return ret; + if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE) { + ret = ucsi_get_cable_identity(con); + if (ret < 0) + 
return ret; + } - ret = ucsi_register_plug(con); - if (ret < 0) - return ret; + if (con->ucsi->cap.features & UCSI_CAP_ALT_MODE_DETAILS) { + ret = ucsi_register_plug(con); + if (ret < 0) + return ret; - ret = ucsi_register_altmodes(con, UCSI_RECIPIENT_SOP_P); - if (ret < 0) - return ret; + ret = ucsi_register_altmodes(con, UCSI_RECIPIENT_SOP_P); + if (ret < 0) + return ret; + } return 0; } @@ -1189,8 +1197,10 @@ static void ucsi_handle_connector_change(struct work_struct *work) ucsi_register_partner(con); ucsi_partner_task(con, ucsi_check_connection, 1, HZ); ucsi_partner_task(con, ucsi_check_connector_capability, 1, HZ); - ucsi_partner_task(con, ucsi_get_partner_identity, 1, HZ); - ucsi_partner_task(con, ucsi_check_cable, 1, HZ); + if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE) + ucsi_partner_task(con, ucsi_get_partner_identity, 1, HZ); + if (con->ucsi->cap.features & UCSI_CAP_CABLE_DETAILS) + ucsi_partner_task(con, ucsi_check_cable, 1, HZ); if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) == UCSI_CONSTAT_PWR_OPMODE_PD) @@ -1215,11 +1225,11 @@ static void ucsi_handle_connector_change(struct work_struct *work) if (con->status.change & UCSI_CONSTAT_CAM_CHANGE) ucsi_partner_task(con, ucsi_check_altmodes, 1, 0); - clear_bit(EVENT_PENDING, &con->ucsi->flags); - mutex_lock(&ucsi->ppm_lock); + clear_bit(EVENT_PENDING, &con->ucsi->flags); ret = ucsi_acknowledge_connector_change(ucsi); mutex_unlock(&ucsi->ppm_lock); + if (ret) dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); @@ -1237,7 +1247,7 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num) struct ucsi_connector *con = &ucsi->connector[num - 1]; if (!(ucsi->ntfy & UCSI_ENABLE_NTFY_CONNECTOR_CHANGE)) { - dev_dbg(ucsi->dev, "Bogus connector change event\n"); + dev_dbg(ucsi->dev, "Early connector change event\n"); return; } @@ -1260,13 +1270,47 @@ static int ucsi_reset_connector(struct ucsi_connector *con, bool hard) static int ucsi_reset_ppm(struct ucsi *ucsi) { - u64 command = UCSI_PPM_RESET; + u64 
command; unsigned long tmo; u32 cci; int ret; mutex_lock(&ucsi->ppm_lock); + ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci)); + if (ret < 0) + goto out; + + /* + * If UCSI_CCI_RESET_COMPLETE is already set we must clear + * the flag before we start another reset. Send a + * UCSI_SET_NOTIFICATION_ENABLE command to achieve this. + * Ignore a timeout and try the reset anyway if this fails. + */ + if (cci & UCSI_CCI_RESET_COMPLETE) { + command = UCSI_SET_NOTIFICATION_ENABLE; + ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, + sizeof(command)); + if (ret < 0) + goto out; + + tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS); + do { + ret = ucsi->ops->read(ucsi, UCSI_CCI, + &cci, sizeof(cci)); + if (ret < 0) + goto out; + if (cci & UCSI_CCI_COMMAND_COMPLETE) + break; + if (time_is_before_jiffies(tmo)) + break; + msleep(20); + } while (1); + + WARN_ON(cci & UCSI_CCI_RESET_COMPLETE); + } + + command = UCSI_PPM_RESET; ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, sizeof(command)); if (ret < 0) @@ -1589,8 +1633,10 @@ static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con) ucsi_register_partner(con); ucsi_pwr_opmode_change(con); ucsi_port_psy_changed(con); - ucsi_get_partner_identity(con); - ucsi_check_cable(con); + if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE) + ucsi_get_partner_identity(con); + if (con->ucsi->cap.features & UCSI_CAP_CABLE_DETAILS) + ucsi_check_cable(con); } /* Only notify USB controller if partner supports USB data */ @@ -1636,6 +1682,7 @@ static int ucsi_init(struct ucsi *ucsi) { struct ucsi_connector *con, *connector; u64 command, ntfy; + u32 cci; int ret; int i; @@ -1688,6 +1735,13 @@ static int ucsi_init(struct ucsi *ucsi) ucsi->connector = connector; ucsi->ntfy = ntfy; + + ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci)); + if (ret) + return ret; + if (UCSI_CCI_CONNECTOR(READ_ONCE(cci))) + ucsi_connector_change(ucsi, cci); + return 0; err_unregister: diff --git 
a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 32daf5f58650..0e7c92eb1b22 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -206,7 +206,7 @@ struct ucsi_capability { #define UCSI_CAP_ATTR_POWER_OTHER BIT(10) #define UCSI_CAP_ATTR_POWER_VBUS BIT(14) u8 num_connectors; - u8 features; + u16 features; #define UCSI_CAP_SET_UOM BIT(0) #define UCSI_CAP_SET_PDM BIT(1) #define UCSI_CAP_ALT_MODE_DETAILS BIT(2) @@ -215,7 +215,8 @@ struct ucsi_capability { #define UCSI_CAP_CABLE_DETAILS BIT(5) #define UCSI_CAP_EXT_SUPPLY_NOTIFICATIONS BIT(6) #define UCSI_CAP_PD_RESET BIT(7) - u16 reserved_1; +#define UCSI_CAP_GET_PD_MESSAGE BIT(8) + u8 reserved_1; u8 num_alt_modes; u8 reserved_2; u16 bc_version; diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 928eacbeb21a..7b3ac133ef86 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -23,10 +23,11 @@ struct ucsi_acpi { void *base; struct completion complete; unsigned long flags; +#define UCSI_ACPI_SUPPRESS_EVENT 0 +#define UCSI_ACPI_COMMAND_PENDING 1 +#define UCSI_ACPI_ACK_PENDING 2 guid_t guid; u64 cmd; - bool dell_quirk_probed; - bool dell_quirk_active; }; static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func) @@ -79,9 +80,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, int ret; if (ack) - set_bit(ACK_PENDING, &ua->flags); + set_bit(UCSI_ACPI_ACK_PENDING, &ua->flags); else - set_bit(COMMAND_PENDING, &ua->flags); + set_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags); ret = ucsi_acpi_async_write(ucsi, offset, val, val_len); if (ret) @@ -92,9 +93,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, out_clear_bit: if (ack) - clear_bit(ACK_PENDING, &ua->flags); + clear_bit(UCSI_ACPI_ACK_PENDING, &ua->flags); else - clear_bit(COMMAND_PENDING, &ua->flags); + clear_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags); return ret; } @@ -129,51 +130,40 @@ static const 
struct ucsi_operations ucsi_zenbook_ops = { }; /* - * Some Dell laptops expect that an ACK command with the - * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate) - * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set. - * If this is not done events are not delivered to OSPM and - * subsequent commands will timeout. + * Some Dell laptops don't like ACK commands with the + * UCSI_ACK_CONNECTOR_CHANGE but not the UCSI_ACK_COMMAND_COMPLETE + * bit set. To work around this send a dummy command and bundle the + * UCSI_ACK_CONNECTOR_CHANGE with the UCSI_ACK_COMMAND_COMPLETE + * for the dummy command. */ static int ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset, const void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); - u64 cmd = *(u64 *)val, ack = 0; + u64 cmd = *(u64 *)val; + u64 dummycmd = UCSI_GET_CAPABILITY; int ret; - if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI && - cmd & UCSI_ACK_CONNECTOR_CHANGE) - ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; - - ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); - if (ret != 0) - return ret; - if (ack == 0) - return ret; - - if (!ua->dell_quirk_probed) { - ua->dell_quirk_probed = true; - - cmd = UCSI_GET_CAPABILITY; - ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, - sizeof(cmd)); - if (ret == 0) - return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, - &ack, sizeof(ack)); - if (ret != -ETIMEDOUT) + if (cmd == (UCSI_ACK_CC_CI | UCSI_ACK_CONNECTOR_CHANGE)) { + cmd |= UCSI_ACK_COMMAND_COMPLETE; + + /* + * The UCSI core thinks it is sending a connector change ack + * and will accept new connector change events. We don't want + * this to happen for the dummy command as its response will + * still report the very event that the core is trying to clear. 
+ */ + set_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags); + ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &dummycmd, + sizeof(dummycmd)); + clear_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags); + + if (ret < 0) return ret; - - ua->dell_quirk_active = true; - dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n"); - dev_err(ua->dev, "Firmware bug: Enabling workaround\n"); } - if (!ua->dell_quirk_active) - return ret; - - return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack)); + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd)); } static const struct ucsi_operations ucsi_dell_ops = { @@ -209,13 +199,14 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) if (ret) return; - if (UCSI_CCI_CONNECTOR(cci)) + if (UCSI_CCI_CONNECTOR(cci) && + !test_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags)) ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags)) complete(&ua->complete); if (cci & UCSI_CCI_COMMAND_COMPLETE && - test_bit(COMMAND_PENDING, &ua->flags)) + test_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags)) complete(&ua->complete); } diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index 932e7bf69447..ce08eb33e5be 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -255,6 +255,20 @@ static void pmic_glink_ucsi_notify(struct work_struct *work) static void pmic_glink_ucsi_register(struct work_struct *work) { struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work); + int orientation; + int i; + + for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) { + if (!ucsi->port_orientation[i]) + continue; + orientation = gpiod_get_value(ucsi->port_orientation[i]); + + if (orientation >= 0) { + typec_switch_set(ucsi->port_switch[i], + orientation ? 
TYPEC_ORIENTATION_REVERSE + : TYPEC_ORIENTATION_NORMAL); + } + } ucsi_register(ucsi->ucsi); } diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index e3179e987cdb..197b6d5268e9 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -494,6 +494,7 @@ config FB_SBUS_HELPERS select FB_CFB_COPYAREA select FB_CFB_FILLRECT select FB_CFB_IMAGEBLIT + select FB_IOMEM_FOPS config FB_BW2 bool "BWtwo support" @@ -514,6 +515,7 @@ config FB_CG6 depends on (FB = y) && (SPARC && FB_SBUS) select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT + select FB_IOMEM_FOPS help This is the frame buffer device driver for the CGsix (GX, TurboGX) frame buffer. @@ -523,6 +525,7 @@ config FB_FFB depends on FB_SBUS && SPARC64 select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT + select FB_IOMEM_FOPS help This is the frame buffer device driver for the Creator, Creator3D, and Elite3D graphics boards. diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 360a5304ec03..b01b1bbf2493 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -344,17 +344,21 @@ void v9fs_evict_inode(struct inode *inode) struct v9fs_inode __maybe_unused *v9inode = V9FS_I(inode); __le32 __maybe_unused version; - truncate_inode_pages_final(&inode->i_data); + if (!is_bad_inode(inode)) { + truncate_inode_pages_final(&inode->i_data); - version = cpu_to_le32(v9inode->qid.version); - netfs_clear_inode_writeback(inode, &version); + version = cpu_to_le32(v9inode->qid.version); + netfs_clear_inode_writeback(inode, &version); - clear_inode(inode); - filemap_fdatawrite(&inode->i_data); + clear_inode(inode); + filemap_fdatawrite(&inode->i_data); #ifdef CONFIG_9P_FSCACHE - fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false); + if (v9fs_inode_cookie(v9inode)) + fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false); #endif + } else + clear_inode(inode); } struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c 
index ef9db3e03506..55dde186041a 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -78,11 +78,11 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid) retval = v9fs_init_inode(v9ses, inode, &fid->qid, st->st_mode, new_decode_dev(st->st_rdev)); + v9fs_stat2inode_dotl(st, inode, 0); kfree(st); if (retval) goto error; - v9fs_stat2inode_dotl(st, inode, 0); v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); retval = v9fs_get_acl(inode, fid); @@ -297,7 +297,6 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, umode_t omode) { int err; - struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; kgid_t gid; const unsigned char *name; @@ -307,7 +306,6 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, struct posix_acl *dacl = NULL, *pacl = NULL; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); - v9ses = v9fs_inode2v9ses(dir); omode |= S_IFDIR; if (dir->i_mode & S_ISGID) @@ -739,7 +737,6 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, kgid_t gid; const unsigned char *name; umode_t mode; - struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; struct inode *inode; struct p9_qid qid; @@ -749,7 +746,6 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, dir->i_ino, dentry, omode, MAJOR(rdev), MINOR(rdev)); - v9ses = v9fs_inode2v9ses(dir); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); @@ -1202,8 +1202,8 @@ static void aio_complete(struct aio_kiocb *iocb) spin_lock_irqsave(&ctx->wait.lock, flags); list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry) if (avail >= curr->min_nr) { - list_del_init_careful(&curr->w.entry); wake_up_process(curr->w.private); + list_del_init_careful(&curr->w.entry); } spin_unlock_irqrestore(&ctx->wait.lock, flags); } diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index b02796c8a595..66ca0bbee639 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -17,6 +17,7 
@@ bcachefs-y := \ btree_journal_iter.o \ btree_key_cache.o \ btree_locking.o \ + btree_node_scan.o \ btree_trans_commit.o \ btree_update.o \ btree_update_interior.o \ @@ -37,6 +38,7 @@ bcachefs-y := \ error.o \ extents.o \ extent_update.o \ + eytzinger.o \ fs.o \ fs-common.o \ fs-ioctl.o \ @@ -67,6 +69,7 @@ bcachefs-y := \ quota.o \ rebalance.o \ recovery.o \ + recovery_passes.o \ reflink.o \ replicas.o \ sb-clean.o \ diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 893e38f9db80..4ff56fa4d539 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1713,34 +1713,37 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, if (ret) goto out; - if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) { - a->v.gen++; - SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); - goto write; - } - - if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { - if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { - bch2_trans_inconsistent(trans, - "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" - "%s", - a->v.journal_seq, - c->journal.flushed_seq_ondisk, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + if (a->v.dirty_sectors) { + if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, + trans, "attempting to discard bucket with dirty data\n%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ret = -EIO; - } goto out; } if (a->v.data_type != BCH_DATA_need_discard) { - if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { - bch2_trans_inconsistent(trans, - "bucket incorrectly set in need_discard btree\n" - "%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - ret = -EIO; + if (data_type_is_empty(a->v.data_type) && + BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) { + a->v.gen++; + SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); + goto write; } + if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, + trans, "bucket incorrectly set in 
need_discard btree\n" + "%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + ret = -EIO; + goto out; + } + + if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { + if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, + trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s", + a->v.journal_seq, + c->journal.flushed_seq_ondisk, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + ret = -EIO; goto out; } @@ -1835,6 +1838,7 @@ static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpo if (ret) goto err; + BUG_ON(a->v.dirty_sectors); SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); a->v.data_type = alloc_data_type(a->v, a->v.data_type); @@ -1942,6 +1946,7 @@ static int invalidate_one_bucket(struct btree_trans *trans, goto out; BUG_ON(a->v.data_type != BCH_DATA_cached); + BUG_ON(a->v.dirty_sectors); if (!a->v.cached_sectors) bch_err(c, "invalidating empty bucket, confused"); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 214b15c84d1f..a1fc30adf912 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -188,8 +188,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca) static inline unsigned open_buckets_reserved(enum bch_watermark watermark) { switch (watermark) { - case BCH_WATERMARK_reclaim: + case BCH_WATERMARK_interior_updates: return 0; + case BCH_WATERMARK_reclaim: + return OPEN_BUCKETS_COUNT / 6; case BCH_WATERMARK_btree: case BCH_WATERMARK_btree_copygc: return OPEN_BUCKETS_COUNT / 4; diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h index b91b7a461056..c2226e947c41 100644 --- a/fs/bcachefs/alloc_types.h +++ b/fs/bcachefs/alloc_types.h @@ -22,7 +22,8 @@ struct bucket_alloc_state { x(copygc) \ x(btree) \ x(btree_copygc) \ - x(reclaim) + x(reclaim) \ + x(interior_updates) enum bch_watermark { #define x(name) BCH_WATERMARK_##name, diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 
8cb35ea572cb..114328acde72 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -8,6 +8,7 @@ #include "btree_update.h" #include "btree_update_interior.h" #include "btree_write_buffer.h" +#include "checksum.h" #include "error.h" #include <linux/mm.h> @@ -29,8 +30,7 @@ static bool extent_matches_bp(struct bch_fs *c, if (p.ptr.cached) continue; - bch2_extent_ptr_to_bp(c, btree_id, level, k, p, - &bucket2, &bp2); + bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2); if (bpos_eq(bucket, bucket2) && !memcmp(&bp, &bp2, sizeof(bp))) return true; @@ -44,6 +44,11 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); + + /* these will be caught by fsck */ + if (!bch2_dev_exists2(c, bp.k->p.inode)) + return 0; + struct bpos bucket = bp_pos_to_bucket(c, bp.k->p); int ret = 0; @@ -378,7 +383,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ backpointer_to_missing_alloc, "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", alloc_iter.pos.inode, alloc_iter.pos.offset, - (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, bp_iter, 0); goto out; } @@ -414,6 +419,84 @@ struct extents_to_bp_state { struct bkey_buf last_flushed; }; +static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree, + struct bkey_s_c extent, unsigned dev) +{ + struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent); + int ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; + + bch2_bkey_drop_device(bkey_i_to_s(n), dev); + return bch2_btree_insert_trans(trans, btree, n, 0); +} + +static int check_extent_checksum(struct btree_trans *trans, + enum btree_id btree, struct bkey_s_c extent, + enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev) +{ + struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs = 
bch2_bkey_ptrs_c(extent); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + struct printbuf buf = PRINTBUF; + void *data_buf = NULL; + struct bio *bio = NULL; + size_t bytes; + int ret = 0; + + if (bkey_is_btree_ptr(extent.k)) + return false; + + bkey_for_each_ptr_decode(extent.k, ptrs, p, entry) + if (p.ptr.dev == dev) + goto found; + BUG(); +found: + if (!p.crc.csum_type) + return false; + + bytes = p.crc.compressed_size << 9; + + struct bch_dev *ca = bch_dev_bkey_exists(c, dev); + if (!bch2_dev_get_ioref(ca, READ)) + return false; + + data_buf = kvmalloc(bytes, GFP_KERNEL); + if (!data_buf) { + ret = -ENOMEM; + goto err; + } + + bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL); + bio->bi_iter.bi_sector = p.ptr.offset; + bch2_bio_map(bio, data_buf, bytes); + ret = submit_bio_wait(bio); + if (ret) + goto err; + + prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); + prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree)); + bch2_bkey_val_to_text(&buf, c, extent); + prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); + bch2_bkey_val_to_text(&buf, c, extent2); + + struct nonce nonce = extent_nonce(extent.k->version, p.crc); + struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes); + if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum), + c, dup_backpointer_to_bad_csum_extent, + "%s", buf.buf)) + ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1; +fsck_err: +err: + if (bio) + bio_put(bio); + kvfree(data_buf); + percpu_ref_put(&ca->io_ref); + printbuf_exit(&buf); + return ret; +} + static int check_bp_exists(struct btree_trans *trans, struct extents_to_bp_state *s, struct bpos bucket, @@ -421,7 +504,8 @@ static int check_bp_exists(struct btree_trans *trans, struct bkey_s_c orig_k) { struct bch_fs *c = trans->c; - struct btree_iter bp_iter = { NULL }; + struct btree_iter bp_iter = {}; + struct btree_iter other_extent_iter = {}; struct printbuf buf = PRINTBUF; struct bkey_s_c 
bp_k; struct bkey_buf tmp; @@ -429,13 +513,19 @@ static int check_bp_exists(struct btree_trans *trans, bch2_bkey_buf_init(&tmp); + if (!bch2_dev_bucket_exists(c, bucket)) { + prt_str(&buf, "extent for nonexistent device:bucket "); + bch2_bpos_to_text(&buf, bucket); + prt_str(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, orig_k); + bch_err(c, "%s", buf.buf); + return -BCH_ERR_fsck_repair_unimplemented; + } + if (bpos_lt(bucket, s->bucket_start) || bpos_gt(bucket, s->bucket_end)) return 0; - if (!bch2_dev_bucket_exists(c, bucket)) - goto missing; - bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bucket_pos_to_bp(c, bucket, bp.bucket_offset), 0); @@ -461,21 +551,94 @@ static int check_bp_exists(struct btree_trans *trans, ret = -BCH_ERR_transaction_restart_write_buffer_flush; goto out; } - goto missing; + + goto check_existing_bp; } out: err: fsck_err: + bch2_trans_iter_exit(trans, &other_extent_iter); bch2_trans_iter_exit(trans, &bp_iter); bch2_bkey_buf_exit(&tmp, c); printbuf_exit(&buf); return ret; +check_existing_bp: + /* Do we have a backpointer for a different extent? 
*/ + if (bp_k.k->type != KEY_TYPE_backpointer) + goto missing; + + struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v; + + struct bkey_s_c other_extent = + bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0); + ret = bkey_err(other_extent); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) + ret = 0; + if (ret) + goto err; + + if (!other_extent.k) + goto missing; + + if (bch2_extents_match(orig_k, other_extent)) { + printbuf_reset(&buf); + prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n "); + bch2_bkey_val_to_text(&buf, c, orig_k); + prt_str(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, other_extent); + bch_err(c, "%s", buf.buf); + + if (other_extent.k->size <= orig_k.k->size) { + ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode); + if (ret) + goto err; + goto out; + } else { + ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode); + if (ret) + goto err; + goto missing; + } + } + + ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); + if (ret < 0) + goto err; + if (ret) { + ret = 0; + goto missing; + } + + ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode); + if (ret < 0) + goto err; + if (ret) { + ret = 0; + goto out; + } + + printbuf_reset(&buf); + prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode); + bch2_bkey_val_to_text(&buf, c, orig_k); + prt_str(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, other_extent); + bch_err(c, "%s", buf.buf); + ret = -BCH_ERR_fsck_repair_unimplemented; + goto err; missing: + printbuf_reset(&buf); prt_printf(&buf, "missing backpointer for btree=%s l=%u ", bch2_btree_id_str(bp.btree_id), bp.level); bch2_bkey_val_to_text(&buf, c, orig_k); - prt_printf(&buf, "\nbp pos "); - bch2_bpos_to_text(&buf, bp_iter.pos); + prt_printf(&buf, "\n got: "); + bch2_bkey_val_to_text(&buf, c, 
bp_k); + + struct bkey_i_backpointer n_bp_k; + bkey_backpointer_init(&n_bp_k.k_i); + n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset); + n_bp_k.v = bp; + prt_printf(&buf, "\n want: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i)); if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); @@ -502,8 +665,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans, if (p.ptr.cached) continue; - bch2_extent_ptr_to_bp(c, btree, level, - k, p, &bucket_pos, &bp); + bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp); ret = check_bp_exists(trans, s, bucket_pos, bp, k); if (ret) diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 327365a9feac..da012ca7daee 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -90,20 +90,40 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i); } -static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level, - struct bkey_s_c k, struct extent_ptr_decoded p) +static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k, + struct extent_ptr_decoded p, + const union bch_extent_entry *entry) { - return level ? BCH_DATA_btree : - p.has_ec ? BCH_DATA_stripe : - BCH_DATA_user; + switch (k.k->type) { + case KEY_TYPE_btree_ptr: + case KEY_TYPE_btree_ptr_v2: + return BCH_DATA_btree; + case KEY_TYPE_extent: + case KEY_TYPE_reflink_v: + return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user; + case KEY_TYPE_stripe: { + const struct bch_extent_ptr *ptr = &entry->ptr; + struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); + + BUG_ON(ptr < s.v->ptrs || + ptr >= s.v->ptrs + s.v->nr_blocks); + + return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant + ? 
BCH_DATA_parity + : BCH_DATA_user; + } + default: + BUG(); + } } static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, enum btree_id btree_id, unsigned level, struct bkey_s_c k, struct extent_ptr_decoded p, + const union bch_extent_entry *entry, struct bpos *bucket_pos, struct bch_backpointer *bp) { - enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p); + enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry); s64 sectors = level ? btree_sectors(c) : k.k->size; u32 bucket_offset; diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 799aa32b6b4d..a31a5f706929 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -209,7 +209,7 @@ #include "fifo.h" #include "nocow_locking_types.h" #include "opts.h" -#include "recovery_types.h" +#include "recovery_passes_types.h" #include "sb-errors_types.h" #include "seqmutex.h" #include "time_stats.h" @@ -456,6 +456,7 @@ enum bch_time_stats { #include "alloc_types.h" #include "btree_types.h" +#include "btree_node_scan_types.h" #include "btree_write_buffer_types.h" #include "buckets_types.h" #include "buckets_waiting_for_journal_types.h" @@ -614,6 +615,7 @@ struct bch_dev { */ #define BCH_FS_FLAGS() \ + x(new_fs) \ x(started) \ x(may_go_rw) \ x(rw) \ @@ -796,6 +798,7 @@ struct bch_fs { u64 features; u64 compat; unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)]; + u64 btrees_lost_data; } sb; @@ -810,7 +813,6 @@ struct bch_fs { /* snapshot.c: */ struct snapshot_table __rcu *snapshots; - size_t snapshot_table_size; struct mutex snapshot_table_lock; struct rw_semaphore snapshot_create_lock; @@ -1104,6 +1106,8 @@ struct bch_fs { struct journal_keys journal_keys; struct list_head journal_iters; + struct find_btree_nodes found_btree_nodes; + u64 last_bucket_seq_cleanup; u64 counters_on_mount[BCH_COUNTER_NR]; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index bff8750ac0d7..63102992d955 100644 --- a/fs/bcachefs/bcachefs_format.h +++ 
b/fs/bcachefs/bcachefs_format.h @@ -818,6 +818,7 @@ struct bch_sb_field_ext { struct bch_sb_field field; __le64 recovery_passes_required[2]; __le64 errors_silent[8]; + __le64 btrees_lost_data; }; struct bch_sb_field_downgrade_entry { diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 3fd1085b6c61..3bb477840eab 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -134,18 +134,24 @@ void bch2_dump_btree_node_iter(struct btree *b, printbuf_exit(&buf); } -#ifdef CONFIG_BCACHEFS_DEBUG - -void __bch2_verify_btree_nr_keys(struct btree *b) +struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b) { struct bset_tree *t; struct bkey_packed *k; - struct btree_nr_keys nr = { 0 }; + struct btree_nr_keys nr = {}; for_each_bset(b, t) bset_tree_for_each_key(b, t, k) if (!bkey_deleted(k)) btree_keys_account_key_add(&nr, t - b->set, k); + return nr; +} + +#ifdef CONFIG_BCACHEFS_DEBUG + +void __bch2_verify_btree_nr_keys(struct btree *b) +{ + struct btree_nr_keys nr = bch2_btree_node_count_keys(b); BUG_ON(memcmp(&nr, &b->nr, sizeof(nr))); } diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h index 79c77baaa383..120a79fd456b 100644 --- a/fs/bcachefs/bset.h +++ b/fs/bcachefs/bset.h @@ -458,6 +458,8 @@ struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *, /* Accounting: */ +struct btree_nr_keys bch2_btree_node_count_keys(struct btree *); + static inline void btree_keys_account_key(struct btree_nr_keys *n, unsigned bset, struct bkey_packed *k, diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 562561a9a510..84474324dba9 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -808,7 +808,8 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) prt_printf(&buf, "\nmax "); bch2_bpos_to_text(&buf, b->data->max_key); - bch2_fs_inconsistent(c, "%s", buf.buf); + bch2_fs_topology_error(c, "%s", buf.buf); + printbuf_exit(&buf); } @@ -1134,6 +1135,8 @@ void bch2_btree_node_evict(struct 
btree_trans *trans, const struct bkey_i *k) b = btree_cache_find(bc, k); if (!b) return; + + BUG_ON(b == btree_node_root(trans->c, b)); wait_on_io: /* not allowed to wait on io with btree locks held: */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index bdaed29f084a..6280da1244b5 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -7,11 +7,13 @@ #include "bcachefs.h" #include "alloc_background.h" #include "alloc_foreground.h" +#include "backpointers.h" #include "bkey_methods.h" #include "bkey_buf.h" #include "btree_journal_iter.h" #include "btree_key_cache.h" #include "btree_locking.h" +#include "btree_node_scan.h" #include "btree_update_interior.h" #include "btree_io.h" #include "btree_gc.h" @@ -24,7 +26,7 @@ #include "journal.h" #include "keylist.h" #include "move.h" -#include "recovery.h" +#include "recovery_passes.h" #include "reflink.h" #include "replicas.h" #include "super-io.h" @@ -40,6 +42,7 @@ #define DROP_THIS_NODE 10 #define DROP_PREV_NODE 11 +#define DID_FILL_FROM_SCAN 12 static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k) { @@ -70,90 +73,6 @@ static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) __gc_pos_set(c, new_pos); } -/* - * Missing: if an interior btree node is empty, we need to do something - - * perhaps just kill it - */ -static int bch2_gc_check_topology(struct bch_fs *c, - struct btree *b, - struct bkey_buf *prev, - struct bkey_buf cur, - bool is_last) -{ - struct bpos node_start = b->data->min_key; - struct bpos node_end = b->data->max_key; - struct bpos expected_start = bkey_deleted(&prev->k->k) - ? 
node_start - : bpos_successor(prev->k->k.p); - struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; - int ret = 0; - - if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) { - struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k); - - if (!bpos_eq(expected_start, bp->v.min_key)) { - bch2_topology_error(c); - - if (bkey_deleted(&prev->k->k)) { - prt_printf(&buf1, "start of node: "); - bch2_bpos_to_text(&buf1, node_start); - } else { - bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k)); - } - bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k)); - - if (__fsck_err(c, - FSCK_CAN_FIX| - FSCK_CAN_IGNORE| - FSCK_NO_RATELIMIT, - btree_node_topology_bad_min_key, - "btree node with incorrect min_key at btree %s level %u:\n" - " prev %s\n" - " cur %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) { - bch_info(c, "Halting mark and sweep to start topology repair pass"); - ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); - goto err; - } else { - set_bit(BCH_FS_initial_gc_unfixed, &c->flags); - } - } - } - - if (is_last && !bpos_eq(cur.k->k.p, node_end)) { - bch2_topology_error(c); - - printbuf_reset(&buf1); - printbuf_reset(&buf2); - - bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k)); - bch2_bpos_to_text(&buf2, node_end); - - if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT, - btree_node_topology_bad_max_key, - "btree node with incorrect max_key at btree %s level %u:\n" - " %s\n" - " expected %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf) && - should_restart_for_topology_repair(c)) { - bch_info(c, "Halting mark and sweep to start topology repair pass"); - ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); - goto err; - } else { - set_bit(BCH_FS_initial_gc_unfixed, &c->flags); - } - } - - bch2_bkey_buf_copy(prev, c, cur.k); -err: -fsck_err: - printbuf_exit(&buf2); - printbuf_exit(&buf1); - return ret; -} - 
static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst) { switch (b->key.k.type) { @@ -212,6 +131,17 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min) struct bkey_i_btree_ptr_v2 *new; int ret; + if (c->opts.verbose) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + prt_str(&buf, " -> "); + bch2_bpos_to_text(&buf, new_min); + + bch_info(c, "%s(): %s", __func__, buf.buf); + printbuf_exit(&buf); + } + new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL); if (!new) return -BCH_ERR_ENOMEM_gc_repair_key; @@ -237,6 +167,17 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) struct bkey_i_btree_ptr_v2 *new; int ret; + if (c->opts.verbose) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + prt_str(&buf, " -> "); + bch2_bpos_to_text(&buf, new_max); + + bch_info(c, "%s(): %s", __func__, buf.buf); + printbuf_exit(&buf); + } + ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p); if (ret) return ret; @@ -268,127 +209,138 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) return 0; } -static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, - struct btree *prev, struct btree *cur) +static int btree_check_node_boundaries(struct bch_fs *c, struct btree *b, + struct btree *prev, struct btree *cur, + struct bpos *pulled_from_scan) { struct bpos expected_start = !prev ? 
b->data->min_key : bpos_successor(prev->key.k.p); - struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; + struct printbuf buf = PRINTBUF; int ret = 0; - if (!prev) { - prt_printf(&buf1, "start of node: "); - bch2_bpos_to_text(&buf1, b->data->min_key); - } else { - bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key)); + BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && + !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, + b->data->min_key)); + + if (bpos_eq(expected_start, cur->data->min_key)) + return 0; + + prt_printf(&buf, " at btree %s level %u:\n parent: ", + bch2_btree_id_str(b->c.btree_id), b->c.level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + + if (prev) { + prt_printf(&buf, "\n prev: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key)); } - bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key)); - - if (prev && - bpos_gt(expected_start, cur->data->min_key) && - BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) { - /* cur overwrites prev: */ - - if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key, - cur->data->min_key), c, - btree_node_topology_overwritten_by_next_node, - "btree node overwritten by next node at btree %s level %u:\n" - " node %s\n" - " next %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf)) { - ret = DROP_PREV_NODE; - goto out; - } + prt_str(&buf, "\n next: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key)); - if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p, - bpos_predecessor(cur->data->min_key)), c, - btree_node_topology_bad_max_key, - "btree node with incorrect max_key at btree %s level %u:\n" - " node %s\n" - " next %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf)) - ret = set_node_max(c, prev, - bpos_predecessor(cur->data->min_key)); - } else { - /* prev overwrites cur: */ - - if (mustfix_fsck_err_on(bpos_ge(expected_start, - cur->data->max_key), c, - btree_node_topology_overwritten_by_prev_node, - "btree node overwritten by prev 
node at btree %s level %u:\n" - " prev %s\n" - " node %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf)) { - ret = DROP_THIS_NODE; - goto out; - } + if (bpos_lt(expected_start, cur->data->min_key)) { /* gap */ + if (b->c.level == 1 && + bpos_lt(*pulled_from_scan, cur->data->min_key)) { + ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0, + expected_start, + bpos_predecessor(cur->data->min_key)); + if (ret) + goto err; - if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c, - btree_node_topology_bad_min_key, - "btree node with incorrect min_key at btree %s level %u:\n" - " prev %s\n" - " node %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf)) - ret = set_node_min(c, cur, expected_start); + *pulled_from_scan = cur->data->min_key; + ret = DID_FILL_FROM_SCAN; + } else { + if (mustfix_fsck_err(c, btree_node_topology_bad_min_key, + "btree node with incorrect min_key%s", buf.buf)) + ret = set_node_min(c, cur, expected_start); + } + } else { /* overlap */ + if (prev && BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) { /* cur overwrites prev */ + if (bpos_ge(prev->data->min_key, cur->data->min_key)) { /* fully? */ + if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_next_node, + "btree node overwritten by next node%s", buf.buf)) + ret = DROP_PREV_NODE; + } else { + if (mustfix_fsck_err(c, btree_node_topology_bad_max_key, + "btree node with incorrect max_key%s", buf.buf)) + ret = set_node_max(c, prev, + bpos_predecessor(cur->data->min_key)); + } + } else { + if (bpos_ge(expected_start, cur->data->max_key)) { /* fully? 
*/ + if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_prev_node, + "btree node overwritten by prev node%s", buf.buf)) + ret = DROP_THIS_NODE; + } else { + if (mustfix_fsck_err(c, btree_node_topology_bad_min_key, + "btree node with incorrect min_key%s", buf.buf)) + ret = set_node_min(c, cur, expected_start); + } + } } -out: +err: fsck_err: - printbuf_exit(&buf2); - printbuf_exit(&buf1); + printbuf_exit(&buf); return ret; } static int btree_repair_node_end(struct bch_fs *c, struct btree *b, - struct btree *child) + struct btree *child, struct bpos *pulled_from_scan) { - struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; + struct printbuf buf = PRINTBUF; int ret = 0; - bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key)); - bch2_bpos_to_text(&buf2, b->key.k.p); + if (bpos_eq(child->key.k.p, b->key.k.p)) + return 0; - if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c, - btree_node_topology_bad_max_key, - "btree node with incorrect max_key at btree %s level %u:\n" - " %s\n" - " expected %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf)) { - ret = set_node_max(c, child, b->key.k.p); - if (ret) - goto err; + prt_printf(&buf, "at btree %s level %u:\n parent: ", + bch2_btree_id_str(b->c.btree_id), b->c.level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + + prt_str(&buf, "\n child: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key)); + + if (mustfix_fsck_err(c, btree_node_topology_bad_max_key, + "btree node with incorrect max_key%s", buf.buf)) { + if (b->c.level == 1 && + bpos_lt(*pulled_from_scan, b->key.k.p)) { + ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0, + bpos_successor(child->key.k.p), b->key.k.p); + if (ret) + goto err; + + *pulled_from_scan = b->key.k.p; + ret = DID_FILL_FROM_SCAN; + } else { + ret = set_node_max(c, child, b->key.k.p); + } } err: fsck_err: - printbuf_exit(&buf2); - printbuf_exit(&buf1); + printbuf_exit(&buf); return ret; } -static int 
bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b) +static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b, + struct bpos *pulled_from_scan) { struct bch_fs *c = trans->c; struct btree_and_journal_iter iter; struct bkey_s_c k; struct bkey_buf prev_k, cur_k; struct btree *prev = NULL, *cur = NULL; - bool have_child, dropped_children = false; + bool have_child, new_pass = false; struct printbuf buf = PRINTBUF; int ret = 0; if (!b->c.level) return 0; -again: - prev = NULL; - have_child = dropped_children = false; + bch2_bkey_buf_init(&prev_k); bch2_bkey_buf_init(&cur_k); +again: + cur = prev = NULL; + have_child = new_pass = false; bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); iter.prefetch = true; @@ -415,9 +367,10 @@ again: b->c.level - 1, buf.buf)) { bch2_btree_node_evict(trans, cur_k.k); - ret = bch2_journal_key_delete(c, b->c.btree_id, - b->c.level, cur_k.k->k.p); cur = NULL; + ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: + bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, cur_k.k->k.p); if (ret) break; continue; @@ -427,7 +380,23 @@ again: if (ret) break; - ret = btree_repair_node_boundaries(c, b, prev, cur); + if (bch2_btree_node_is_stale(c, cur)) { + bch_info(c, "btree node %s older than nodes found by scanning", buf.buf); + six_unlock_read(&cur->c.lock); + bch2_btree_node_evict(trans, cur_k.k); + ret = bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, cur_k.k->k.p); + cur = NULL; + if (ret) + break; + continue; + } + + ret = btree_check_node_boundaries(c, b, prev, cur, pulled_from_scan); + if (ret == DID_FILL_FROM_SCAN) { + new_pass = true; + ret = 0; + } if (ret == DROP_THIS_NODE) { six_unlock_read(&cur->c.lock); @@ -445,6 +414,7 @@ again: prev = NULL; if (ret == DROP_PREV_NODE) { + bch_info(c, "dropped prev node"); bch2_btree_node_evict(trans, prev_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, prev_k.k->k.p); @@ 
-452,8 +422,6 @@ again: break; bch2_btree_and_journal_iter_exit(&iter); - bch2_bkey_buf_exit(&prev_k, c); - bch2_bkey_buf_exit(&cur_k, c); goto again; } else if (ret) break; @@ -465,7 +433,11 @@ again: if (!ret && !IS_ERR_OR_NULL(prev)) { BUG_ON(cur); - ret = btree_repair_node_end(c, b, prev); + ret = btree_repair_node_end(c, b, prev, pulled_from_scan); + if (ret == DID_FILL_FROM_SCAN) { + new_pass = true; + ret = 0; + } } if (!IS_ERR_OR_NULL(prev)) @@ -479,6 +451,10 @@ again: goto err; bch2_btree_and_journal_iter_exit(&iter); + + if (new_pass) + goto again; + bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); iter.prefetch = true; @@ -495,7 +471,7 @@ again: if (ret) goto err; - ret = bch2_btree_repair_topology_recurse(trans, cur); + ret = bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan); six_unlock_read(&cur->c.lock); cur = NULL; @@ -503,7 +479,7 @@ again: bch2_btree_node_evict(trans, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); - dropped_children = true; + new_pass = true; } if (ret) @@ -530,12 +506,14 @@ fsck_err: six_unlock_read(&cur->c.lock); bch2_btree_and_journal_iter_exit(&iter); - bch2_bkey_buf_exit(&prev_k, c); - bch2_bkey_buf_exit(&cur_k, c); - if (!ret && dropped_children) + if (!ret && new_pass) goto again; + BUG_ON(!ret && bch2_btree_node_check_topology(trans, b)); + + bch2_bkey_buf_exit(&prev_k, c); + bch2_bkey_buf_exit(&cur_k, c); printbuf_exit(&buf); return ret; } @@ -543,32 +521,63 @@ fsck_err: int bch2_check_topology(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); - struct btree *b; - unsigned i; + struct bpos pulled_from_scan = POS_MIN; int ret = 0; - for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) { + for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) { struct btree_root *r = bch2_btree_id_root(c, i); + bool reconstructed_root = false; - if (!r->alive) - continue; + if (r->error) { + ret = bch2_run_explicit_recovery_pass(c, 
BCH_RECOVERY_PASS_scan_for_btree_nodes); + if (ret) + break; +reconstruct_root: + bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i)); - b = r->b; - if (btree_node_fake(b)) - continue; + r->alive = false; + r->error = 0; + + if (!bch2_btree_has_scanned_nodes(c, i)) { + mustfix_fsck_err(c, btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", bch2_btree_id_str(i)); + bch2_btree_root_alloc_fake(c, i, 0); + } else { + bch2_btree_root_alloc_fake(c, i, 1); + ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); + if (ret) + break; + } + + bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + reconstructed_root = true; + } + + struct btree *b = r->b; btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); - ret = bch2_btree_repair_topology_recurse(trans, b); + ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan); six_unlock_read(&b->c.lock); if (ret == DROP_THIS_NODE) { - bch_err(c, "empty btree root - repair unimplemented"); - ret = -BCH_ERR_fsck_repair_unimplemented; + bch2_btree_node_hash_remove(&c->btree_cache, b); + mutex_lock(&c->btree_cache.lock); + list_move(&b->list, &c->btree_cache.freeable); + mutex_unlock(&c->btree_cache.lock); + + r->b = NULL; + + if (!reconstructed_root) + goto reconstruct_root; + + bch_err(c, "empty btree root %s", bch2_btree_id_str(i)); + bch2_btree_root_alloc_fake(c, i, 0); + r->alive = false; + ret = 0; } } - +fsck_err: bch2_trans_put(trans); - return ret; } @@ -591,7 +600,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) { struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); - enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr); + enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, p, entry_c); if (fsck_err_on(!g->gen_valid, c, ptr_to_missing_alloc_key, @@ 
-657,7 +666,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id continue; if (fsck_err_on(bucket_data_type(g->data_type) && - bucket_data_type(g->data_type) != data_type, c, + bucket_data_type(g->data_type) != + bucket_data_type(data_type), c, ptr_bucket_data_type_mismatch, "bucket %u:%zu different types of data in same bucket: %s, %s\n" "while marking %s", @@ -698,18 +708,13 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id } if (do_update) { - struct bkey_ptrs ptrs; - union bch_extent_entry *entry; - struct bch_extent_ptr *ptr; - struct bkey_i *new; - if (is_root) { bch_err(c, "cannot update btree roots yet"); ret = -EINVAL; goto err; } - new = kmalloc(bkey_bytes(k->k), GFP_KERNEL); + struct bkey_i *new = kmalloc(bkey_bytes(k->k), GFP_KERNEL); if (!new) { ret = -BCH_ERR_ENOMEM_gc_repair_key; bch_err_msg(c, ret, "allocating new key"); @@ -724,7 +729,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id * btree node isn't there anymore, the read path will * sort it out: */ - ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); + struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); bkey_for_each_ptr(ptrs, ptr) { struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); struct bucket *g = PTR_GC_BUCKET(ca, ptr); @@ -732,19 +737,26 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id ptr->gen = g->gen; } } else { - bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({ - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - struct bucket *g = PTR_GC_BUCKET(ca, ptr); - enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr); - - (ptr->cached && - (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) || - (!ptr->cached && - gen_cmp(ptr->gen, g->gen) < 0) || - gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX || - (g->data_type && - g->data_type != data_type); - })); + struct bkey_ptrs ptrs; + union bch_extent_entry *entry; +restart_drop_ptrs: + ptrs = 
bch2_bkey_ptrs(bkey_i_to_s(new)); + bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) { + struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); + struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); + enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry); + + if ((p.ptr.cached && + (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) || + (!p.ptr.cached && + gen_cmp(p.ptr.gen, g->gen) < 0) || + gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX || + (g->data_type && + g->data_type != data_type)) { + bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr); + goto restart_drop_ptrs; + } + } again: ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); bkey_extent_entry_for_each(ptrs, entry) { @@ -774,12 +786,6 @@ found: } } - ret = bch2_journal_key_insert_take(c, btree_id, level, new); - if (ret) { - kfree(new); - goto err; - } - if (level) bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new); @@ -793,6 +799,12 @@ found: bch_info(c, "new key %s", buf.buf); } + ret = bch2_journal_key_insert_take(c, btree_id, level, new); + if (ret) { + kfree(new); + goto err; + } + *k = bkey_i_to_s_c(new); } err: @@ -819,10 +831,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, BUG_ON(bch2_journal_seq_verify && k->k->version.lo > atomic64_read(&c->journal.seq)); - ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k); - if (ret) - goto err; - if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c, bkey_version_in_future, "key version number higher than recorded: %llu > %llu", @@ -831,8 +839,13 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, atomic64_set(&c->key_version, k->k->version.lo); } + ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k); + if (ret) + goto err; + ret = commit_do(trans, NULL, NULL, 0, - bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC)); + bch2_key_trigger(trans, btree_id, level, old, + 
unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC)); fsck_err: err: bch_err_fn(c, ret); @@ -841,42 +854,30 @@ err: static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial) { - struct bch_fs *c = trans->c; struct btree_node_iter iter; struct bkey unpacked; struct bkey_s_c k; - struct bkey_buf prev, cur; int ret = 0; + ret = bch2_btree_node_check_topology(trans, b); + if (ret) + return ret; + if (!btree_node_type_needs_gc(btree_node_type(b))) return 0; bch2_btree_node_iter_init_from_start(&iter, b); - bch2_bkey_buf_init(&prev); - bch2_bkey_buf_init(&cur); - bkey_init(&prev.k->k); while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) { ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false, &k, initial); if (ret) - break; + return ret; bch2_btree_node_iter_advance(&iter, b); - - if (b->c.level) { - bch2_bkey_buf_reassemble(&cur, c, k); - - ret = bch2_gc_check_topology(c, b, &prev, cur, - bch2_btree_node_iter_end(&iter)); - if (ret) - break; - } } - bch2_bkey_buf_exit(&cur, c); - bch2_bkey_buf_exit(&prev, c); - return ret; + return 0; } static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id, @@ -925,14 +926,16 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b struct bch_fs *c = trans->c; struct btree_and_journal_iter iter; struct bkey_s_c k; - struct bkey_buf cur, prev; + struct bkey_buf cur; struct printbuf buf = PRINTBUF; int ret = 0; + ret = bch2_btree_node_check_topology(trans, b); + if (ret) + return ret; + bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); - bch2_bkey_buf_init(&prev); bch2_bkey_buf_init(&cur); - bkey_init(&prev.k->k); while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { BUG_ON(bpos_lt(k.k->p, b->data->min_key)); @@ -943,20 +946,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b if (ret) goto fsck_err; - if (b->c.level) { - bch2_bkey_buf_reassemble(&cur, c, k); - k = bkey_i_to_s_c(cur.k); - - 
bch2_btree_and_journal_iter_advance(&iter); - - ret = bch2_gc_check_topology(c, b, - &prev, cur, - !bch2_btree_and_journal_iter_peek(&iter).k); - if (ret) - goto fsck_err; - } else { - bch2_btree_and_journal_iter_advance(&iter); - } + bch2_btree_and_journal_iter_advance(&iter); } if (b->c.level > target_depth) { @@ -1015,7 +1005,6 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b } fsck_err: bch2_bkey_buf_exit(&cur, c); - bch2_bkey_buf_exit(&prev, c); bch2_btree_and_journal_iter_exit(&iter); printbuf_exit(&buf); return ret; @@ -1033,9 +1022,6 @@ static int bch2_gc_btree_init(struct btree_trans *trans, b = bch2_btree_id_root(c, btree_id)->b; - if (btree_node_fake(b)) - return 0; - six_lock_read(&b->c.lock, NULL, NULL); printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->data->min_key); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 34df8ccc5fec..d7de82ac3893 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -654,6 +654,7 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) */ bch2_bset_set_no_aux_tree(b, b->set); bch2_btree_build_aux_trees(b); + b->nr = bch2_btree_node_count_keys(b); struct bkey_s_c k; struct bkey unpacked; @@ -1263,10 +1264,12 @@ out: return retry_read; fsck_err: if (ret == -BCH_ERR_btree_node_read_err_want_retry || - ret == -BCH_ERR_btree_node_read_err_must_retry) + ret == -BCH_ERR_btree_node_read_err_must_retry) { retry_read = 1; - else + } else { set_btree_node_read_error(b); + bch2_btree_lost_data(c, b->c.btree_id); + } goto out; } @@ -1327,6 +1330,7 @@ start: if (!can_retry) { set_btree_node_read_error(b); + bch2_btree_lost_data(c, b->c.btree_id); break; } } @@ -1526,9 +1530,10 @@ fsck_err: ret = -1; } - if (ret) + if (ret) { set_btree_node_read_error(b); - else if (*saw_error) + bch2_btree_lost_data(c, b->c.btree_id); + } else if (*saw_error) bch2_btree_node_rewrite_async(c, b); for (i = 0; i < ra->nr; i++) { @@ -1657,13 +1662,14 @@ void 
bch2_btree_node_read(struct btree_trans *trans, struct btree *b, prt_str(&buf, "btree node read error: no device to read from\n at "); bch2_btree_pos_to_text(&buf, c, b); - bch_err(c, "%s", buf.buf); + bch_err_ratelimited(c, "%s", buf.buf); if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) bch2_fatal_error(c); set_btree_node_read_error(b); + bch2_btree_lost_data(c, b->c.btree_id); clear_btree_node_read_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); printbuf_exit(&buf); @@ -1860,7 +1866,7 @@ static void btree_node_write_work(struct work_struct *work) } else { ret = bch2_trans_do(c, NULL, NULL, 0, bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, - BCH_WATERMARK_reclaim| + BCH_WATERMARK_interior_updates| BCH_TRANS_COMMIT_journal_reclaim| BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_check_rw, diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 51bcdc6c6d1c..2a211a4bebd1 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -927,8 +927,22 @@ static __always_inline int btree_path_down(struct btree_trans *trans, if (ret) goto err; } else { - bch2_bkey_buf_unpack(&tmp, c, l->b, - bch2_btree_node_iter_peek(&l->iter, l->b)); + struct bkey_packed *k = bch2_btree_node_iter_peek(&l->iter, l->b); + if (!k) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "node not found at pos "); + bch2_bpos_to_text(&buf, path->pos); + prt_str(&buf, " within parent node "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&l->b->key)); + + bch2_fs_fatal_error(c, "%s", buf.buf); + printbuf_exit(&buf); + ret = -BCH_ERR_btree_need_topology_repair; + goto err; + } + + bch2_bkey_buf_unpack(&tmp, c, l->b, k); if ((flags & BTREE_ITER_PREFETCH) && c->opts.btree_node_prefetch) { @@ -962,7 +976,6 @@ err: return ret; } - static int bch2_btree_path_traverse_all(struct btree_trans *trans) { struct bch_fs *c = trans->c; @@ -2790,6 +2803,31 @@ void 
*__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) struct btree_transaction_stats *s = btree_trans_stats(trans); s->max_mem = max(s->max_mem, new_bytes); + if (trans->used_mempool) { + if (trans->mem_bytes >= new_bytes) + goto out_change_top; + + /* No more space from mempool item, need malloc new one */ + new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN); + if (unlikely(!new_mem)) { + bch2_trans_unlock(trans); + + new_mem = kmalloc(new_bytes, GFP_KERNEL); + if (!new_mem) + return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + + ret = bch2_trans_relock(trans); + if (ret) { + kfree(new_mem); + return ERR_PTR(ret); + } + } + memcpy(new_mem, trans->mem, trans->mem_top); + trans->used_mempool = false; + mempool_free(trans->mem, &c->btree_trans_mem_pool); + goto out_new_mem; + } + new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); if (unlikely(!new_mem)) { bch2_trans_unlock(trans); @@ -2798,6 +2836,8 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); new_bytes = BTREE_TRANS_MEM_MAX; + memcpy(new_mem, trans->mem, trans->mem_top); + trans->used_mempool = true; kfree(trans->mem); } @@ -2811,7 +2851,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) if (ret) return ERR_PTR(ret); } - +out_new_mem: trans->mem = new_mem; trans->mem_bytes = new_bytes; @@ -2819,7 +2859,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced)); } - +out_change_top: p = trans->mem + trans->mem_top; trans->mem_top += size; memset(p, 0, size); @@ -3093,7 +3133,7 @@ void bch2_trans_put(struct btree_trans *trans) if (paths_allocated != trans->_paths_allocated) kvfree_rcu_mightsleep(paths_allocated); - if (trans->mem_bytes == BTREE_TRANS_MEM_MAX) + 
if (trans->used_mempool) mempool_free(trans->mem, &c->btree_trans_mem_pool); else kfree(trans->mem); diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 50e04356d72c..5cbcbfe85235 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -261,6 +261,22 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id, return bch2_journal_key_insert(c, id, level, &whiteout); } +bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree, + unsigned level, struct bpos pos) +{ + struct journal_keys *keys = &trans->c->journal_keys; + size_t idx = bch2_journal_key_search(keys, btree, level, pos); + + if (!trans->journal_replay_not_finished) + return false; + + return (idx < keys->size && + keys->data[idx].btree_id == btree && + keys->data[idx].level == level && + bpos_eq(keys->data[idx].k->k.p, pos) && + bkey_deleted(&keys->data[idx].k->k)); +} + void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, unsigned level, struct bpos pos) { @@ -363,7 +379,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) { - struct bkey_s_c btree_k, journal_k, ret; + struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret; if (iter->prefetch && iter->journal.level) btree_and_journal_iter_prefetch(iter); @@ -375,9 +391,10 @@ again: bpos_lt(btree_k.k->p, iter->pos)) bch2_journal_iter_advance_btree(iter); - while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k && - bpos_lt(journal_k.k->p, iter->pos)) - bch2_journal_iter_advance(&iter->journal); + if (iter->trans->journal_replay_not_finished) + while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k && + bpos_lt(journal_k.k->p, iter->pos)) + bch2_journal_iter_advance(&iter->journal); ret = journal_k.k && (!btree_k.k || bpos_le(journal_k.k->p, btree_k.k->p)) @@ -435,7 +452,9 @@ void 
bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, bch2_btree_node_iter_init_from_start(&node_iter, b); __bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key); - list_add(&iter->journal.list, &trans->c->journal_iters); + if (trans->journal_replay_not_finished && + !test_bit(BCH_FS_may_go_rw, &trans->c->flags)) + list_add(&iter->journal.list, &trans->c->journal_iters); } /* sort and dedup all keys in the journal: */ @@ -548,3 +567,22 @@ int bch2_journal_keys_sort(struct bch_fs *c) bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_read, keys->nr); return 0; } + +void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree, + unsigned level_min, unsigned level_max, + struct bpos start, struct bpos end) +{ + struct journal_keys *keys = &c->journal_keys; + size_t dst = 0; + + move_gap(keys, keys->nr); + + darray_for_each(*keys, i) + if (!(i->btree_id == btree && + i->level >= level_min && + i->level <= level_max && + bpos_ge(i->k->k.p, start) && + bpos_le(i->k->k.p, end))) + keys->data[dst++] = *i; + keys->nr = keys->gap = dst; +} diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h index c9d19da3ea04..af25046ebcaa 100644 --- a/fs/bcachefs/btree_journal_iter.h +++ b/fs/bcachefs/btree_journal_iter.h @@ -40,8 +40,8 @@ int bch2_journal_key_insert(struct bch_fs *, enum btree_id, unsigned, struct bkey_i *); int bch2_journal_key_delete(struct bch_fs *, enum btree_id, unsigned, struct bpos); -void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, - unsigned, struct bpos); +bool bch2_key_deleted_in_journal(struct btree_trans *, enum btree_id, unsigned, struct bpos); +void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, unsigned, struct bpos); void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); @@ -66,4 +66,8 @@ void 
bch2_journal_entries_free(struct bch_fs *); int bch2_journal_keys_sort(struct bch_fs *); +void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id, + unsigned, unsigned, + struct bpos, struct bpos); + #endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */ diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c new file mode 100644 index 000000000000..3f33be7e5e5c --- /dev/null +++ b/fs/bcachefs/btree_node_scan.c @@ -0,0 +1,495 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "btree_cache.h" +#include "btree_io.h" +#include "btree_journal_iter.h" +#include "btree_node_scan.h" +#include "btree_update_interior.h" +#include "buckets.h" +#include "error.h" +#include "journal_io.h" +#include "recovery_passes.h" + +#include <linux/kthread.h> +#include <linux/sort.h> + +struct find_btree_nodes_worker { + struct closure *cl; + struct find_btree_nodes *f; + struct bch_dev *ca; +}; + +static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n) +{ + prt_printf(out, "%s l=%u seq=%u cookie=%llx ", bch2_btree_id_str(n->btree_id), n->level, n->seq, n->cookie); + bch2_bpos_to_text(out, n->min_key); + prt_str(out, "-"); + bch2_bpos_to_text(out, n->max_key); + + if (n->range_updated) + prt_str(out, " range updated"); + if (n->overwritten) + prt_str(out, " overwritten"); + + for (unsigned i = 0; i < n->nr_ptrs; i++) { + prt_char(out, ' '); + bch2_extent_ptr_to_text(out, c, n->ptrs + i); + } +} + +static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes) +{ + printbuf_indent_add(out, 2); + darray_for_each(nodes, i) { + found_btree_node_to_text(out, c, i); + prt_newline(out); + } + printbuf_indent_sub(out, 2); +} + +static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f) +{ + struct bkey_i_btree_ptr_v2 *bp = bkey_btree_ptr_v2_init(k); + + set_bkey_val_u64s(&bp->k, sizeof(struct bch_btree_ptr_v2) / sizeof(u64) + 
f->nr_ptrs); + bp->k.p = f->max_key; + bp->v.seq = cpu_to_le64(f->cookie); + bp->v.sectors_written = 0; + bp->v.flags = 0; + bp->v.min_key = f->min_key; + SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated); + memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs); +} + +static bool found_btree_node_is_readable(struct btree_trans *trans, + const struct found_btree_node *f) +{ + struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k; + + found_btree_node_to_key(&k.k, f); + + struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false); + bool ret = !IS_ERR_OR_NULL(b); + if (ret) + six_unlock_read(&b->c.lock); + + /* + * We might update this node's range; if that happens, we need the node + * to be re-read so the read path can trim keys that are no longer in + * this node + */ + if (b != btree_node_root(trans->c, b)) + bch2_btree_node_evict(trans, &k.k); + return ret; +} + +static int found_btree_node_cmp_cookie(const void *_l, const void *_r) +{ + const struct found_btree_node *l = _l; + const struct found_btree_node *r = _r; + + return cmp_int(l->btree_id, r->btree_id) ?: + cmp_int(l->level, r->level) ?: + cmp_int(l->cookie, r->cookie); +} + +/* + * Given two found btree nodes, if their sequence numbers are equal, take the + * one that's readable: + */ +static int found_btree_node_cmp_time(const struct found_btree_node *l, + const struct found_btree_node *r) +{ + return cmp_int(l->seq, r->seq); +} + +static int found_btree_node_cmp_pos(const void *_l, const void *_r) +{ + const struct found_btree_node *l = _l; + const struct found_btree_node *r = _r; + + return cmp_int(l->btree_id, r->btree_id) ?: + -cmp_int(l->level, r->level) ?: + bpos_cmp(l->min_key, r->min_key) ?: + -found_btree_node_cmp_time(l, r); +} + +static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, + struct bio *bio, struct btree_node *bn, u64 offset) +{ + struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes); 
+ + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); + bio->bi_iter.bi_sector = offset; + bch2_bio_map(bio, bn, PAGE_SIZE); + + submit_bio_wait(bio); + if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read, + "IO error in try_read_btree_node() at %llu: %s", + offset, bch2_blk_status_to_str(bio->bi_status))) + return; + + if (le64_to_cpu(bn->magic) != bset_magic(c)) + return; + + rcu_read_lock(); + struct found_btree_node n = { + .btree_id = BTREE_NODE_ID(bn), + .level = BTREE_NODE_LEVEL(bn), + .seq = BTREE_NODE_SEQ(bn), + .cookie = le64_to_cpu(bn->keys.seq), + .min_key = bn->min_key, + .max_key = bn->max_key, + .nr_ptrs = 1, + .ptrs[0].type = 1 << BCH_EXTENT_ENTRY_ptr, + .ptrs[0].offset = offset, + .ptrs[0].dev = ca->dev_idx, + .ptrs[0].gen = *bucket_gen(ca, sector_to_bucket(ca, offset)), + }; + rcu_read_unlock(); + + if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) { + mutex_lock(&f->lock); + if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) { + bch_err(c, "try_read_btree_node() can't handle endian conversion"); + f->ret = -EINVAL; + goto unlock; + } + + if (darray_push(&f->nodes, n)) + f->ret = -ENOMEM; +unlock: + mutex_unlock(&f->lock); + } +} + +static int read_btree_nodes_worker(void *p) +{ + struct find_btree_nodes_worker *w = p; + struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes); + struct bch_dev *ca = w->ca; + void *buf = (void *) __get_free_page(GFP_KERNEL); + struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL); + unsigned long last_print = jiffies; + + if (!buf || !bio) { + bch_err(c, "read_btree_nodes_worker: error allocating bio/buf"); + w->f->ret = -ENOMEM; + goto err; + } + + for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++) + for (unsigned bucket_offset = 0; + bucket_offset + btree_sectors(c) <= ca->mi.bucket_size; + bucket_offset += btree_sectors(c)) { + if (time_after(jiffies, last_print + HZ * 30)) { + u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset; + u64 
end_sector = ca->mi.nbuckets * ca->mi.bucket_size; + + bch_info(ca, "%s: %2u%% done", __func__, + (unsigned) div64_u64(cur_sector * 100, end_sector)); + last_print = jiffies; + } + + try_read_btree_node(w->f, ca, bio, buf, + bucket * ca->mi.bucket_size + bucket_offset); + } +err: + bio_put(bio); + free_page((unsigned long) buf); + percpu_ref_get(&ca->io_ref); + closure_put(w->cl); + kfree(w); + return 0; +} + +static int read_btree_nodes(struct find_btree_nodes *f) +{ + struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes); + struct closure cl; + int ret = 0; + + closure_init_stack(&cl); + + for_each_online_member(c, ca) { + struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL); + struct task_struct *t; + + if (!w) { + percpu_ref_put(&ca->io_ref); + ret = -ENOMEM; + goto err; + } + + percpu_ref_get(&ca->io_ref); + closure_get(&cl); + w->cl = &cl; + w->f = f; + w->ca = ca; + + t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name); + ret = IS_ERR_OR_NULL(t); + if (ret) { + percpu_ref_put(&ca->io_ref); + closure_put(&cl); + f->ret = ret; + bch_err(c, "error starting kthread: %i", ret); + break; + } + } +err: + closure_sync(&cl); + return f->ret ?: ret; +} + +static void bubble_up(struct found_btree_node *n, struct found_btree_node *end) +{ + while (n + 1 < end && + found_btree_node_cmp_pos(n, n + 1) > 0) { + swap(n[0], n[1]); + n++; + } +} + +static int handle_overwrites(struct bch_fs *c, + struct found_btree_node *start, + struct found_btree_node *end) +{ + struct found_btree_node *n; +again: + for (n = start + 1; + n < end && + n->btree_id == start->btree_id && + n->level == start->level && + bpos_lt(n->min_key, start->max_key); + n++) { + int cmp = found_btree_node_cmp_time(start, n); + + if (cmp > 0) { + if (bpos_cmp(start->max_key, n->max_key) >= 0) + n->overwritten = true; + else { + n->range_updated = true; + n->min_key = bpos_successor(start->max_key); + n->range_updated = true; + bubble_up(n, end); + goto 
again; + } + } else if (cmp < 0) { + BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0); + + start->max_key = bpos_predecessor(n->min_key); + start->range_updated = true; + } else { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "overlapping btree nodes with same seq! halting\n "); + found_btree_node_to_text(&buf, c, start); + prt_str(&buf, "\n "); + found_btree_node_to_text(&buf, c, n); + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); + return -1; + } + } + + return 0; +} + +int bch2_scan_for_btree_nodes(struct bch_fs *c) +{ + struct find_btree_nodes *f = &c->found_btree_nodes; + struct printbuf buf = PRINTBUF; + size_t dst; + int ret = 0; + + if (f->nodes.nr) + return 0; + + mutex_init(&f->lock); + + ret = read_btree_nodes(f); + if (ret) + return ret; + + if (!f->nodes.nr) { + bch_err(c, "%s: no btree nodes found", __func__); + ret = -EINVAL; + goto err; + } + + if (0 && c->opts.verbose) { + printbuf_reset(&buf); + prt_printf(&buf, "%s: nodes found:\n", __func__); + found_btree_nodes_to_text(&buf, c, f->nodes); + bch2_print_string_as_lines(KERN_INFO, buf.buf); + } + + sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL); + + dst = 0; + darray_for_each(f->nodes, i) { + struct found_btree_node *prev = dst ? 
f->nodes.data + dst - 1 : NULL; + + if (prev && + prev->cookie == i->cookie) { + if (prev->nr_ptrs == ARRAY_SIZE(prev->ptrs)) { + bch_err(c, "%s: found too many replicas for btree node", __func__); + ret = -EINVAL; + goto err; + } + prev->ptrs[prev->nr_ptrs++] = i->ptrs[0]; + } else { + f->nodes.data[dst++] = *i; + } + } + f->nodes.nr = dst; + + sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); + + if (0 && c->opts.verbose) { + printbuf_reset(&buf); + prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__); + found_btree_nodes_to_text(&buf, c, f->nodes); + bch2_print_string_as_lines(KERN_INFO, buf.buf); + } + + dst = 0; + darray_for_each(f->nodes, i) { + if (i->overwritten) + continue; + + ret = handle_overwrites(c, i, &darray_top(f->nodes)); + if (ret) + goto err; + + BUG_ON(i->overwritten); + f->nodes.data[dst++] = *i; + } + f->nodes.nr = dst; + + if (c->opts.verbose) { + printbuf_reset(&buf); + prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__); + found_btree_nodes_to_text(&buf, c, f->nodes); + bch2_print_string_as_lines(KERN_INFO, buf.buf); + } + + eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); +err: + printbuf_exit(&buf); + return ret; +} + +static int found_btree_node_range_start_cmp(const void *_l, const void *_r) +{ + const struct found_btree_node *l = _l; + const struct found_btree_node *r = _r; + + return cmp_int(l->btree_id, r->btree_id) ?: + -cmp_int(l->level, r->level) ?: + bpos_cmp(l->max_key, r->min_key); +} + +#define for_each_found_btree_node_in_range(_f, _search, _idx) \ + for (size_t _idx = eytzinger0_find_gt((_f)->nodes.data, (_f)->nodes.nr, \ + sizeof((_f)->nodes.data[0]), \ + found_btree_node_range_start_cmp, &search); \ + _idx < (_f)->nodes.nr && \ + (_f)->nodes.data[_idx].btree_id == _search.btree_id && \ + (_f)->nodes.data[_idx].level == _search.level && \ + bpos_lt((_f)->nodes.data[_idx].min_key, _search.max_key); \ + 
_idx = eytzinger0_next(_idx, (_f)->nodes.nr)) + +bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b) +{ + struct find_btree_nodes *f = &c->found_btree_nodes; + + struct found_btree_node search = { + .btree_id = b->c.btree_id, + .level = b->c.level, + .min_key = b->data->min_key, + .max_key = b->key.k.p, + }; + + for_each_found_btree_node_in_range(f, search, idx) + if (f->nodes.data[idx].seq > BTREE_NODE_SEQ(b->data)) + return true; + return false; +} + +bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree) +{ + struct found_btree_node search = { + .btree_id = btree, + .level = 0, + .min_key = POS_MIN, + .max_key = SPOS_MAX, + }; + + for_each_found_btree_node_in_range(&c->found_btree_nodes, search, idx) + return true; + return false; +} + +int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, + unsigned level, struct bpos node_min, struct bpos node_max) +{ + struct find_btree_nodes *f = &c->found_btree_nodes; + + int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); + if (ret) + return ret; + + if (c->opts.verbose) { + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level); + bch2_bpos_to_text(&buf, node_min); + prt_str(&buf, " - "); + bch2_bpos_to_text(&buf, node_max); + + bch_info(c, "%s(): %s", __func__, buf.buf); + printbuf_exit(&buf); + } + + struct found_btree_node search = { + .btree_id = btree, + .level = level, + .min_key = node_min, + .max_key = node_max, + }; + + for_each_found_btree_node_in_range(f, search, idx) { + struct found_btree_node n = f->nodes.data[idx]; + + n.range_updated |= bpos_lt(n.min_key, node_min); + n.min_key = bpos_max(n.min_key, node_min); + + n.range_updated |= bpos_gt(n.max_key, node_max); + n.max_key = bpos_min(n.max_key, node_max); + + struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp; + + found_btree_node_to_key(&tmp.k, &n); + + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, 
c, bkey_i_to_s_c(&tmp.k)); + bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); + printbuf_exit(&buf); + + BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL)); + + ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); + if (ret) + return ret; + } + + return 0; +} + +void bch2_find_btree_nodes_exit(struct find_btree_nodes *f) +{ + darray_exit(&f->nodes); +} diff --git a/fs/bcachefs/btree_node_scan.h b/fs/bcachefs/btree_node_scan.h new file mode 100644 index 000000000000..08687b209787 --- /dev/null +++ b/fs/bcachefs/btree_node_scan.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_NODE_SCAN_H +#define _BCACHEFS_BTREE_NODE_SCAN_H + +int bch2_scan_for_btree_nodes(struct bch_fs *); +bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *); +bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id); +int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos); +void bch2_find_btree_nodes_exit(struct find_btree_nodes *); + +#endif /* _BCACHEFS_BTREE_NODE_SCAN_H */ diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h new file mode 100644 index 000000000000..abb7b27d556a --- /dev/null +++ b/fs/bcachefs/btree_node_scan_types.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_NODE_SCAN_TYPES_H +#define _BCACHEFS_BTREE_NODE_SCAN_TYPES_H + +#include "darray.h" + +struct found_btree_node { + bool range_updated:1; + bool overwritten:1; + u8 btree_id; + u8 level; + u32 seq; + u64 cookie; + + struct bpos min_key; + struct bpos max_key; + + unsigned nr_ptrs; + struct bch_extent_ptr ptrs[BCH_REPLICAS_MAX]; +}; + +typedef DARRAY(struct found_btree_node) found_btree_nodes; + +struct find_btree_nodes { + int ret; + struct mutex lock; + found_btree_nodes nodes; +}; + +#endif /* _BCACHEFS_BTREE_NODE_SCAN_TYPES_H */ diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 
30d69a6d133e..aa9da4970740 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -318,7 +318,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) && test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) && i->k->k.p.snapshot && - bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot)); + bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot) > 0); } static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans, @@ -887,6 +887,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, int ret, unsigned long trace_ip) { struct bch_fs *c = trans->c; + enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; switch (ret) { case -BCH_ERR_btree_insert_btree_node_full: @@ -905,7 +906,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, * flag */ if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && - (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) { + watermark < BCH_WATERMARK_reclaim) { ret = -BCH_ERR_journal_reclaim_would_deadlock; break; } diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index a4b40c1656a5..8e47e260eba5 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -38,6 +38,9 @@ static noinline int extent_front_merge(struct btree_trans *trans, struct bkey_i *update; int ret; + if (unlikely(trans->journal_replay_not_finished)) + return 0; + update = bch2_bkey_make_mut_noupdate(trans, k); ret = PTR_ERR_OR_ZERO(update); if (ret) @@ -69,6 +72,9 @@ static noinline int extent_back_merge(struct btree_trans *trans, struct bch_fs *c = trans->c; int ret; + if (unlikely(trans->journal_replay_not_finished)) + return 0; + ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?: bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p); if (ret < 0) diff --git a/fs/bcachefs/btree_update_interior.c 
b/fs/bcachefs/btree_update_interior.c index b2f5f2e50f7e..32397b99752f 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_buf.h" #include "bkey_methods.h" #include "btree_cache.h" #include "btree_gc.h" @@ -18,12 +19,20 @@ #include "journal.h" #include "journal_reclaim.h" #include "keylist.h" +#include "recovery_passes.h" #include "replicas.h" #include "super-io.h" #include "trace.h" #include <linux/random.h> +const char * const bch2_btree_update_modes[] = { +#define x(t) #t, + BCH_WATERMARKS() +#undef x + NULL +}; + static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *, btree_path_idx_t, struct btree *, struct keylist *); static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); @@ -44,56 +53,103 @@ static btree_path_idx_t get_unlocked_mut_path(struct btree_trans *trans, return path_idx; } -/* Debug code: */ - /* * Verify that child nodes correctly span parent node's range: */ -static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) +int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) { -#ifdef CONFIG_BCACHEFS_DEBUG - struct bpos next_node = b->data->min_key; - struct btree_node_iter iter; + struct bch_fs *c = trans->c; + struct bpos node_min = b->key.k.type == KEY_TYPE_btree_ptr_v2 + ? 
bkey_i_to_btree_ptr_v2(&b->key)->v.min_key + : b->data->min_key; + struct btree_and_journal_iter iter; struct bkey_s_c k; - struct bkey_s_c_btree_ptr_v2 bp; - struct bkey unpacked; - struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; + struct printbuf buf = PRINTBUF; + struct bkey_buf prev; + int ret = 0; - BUG_ON(!b->c.level); + BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && + !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, + b->data->min_key)); - if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) - return; + if (!b->c.level) + return 0; - bch2_btree_node_iter_init_from_start(&iter, b); + bch2_bkey_buf_init(&prev); + bkey_init(&prev.k->k); + bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); - while (1) { - k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked); + while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { if (k.k->type != KEY_TYPE_btree_ptr_v2) - break; - bp = bkey_s_c_to_btree_ptr_v2(k); + goto out; - if (!bpos_eq(next_node, bp.v->min_key)) { - bch2_dump_btree_node(c, b); - bch2_bpos_to_text(&buf1, next_node); - bch2_bpos_to_text(&buf2, bp.v->min_key); - panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf); - } + struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); - bch2_btree_node_iter_advance(&iter, b); + struct bpos expected_min = bkey_deleted(&prev.k->k) + ? 
node_min + : bpos_successor(prev.k->k.p); - if (bch2_btree_node_iter_end(&iter)) { - if (!bpos_eq(k.k->p, b->key.k.p)) { - bch2_dump_btree_node(c, b); - bch2_bpos_to_text(&buf1, b->key.k.p); - bch2_bpos_to_text(&buf2, k.k->p); - panic("expected end %s got %s\n", buf1.buf, buf2.buf); - } - break; + if (!bpos_eq(expected_min, bp.v->min_key)) { + bch2_topology_error(c); + + printbuf_reset(&buf); + prt_str(&buf, "end of prev node doesn't match start of next node\n"), + prt_printf(&buf, " in btree %s level %u node ", + bch2_btree_id_str(b->c.btree_id), b->c.level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + prt_str(&buf, "\n prev "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); + prt_str(&buf, "\n next "); + bch2_bkey_val_to_text(&buf, c, k); + + need_fsck_err(c, btree_node_topology_bad_min_key, "%s", buf.buf); + goto topology_repair; } - next_node = bpos_successor(k.k->p); + bch2_bkey_buf_reassemble(&prev, c, k); + bch2_btree_and_journal_iter_advance(&iter); + } + + if (bkey_deleted(&prev.k->k)) { + bch2_topology_error(c); + + printbuf_reset(&buf); + prt_str(&buf, "empty interior node\n"); + prt_printf(&buf, " in btree %s level %u node ", + bch2_btree_id_str(b->c.btree_id), b->c.level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + + need_fsck_err(c, btree_node_topology_empty_interior_node, "%s", buf.buf); + goto topology_repair; + } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) { + bch2_topology_error(c); + + printbuf_reset(&buf); + prt_str(&buf, "last child node doesn't end at end of parent node\n"); + prt_printf(&buf, " in btree %s level %u node ", + bch2_btree_id_str(b->c.btree_id), b->c.level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + prt_str(&buf, "\n last key "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); + + need_fsck_err(c, btree_node_topology_bad_max_key, "%s", buf.buf); + goto topology_repair; } -#endif +out: +fsck_err: + bch2_btree_and_journal_iter_exit(&iter); + 
bch2_bkey_buf_exit(&prev, c); + printbuf_exit(&buf); + return ret; +topology_repair: + if ((c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) && + c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) { + bch2_inconsistent_error(c); + ret = -BCH_ERR_btree_need_topology_repair; + } else { + ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); + } + goto out; } /* Calculate ideal packed bkey format for new btree nodes: */ @@ -254,7 +310,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct open_buckets obs = { .nr = 0 }; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; - unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim + unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim ? BTREE_NODE_RESERVE : 0; int ret; @@ -638,7 +694,7 @@ static void btree_update_nodes_written(struct btree_update *as) * which may require allocations as well. 
*/ ret = commit_do(trans, &as->disk_res, &journal_seq, - BCH_WATERMARK_reclaim| + BCH_WATERMARK_interior_updates| BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_check_rw| BCH_TRANS_COMMIT_journal_reclaim, @@ -797,11 +853,11 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) mutex_lock(&c->btree_interior_update_lock); list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); - BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); + BUG_ON(as->mode != BTREE_UPDATE_none); BUG_ON(!btree_node_dirty(b)); BUG_ON(!b->c.level); - as->mode = BTREE_INTERIOR_UPDATING_NODE; + as->mode = BTREE_UPDATE_node; as->b = b; set_btree_node_write_blocked(b); @@ -824,7 +880,7 @@ static void btree_update_reparent(struct btree_update *as, lockdep_assert_held(&c->btree_interior_update_lock); child->b = NULL; - child->mode = BTREE_INTERIOR_UPDATING_AS; + child->mode = BTREE_UPDATE_update; bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, bch2_update_reparent_journal_pin_flush); @@ -835,7 +891,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b) struct bkey_i *insert = &b->key; struct bch_fs *c = as->c; - BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); + BUG_ON(as->mode != BTREE_UPDATE_none); BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > ARRAY_SIZE(as->journal_entries)); @@ -849,7 +905,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b) mutex_lock(&c->btree_interior_update_lock); list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); - as->mode = BTREE_INTERIOR_UPDATING_ROOT; + as->mode = BTREE_UPDATE_root; mutex_unlock(&c->btree_interior_update_lock); } @@ -1027,7 +1083,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans * struct bch_fs *c = as->c; u64 start_time = as->start_time; - BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE); + BUG_ON(as->mode == BTREE_UPDATE_none); if (as->took_gc_lock) up_read(&as->c->gc_lock); 
@@ -1072,7 +1128,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK; if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && - watermark != BCH_WATERMARK_reclaim) + watermark < BCH_WATERMARK_reclaim) journal_flags |= JOURNAL_RES_GET_NONBLOCK; ret = drop_locks_do(trans, @@ -1123,7 +1179,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, as->c = c; as->start_time = start_time; as->ip_started = _RET_IP_; - as->mode = BTREE_INTERIOR_NO_UPDATE; + as->mode = BTREE_UPDATE_none; + as->watermark = watermark; as->took_gc_lock = true; as->btree_id = path->btree_id; as->update_level = update_level; @@ -1168,7 +1225,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, */ if (bch2_err_matches(ret, ENOSPC) && (flags & BCH_TRANS_COMMIT_journal_reclaim) && - watermark != BCH_WATERMARK_reclaim) { + watermark < BCH_WATERMARK_reclaim) { ret = -BCH_ERR_journal_reclaim_would_deadlock; goto err; } @@ -1380,9 +1437,16 @@ static void __btree_split_node(struct btree_update *as, if (bkey_deleted(k)) continue; + uk = bkey_unpack_key(b, k); + + if (b->c.level && + u64s < n1_u64s && + u64s + k->u64s >= n1_u64s && + bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p)) + n1_u64s += k->u64s; + i = u64s >= n1_u64s; u64s += k->u64s; - uk = bkey_unpack_key(b, k); if (!i) n1_pos = uk.p; bch2_bkey_format_add_key(&format[i], &uk); @@ -1441,8 +1505,7 @@ static void __btree_split_node(struct btree_update *as, bch2_verify_btree_nr_keys(n[i]); - if (b->c.level) - btree_node_interior_verify(as->c, n[i]); + BUG_ON(bch2_btree_node_check_topology(trans, n[i])); } } @@ -1473,7 +1536,7 @@ static void btree_split_insert_keys(struct btree_update *as, __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); - btree_node_interior_verify(as->c, b); + BUG_ON(bch2_btree_node_check_topology(trans, b)); } } @@ -1488,9 +1551,14 @@ static int 
btree_split(struct btree_update *as, struct btree_trans *trans, u64 start_time = local_clock(); int ret = 0; + bch2_verify_btree_nr_keys(b); BUG_ON(!parent && (b != btree_node_root(c, b))); BUG_ON(parent && !btree_node_intent_locked(trans->paths + path, b->c.level + 1)); + ret = bch2_btree_node_check_topology(trans, b); + if (ret) + return ret; + bch2_btree_interior_update_will_free_node(as, b); if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) { @@ -1710,7 +1778,11 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t goto split; } - btree_node_interior_verify(c, b); + ret = bch2_btree_node_check_topology(trans, b); + if (ret) { + bch2_btree_node_unlock_write(trans, path, b); + return ret; + } bch2_btree_insert_keys_interior(as, trans, path, b, keys); @@ -1728,7 +1800,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t bch2_btree_node_unlock_write(trans, path, b); - btree_node_interior_verify(c, b); + BUG_ON(bch2_btree_node_check_topology(trans, b)); return 0; split: /* @@ -1818,9 +1890,12 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path, { struct bch_fs *c = trans->c; struct btree *b = bch2_btree_id_root(c, trans->paths[path].btree_id)->b; + + if (btree_node_fake(b)) + return bch2_btree_split_leaf(trans, path, flags); + struct btree_update *as = - bch2_btree_update_start(trans, trans->paths + path, - b->c.level, true, flags); + bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags); if (IS_ERR(as)) return PTR_ERR(as); @@ -2391,7 +2466,7 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b) bch2_btree_set_root_inmem(c, b); } -static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id) +static int __bch2_btree_root_alloc_fake(struct btree_trans *trans, enum btree_id id, unsigned level) { struct bch_fs *c = trans->c; struct closure cl; @@ -2410,7 +2485,7 @@ static int __bch2_btree_root_alloc(struct btree_trans 
*trans, enum btree_id id) set_btree_node_fake(b); set_btree_node_need_rewrite(b); - b->c.level = 0; + b->c.level = level; b->c.btree_id = id; bkey_btree_ptr_init(&b->key); @@ -2437,9 +2512,21 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id) return 0; } -void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) +void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level) +{ + bch2_trans_run(c, __bch2_btree_root_alloc_fake(trans, id, level)); +} + +static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as) { - bch2_trans_run(c, __bch2_btree_root_alloc(trans, id)); + prt_printf(out, "%ps: btree=%s watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", + (void *) as->ip_started, + bch2_btree_id_str(as->btree_id), + bch2_watermarks[as->watermark], + bch2_btree_update_modes[as->mode], + as->nodes_written, + closure_nr_remaining(&as->cl), + as->journal.seq); } void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) @@ -2448,12 +2535,7 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) mutex_lock(&c->btree_interior_update_lock); list_for_each_entry(as, &c->btree_interior_update_list, list) - prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n", - (void *) as->ip_started, - as->mode, - as->nodes_written, - closure_nr_remaining(&as->cl), - as->journal.seq); + bch2_btree_update_to_text(out, as); mutex_unlock(&c->btree_interior_update_lock); } diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index f651dd48aaa0..88dcf5a22a3b 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -10,6 +10,20 @@ #define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1)) +int bch2_btree_node_check_topology(struct btree_trans *, struct btree *); + +#define BTREE_UPDATE_MODES() \ + x(none) \ + x(node) \ + x(root) \ + 
x(update) + +enum btree_update_mode { +#define x(n) BTREE_UPDATE_##n, + BTREE_UPDATE_MODES() +#undef x +}; + /* * Tracks an in progress split/rewrite of a btree node and the update to the * parent node: @@ -37,14 +51,8 @@ struct btree_update { struct list_head list; struct list_head unwritten_list; - /* What kind of update are we doing? */ - enum { - BTREE_INTERIOR_NO_UPDATE, - BTREE_INTERIOR_UPDATING_NODE, - BTREE_INTERIOR_UPDATING_ROOT, - BTREE_INTERIOR_UPDATING_AS, - } mode; - + enum btree_update_mode mode; + enum bch_watermark watermark; unsigned nodes_written:1; unsigned took_gc_lock:1; @@ -54,7 +62,7 @@ struct btree_update { struct disk_reservation disk_res; /* - * BTREE_INTERIOR_UPDATING_NODE: + * BTREE_UPDATE_node: * The update that made the new nodes visible was a regular update to an * existing interior node - @b. We can't write out the update to @b * until the new nodes we created are finished writing, so we block @b @@ -163,7 +171,7 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *, struct bkey_i *, unsigned, bool); void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *); -void bch2_btree_root_alloc(struct bch_fs *, enum btree_id); +void bch2_btree_root_alloc_fake(struct bch_fs *, enum btree_id, unsigned); static inline unsigned btree_update_reserve_required(struct bch_fs *c, struct btree *b) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 5cbad8445782..baf63e2fddb6 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -11,6 +11,7 @@ #include "journal_reclaim.h" #include <linux/prefetch.h> +#include <linux/sort.h> static int bch2_btree_write_buffer_journal_flush(struct journal *, struct journal_entry_pin *, u64); @@ -46,6 +47,14 @@ static inline bool wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_ke #endif } +static int wb_key_seq_cmp(const void *_l, const void *_r) +{ + const struct btree_write_buffered_key *l = _l; + const 
struct btree_write_buffered_key *r = _r; + + return cmp_int(l->journal_seq, r->journal_seq); +} + /* Compare excluding idx, the low 24 bits: */ static inline bool wb_key_eq(const void *_l, const void *_r) { @@ -357,6 +366,11 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) */ trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr); + sort(wb->flushing.keys.data, + wb->flushing.keys.nr, + sizeof(wb->flushing.keys.data[0]), + wb_key_seq_cmp, NULL); + darray_for_each(wb->flushing.keys, i) { if (!i->journal_seq) continue; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 96edf2c34d43..941401a210f5 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -525,6 +525,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, "different types of data in same bucket: %s, %s", bch2_data_type_str(g->data_type), bch2_data_type_str(data_type))) { + BUG(); ret = -EIO; goto err; } @@ -628,6 +629,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans, bch2_data_type_str(ptr_data_type), (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + BUG(); ret = -EIO; goto err; } @@ -815,14 +817,14 @@ static int __mark_pointer(struct btree_trans *trans, static int bch2_trigger_pointer(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c k, struct extent_ptr_decoded p, - s64 *sectors, - unsigned flags) + const union bch_extent_entry *entry, + s64 *sectors, unsigned flags) { bool insert = !(flags & BTREE_TRIGGER_OVERWRITE); struct bpos bucket; struct bch_backpointer bp; - bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp); + bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, entry, &bucket, &bp); *sectors = insert ? 
bp.bucket_len : -((s64) bp.bucket_len); if (flags & BTREE_TRIGGER_TRANSACTIONAL) { @@ -851,7 +853,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, if (flags & BTREE_TRIGGER_GC) { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); - enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p); + enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry); percpu_down_read(&c->mark_lock); struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); @@ -979,7 +981,7 @@ static int __trigger_extent(struct btree_trans *trans, bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { s64 disk_sectors; - ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags); + ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags); if (ret < 0) return ret; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 6387e039f789..00aaf4bb5139 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -226,6 +226,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma fallthrough; case BCH_WATERMARK_btree_copygc: case BCH_WATERMARK_reclaim: + case BCH_WATERMARK_interior_updates: break; } diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 38defa19d52d..cbfa6459bdbc 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -7,7 +7,7 @@ #include "chardev.h" #include "journal.h" #include "move.h" -#include "recovery.h" +#include "recovery_passes.h" #include "replicas.h" #include "super.h" #include "super-io.h" diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 4150feca42a2..34731ee0217f 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -14,6 +14,7 @@ #include "move.h" #include "nocow_locking.h" #include "rebalance.h" +#include "snapshot.h" #include "subvolume.h" #include "trace.h" @@ -509,6 +510,14 @@ int bch2_data_update_init(struct btree_trans *trans, unsigned ptrs_locked = 0; int ret = 
0; + /* + * fs is corrupt we have a key for a snapshot node that doesn't exist, + * and we have to check for this because we go rw before repairing the + * snapshots table - just skip it, we can move it later. + */ + if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot))) + return -BCH_ERR_data_update_done; + bch2_bkey_buf_init(&m->k); bch2_bkey_buf_reassemble(&m->k, c, k); m->btree_id = btree_id; @@ -571,8 +580,7 @@ int bch2_data_update_init(struct btree_trans *trans, move_ctxt_wait_event(ctxt, (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, PTR_BUCKET_POS(c, &p.ptr), 0)) || - (!atomic_read(&ctxt->read_sectors) && - !atomic_read(&ctxt->write_sectors))); + list_empty(&ctxt->ios)); if (!locked) bch2_bucket_nocow_lock(&c->nocow_locks, diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index af25d8ec60f2..01a79fa3eacb 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -252,7 +252,8 @@ x(BCH_ERR_nopromote, nopromote_in_flight) \ x(BCH_ERR_nopromote, nopromote_no_writes) \ x(BCH_ERR_nopromote, nopromote_enomem) \ - x(0, need_inode_lock) + x(0, need_inode_lock) \ + x(0, invalid_snapshot_node) enum bch_errcode { BCH_ERR_START = 2048, diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 043431206799..82a6656c941c 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "error.h" -#include "recovery.h" +#include "journal.h" +#include "recovery_passes.h" #include "super.h" #include "thread_with_file.h" @@ -16,7 +17,8 @@ bool bch2_inconsistent_error(struct bch_fs *c) return false; case BCH_ON_ERROR_ro: if (bch2_fs_emergency_read_only(c)) - bch_err(c, "inconsistency detected - emergency read only"); + bch_err(c, "inconsistency detected - emergency read only at journal seq %llu", + journal_cur_seq(&c->journal)); return true; case BCH_ON_ERROR_panic: panic(bch2_fmt(c, "panic after error")); diff --git a/fs/bcachefs/error.h 
b/fs/bcachefs/error.h index ae1d6674c512..36caedf72d89 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -32,6 +32,12 @@ bool bch2_inconsistent_error(struct bch_fs *); int bch2_topology_error(struct bch_fs *); +#define bch2_fs_topology_error(c, ...) \ +({ \ + bch_err(c, "btree topology error: " __VA_ARGS__); \ + bch2_topology_error(c); \ +}) + #define bch2_fs_inconsistent(c, ...) \ ({ \ bch_err(c, __VA_ARGS__); \ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 61395b113df9..0e3ca99fbd2d 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -189,13 +189,18 @@ int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { + struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err, - btree_ptr_v2_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, + c, err, btree_ptr_v2_val_too_big, "value too big (%zu > %zu)", bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); + bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p), + c, err, btree_ptr_v2_min_key_bad, + "min_key > key"); + ret = bch2_bkey_ptrs_invalid(c, k, flags, err); fsck_err: return ret; @@ -973,6 +978,31 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) return bkey_deleted(k.k); } +void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr) +{ + struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] + ? bch_dev_bkey_exists(c, ptr->dev) + : NULL; + + if (!ca) { + prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev, + (u64) ptr->offset, ptr->gen, + ptr->cached ? 
" cached" : ""); + } else { + u32 offset; + u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); + + prt_printf(out, "ptr: %u:%llu:%u gen %u", + ptr->dev, b, offset, ptr->gen); + if (ptr->cached) + prt_str(out, " cached"); + if (ptr->unwritten) + prt_str(out, " unwritten"); + if (ca && ptr_stale(ca, ptr)) + prt_printf(out, " stale"); + } +} + void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { @@ -988,31 +1018,10 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " "); switch (__extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: { - const struct bch_extent_ptr *ptr = entry_to_ptr(entry); - struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] - ? bch_dev_bkey_exists(c, ptr->dev) - : NULL; - - if (!ca) { - prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev, - (u64) ptr->offset, ptr->gen, - ptr->cached ? " cached" : ""); - } else { - u32 offset; - u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); - - prt_printf(out, "ptr: %u:%llu:%u gen %u", - ptr->dev, b, offset, ptr->gen); - if (ptr->cached) - prt_str(out, " cached"); - if (ptr->unwritten) - prt_str(out, " unwritten"); - if (ca && ptr_stale(ca, ptr)) - prt_printf(out, " stale"); - } + case BCH_EXTENT_ENTRY_ptr: + bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry)); break; - } + case BCH_EXTENT_ENTRY_crc32: case BCH_EXTENT_ENTRY_crc64: case BCH_EXTENT_ENTRY_crc128: { diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index fd2669cdd76f..528e817eacbd 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -596,30 +596,6 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) return ret; } -static inline unsigned bch2_bkey_ptr_data_type(struct bkey_s_c k, const struct bch_extent_ptr *ptr) -{ - switch (k.k->type) { - case KEY_TYPE_btree_ptr: - case KEY_TYPE_btree_ptr_v2: - return BCH_DATA_btree; - case KEY_TYPE_extent: - case KEY_TYPE_reflink_v: - return 
BCH_DATA_user; - case KEY_TYPE_stripe: { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - - BUG_ON(ptr < s.v->ptrs || - ptr >= s.v->ptrs + s.v->nr_blocks); - - return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant - ? BCH_DATA_parity - : BCH_DATA_user; - } - default: - BUG(); - } -} - unsigned bch2_bkey_nr_ptrs(struct bkey_s_c); unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c); unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c); @@ -700,6 +676,7 @@ bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s); void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *); bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); +void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, diff --git a/fs/bcachefs/eytzinger.c b/fs/bcachefs/eytzinger.c new file mode 100644 index 000000000000..4ce5e957a6e9 --- /dev/null +++ b/fs/bcachefs/eytzinger.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "eytzinger.h" + +/** + * is_aligned - is this pointer & size okay for word-wide copying? + * @base: pointer to data + * @size: size of each element + * @align: required alignment (typically 4 or 8) + * + * Returns true if elements can be copied using word loads and stores. + * The size must be a multiple of the alignment, and the base address must + * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + * + * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)" + * to "if ((a | b) & mask)", so we do that by hand. 
+ */ +__attribute_const__ __always_inline +static bool is_aligned(const void *base, size_t size, unsigned char align) +{ + unsigned char lsbits = (unsigned char)size; + + (void)base; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + lsbits |= (unsigned char)(uintptr_t)base; +#endif + return (lsbits & (align - 1)) == 0; +} + +/** + * swap_words_32 - swap two elements in 32-bit chunks + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size (must be a multiple of 4) + * + * Exchange the two objects in memory. This exploits base+index addressing, + * which basically all CPUs have, to minimize loop overhead computations. + * + * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the + * bottom of the loop, even though the zero flag is still valid from the + * subtract (since the intervening mov instructions don't alter the flags). + * Gcc 8.1.0 doesn't have that problem. + */ +static void swap_words_32(void *a, void *b, size_t n) +{ + do { + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + } while (n); +} + +/** + * swap_words_64 - swap two elements in 64-bit chunks + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size (must be a multiple of 8) + * + * Exchange the two objects in memory. This exploits base+index + * addressing, which basically all CPUs have, to minimize loop overhead + * computations. + * + * We'd like to use 64-bit loads if possible. If they're not, emulating + * one requires base+index+4 addressing which x86 has but most other + * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads, + * but it's possible to have 64-bit loads without 64-bit pointers (e.g. + * x32 ABI). Are there any cases the kernel needs to worry about? 
+ */ +static void swap_words_64(void *a, void *b, size_t n) +{ + do { +#ifdef CONFIG_64BIT + u64 t = *(u64 *)(a + (n -= 8)); + *(u64 *)(a + n) = *(u64 *)(b + n); + *(u64 *)(b + n) = t; +#else + /* Use two 32-bit transfers to avoid base+index+4 addressing */ + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + + t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; +#endif + } while (n); +} + +/** + * swap_bytes - swap two elements a byte at a time + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size + * + * This is the fallback if alignment doesn't allow using larger chunks. + */ +static void swap_bytes(void *a, void *b, size_t n) +{ + do { + char t = ((char *)a)[--n]; + ((char *)a)[n] = ((char *)b)[n]; + ((char *)b)[n] = t; + } while (n); +} + +/* + * The values are arbitrary as long as they can't be confused with + * a pointer, but small integers make for the smallest compare + * instructions. + */ +#define SWAP_WORDS_64 (swap_r_func_t)0 +#define SWAP_WORDS_32 (swap_r_func_t)1 +#define SWAP_BYTES (swap_r_func_t)2 +#define SWAP_WRAPPER (swap_r_func_t)3 + +struct wrapper { + cmp_func_t cmp; + swap_func_t swap; +}; + +/* + * The function pointer is last to make tail calls most efficient if the + * compiler decides not to inline this function. 
+ */ +static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv) +{ + if (swap_func == SWAP_WRAPPER) { + ((const struct wrapper *)priv)->swap(a, b, (int)size); + return; + } + + if (swap_func == SWAP_WORDS_64) + swap_words_64(a, b, size); + else if (swap_func == SWAP_WORDS_32) + swap_words_32(a, b, size); + else if (swap_func == SWAP_BYTES) + swap_bytes(a, b, size); + else + swap_func(a, b, (int)size, priv); +} + +#define _CMP_WRAPPER ((cmp_r_func_t)0L) + +static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv) +{ + if (cmp == _CMP_WRAPPER) + return ((const struct wrapper *)priv)->cmp(a, b); + return cmp(a, b, priv); +} + +static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size, + cmp_r_func_t cmp_func, const void *priv, + size_t l, size_t r) +{ + return do_cmp(base + inorder_to_eytzinger0(l, n) * size, + base + inorder_to_eytzinger0(r, n) * size, + cmp_func, priv); +} + +static inline void eytzinger0_do_swap(void *base, size_t n, size_t size, + swap_r_func_t swap_func, const void *priv, + size_t l, size_t r) +{ + do_swap(base + inorder_to_eytzinger0(l, n) * size, + base + inorder_to_eytzinger0(r, n) * size, + size, swap_func, priv); +} + +void eytzinger0_sort_r(void *base, size_t n, size_t size, + cmp_r_func_t cmp_func, + swap_r_func_t swap_func, + const void *priv) +{ + int i, c, r; + + /* called from 'sort' without swap function, let's pick the default */ + if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap) + swap_func = NULL; + + if (!swap_func) { + if (is_aligned(base, size, 8)) + swap_func = SWAP_WORDS_64; + else if (is_aligned(base, size, 4)) + swap_func = SWAP_WORDS_32; + else + swap_func = SWAP_BYTES; + } + + /* heapify */ + for (i = n / 2 - 1; i >= 0; --i) { + for (r = i; r * 2 + 1 < n; r = c) { + c = r * 2 + 1; + + if (c + 1 < n && + eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) + c++; + + if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, 
c) >= 0) + break; + + eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); + } + } + + /* sort */ + for (i = n - 1; i > 0; --i) { + eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i); + + for (r = 0; r * 2 + 1 < i; r = c) { + c = r * 2 + 1; + + if (c + 1 < i && + eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) + c++; + + if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0) + break; + + eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); + } + } +} + +void eytzinger0_sort(void *base, size_t n, size_t size, + cmp_func_t cmp_func, + swap_func_t swap_func) +{ + struct wrapper w = { + .cmp = cmp_func, + .swap = swap_func, + }; + + return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); +} diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h index b04750dbf870..ee0e2df33322 100644 --- a/fs/bcachefs/eytzinger.h +++ b/fs/bcachefs/eytzinger.h @@ -5,23 +5,33 @@ #include <linux/bitops.h> #include <linux/log2.h> -#include "util.h" +#ifdef EYTZINGER_DEBUG +#define EYTZINGER_BUG_ON(cond) BUG_ON(cond) +#else +#define EYTZINGER_BUG_ON(cond) +#endif /* * Traversal for trees in eytzinger layout - a full binary tree layed out in an - * array - */ - -/* - * One based indexing version: + * array. * - * With one based indexing each level of the tree starts at a power of two - - * good for cacheline alignment: + * Consider using an eytzinger tree any time you would otherwise be doing binary + * search over an array. Binary search is a worst case scenario for branch + * prediction and prefetching, but in an eytzinger tree every node's children + * are adjacent in memory, thus we can prefetch children before knowing the + * result of the comparison, assuming multiple nodes fit on a cacheline. + * + * Two variants are provided, for one based indexing and zero based indexing. 
+ * + * Zero based indexing is more convenient, but one based indexing has better + * alignment and thus better performance because each new level of the tree + * starts at a power of two, and thus if element 0 was cacheline aligned, each + * new level will be as well. */ static inline unsigned eytzinger1_child(unsigned i, unsigned child) { - EBUG_ON(child > 1); + EYTZINGER_BUG_ON(child > 1); return (i << 1) + child; } @@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size) static inline unsigned eytzinger1_next(unsigned i, unsigned size) { - EBUG_ON(i > size); + EYTZINGER_BUG_ON(i > size); if (eytzinger1_right_child(i) <= size) { i = eytzinger1_right_child(i); @@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size) static inline unsigned eytzinger1_prev(unsigned i, unsigned size) { - EBUG_ON(i > size); + EYTZINGER_BUG_ON(i > size); if (eytzinger1_left_child(i) <= size) { i = eytzinger1_left_child(i) + 1; @@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, unsigned shift = __fls(size) - b; int s; - EBUG_ON(!i || i > size); + EYTZINGER_BUG_ON(!i || i > size); i ^= 1U << b; i <<= 1; @@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, unsigned shift; int s; - EBUG_ON(!i || i > size); + EYTZINGER_BUG_ON(!i || i > size); /* * sign bit trick: @@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) static inline unsigned eytzinger0_child(unsigned i, unsigned child) { - EBUG_ON(child > 1); + EYTZINGER_BUG_ON(child > 1); return (i << 1) + 1 + child; } @@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) (_i) != -1; \ (_i) = eytzinger0_next((_i), (_size))) -typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size); - /* return greatest node <= @search, or -1 if not found */ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, - 
eytzinger_cmp_fn cmp, const void *search) + cmp_func_t cmp, const void *search) { unsigned i, n = 0; @@ -244,21 +252,24 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, do { i = n; - n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0); + n = eytzinger0_child(i, cmp(base + i * size, search) <= 0); } while (n < nr); if (n & 1) { /* @i was greater than @search, return previous node: */ - - if (i == eytzinger0_first(nr)) - return -1; - return eytzinger0_prev(i, nr); } else { return i; } } +static inline ssize_t eytzinger0_find_gt(void *base, size_t nr, size_t size, + cmp_func_t cmp, const void *search) +{ + ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search); + return eytzinger0_next(idx, size); +} + #define eytzinger0_find(base, nr, size, _cmp, search) \ ({ \ void *_base = (base); \ @@ -269,13 +280,13 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, int _res; \ \ while (_i < _nr && \ - (_res = _cmp(_search, _base + _i * _size, _size))) \ + (_res = _cmp(_search, _base + _i * _size))) \ _i = eytzinger0_child(_i, _res > 0); \ _i; \ }) -void eytzinger0_sort(void *, size_t, size_t, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, void *, size_t)); +void eytzinger0_sort_r(void *, size_t, size_t, + cmp_r_func_t, swap_r_func_t, const void *); +void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t); #endif /* _EYTZINGER_H */ diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 33cb6da3a5ad..f49e6c0f0f68 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -536,7 +536,7 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio) if (likely(!dio->iter.count) || dio->op.error) break; - bio_reset(bio, NULL, REQ_OP_WRITE); + bio_reset(bio, NULL, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE); } out: return bch2_dio_write_done(dio); @@ -618,7 +618,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct 
iov_iter *iter) bio = bio_alloc_bioset(NULL, bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), - REQ_OP_WRITE, + REQ_OP_WRITE | REQ_SYNC | REQ_IDLE, GFP_KERNEL, &c->dio_write_bioset); dio = container_of(bio, struct dio_write, op.wbio.bio); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0ccee05f6887..b5ea9fa1259d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1997,6 +1997,7 @@ out: return dget(sb->s_root); err_put_super: + __bch2_fs_stop(c); deactivate_locked_super(sb); return ERR_PTR(bch2_err_class(ret)); } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 47d4eefaba7b..8e2010212cc3 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -12,7 +12,7 @@ #include "fsck.h" #include "inode.h" #include "keylist.h" -#include "recovery.h" +#include "recovery_passes.h" #include "snapshot.h" #include "super.h" #include "xattr.h" @@ -63,9 +63,7 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol, u32 *snapshot, u64 *inum) { struct bch_subvolume s; - int ret; - - ret = bch2_subvolume_get(trans, subvol, false, 0, &s); + int ret = bch2_subvolume_get(trans, subvol, false, 0, &s); *snapshot = le32_to_cpu(s.snapshot); *inum = le64_to_cpu(s.inode); @@ -158,9 +156,10 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos) bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT); - ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, - &dir_hash_info, &iter, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_hash_delete_at(trans, bch2_dirent_hash_desc, + &dir_hash_info, &iter, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); bch2_trans_iter_exit(trans, &iter); err: bch_err_fn(c, ret); @@ -169,7 +168,8 @@ err: /* Get lost+found, create if it doesn't exist: */ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - struct bch_inode_unpacked *lostfound) + struct bch_inode_unpacked *lostfound, + u64 reattaching_inum) { struct bch_fs *c = trans->c; struct qstr 
lostfound_str = QSTR("lost+found"); @@ -184,19 +184,36 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, return ret; subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) }; - u32 subvol_snapshot; - ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol), - &subvol_snapshot, &root_inum.inum); - bch_err_msg(c, ret, "looking up root subvol"); + struct bch_subvolume subvol; + ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), + false, 0, &subvol); + bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u", + le32_to_cpu(st.master_subvol), snapshot); if (ret) return ret; + if (!subvol.inode) { + struct btree_iter iter; + struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)), + 0, subvolume); + ret = PTR_ERR_OR_ZERO(subvol); + if (ret) + return ret; + + subvol->v.inode = cpu_to_le64(reattaching_inum); + bch2_trans_iter_exit(trans, &iter); + } + + root_inum.inum = le64_to_cpu(subvol.inode); + struct bch_inode_unpacked root_inode; struct bch_hash_info root_hash_info; u32 root_inode_snapshot = snapshot; ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); - bch_err_msg(c, ret, "looking up root inode"); + bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", + root_inum.inum, le32_to_cpu(st.master_subvol)); if (ret) return ret; @@ -292,7 +309,7 @@ static int reattach_inode(struct btree_trans *trans, snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); } - ret = lookup_lostfound(trans, dirent_snapshot, &lostfound); + ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum); if (ret) return ret; @@ -363,6 +380,112 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume return ret; } +static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum) +{ + struct bch_fs *c = trans->c; + + if (!bch2_snapshot_is_leaf(c, snapshotid)) 
{ + bch_err(c, "need to reconstruct subvol, but have interior node snapshot"); + return -BCH_ERR_fsck_repair_unimplemented; + } + + /* + * If inum isn't set, that means we're being called from check_dirents, + * not check_inodes - the root of this subvolume doesn't exist or we + * would have found it there: + */ + if (!inum) { + struct btree_iter inode_iter = {}; + struct bch_inode_unpacked new_inode; + u64 cpu = raw_smp_processor_id(); + + bch2_inode_init_early(c, &new_inode); + bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL); + + new_inode.bi_subvol = subvolid; + + int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?: + bch2_btree_iter_traverse(&inode_iter) ?: + bch2_inode_write(trans, &inode_iter, &new_inode); + bch2_trans_iter_exit(trans, &inode_iter); + if (ret) + return ret; + + inum = new_inode.bi_inum; + } + + bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum); + + struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol)); + int ret = PTR_ERR_OR_ZERO(new_subvol); + if (ret) + return ret; + + bkey_subvolume_init(&new_subvol->k_i); + new_subvol->k.p.offset = subvolid; + new_subvol->v.snapshot = cpu_to_le32(snapshotid); + new_subvol->v.inode = cpu_to_le64(inum); + ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0); + if (ret) + return ret; + + struct btree_iter iter; + struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_snapshots, POS(0, snapshotid), + 0, snapshot); + ret = PTR_ERR_OR_ZERO(s); + bch_err_msg(c, ret, "getting snapshot %u", snapshotid); + if (ret) + return ret; + + u32 snapshot_tree = le32_to_cpu(s->v.tree); + + s->v.subvol = cpu_to_le32(subvolid); + SET_BCH_SNAPSHOT_SUBVOL(&s->v, true); + bch2_trans_iter_exit(trans, &iter); + + struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_snapshot_trees, POS(0, snapshot_tree), + 0, snapshot_tree); + ret = 
PTR_ERR_OR_ZERO(st); + bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree); + if (ret) + return ret; + + if (!st->v.master_subvol) + st->v.master_subvol = cpu_to_le32(subvolid); + + bch2_trans_iter_exit(trans, &iter); + return 0; +} + +static int reconstruct_inode(struct btree_trans *trans, u32 snapshot, u64 inum, u64 size, unsigned mode) +{ + struct bch_fs *c = trans->c; + struct bch_inode_unpacked new_inode; + + bch2_inode_init_early(c, &new_inode); + bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, mode|0755, 0, NULL); + new_inode.bi_size = size; + new_inode.bi_inum = inum; + + return __bch2_fsck_write_inode(trans, &new_inode, snapshot); +} + +static int reconstruct_reg_inode(struct btree_trans *trans, u32 snapshot, u64 inum) +{ + struct btree_iter iter = {}; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); + struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter); + bch2_trans_iter_exit(trans, &iter); + int ret = bkey_err(k); + if (ret) + return ret; + + return reconstruct_inode(trans, snapshot, inum, k.k->p.offset << 9, S_IFREG); +} + struct snapshots_seen_entry { u32 id; u32 equiv; @@ -1064,6 +1187,11 @@ static int check_inode(struct btree_trans *trans, if (ret && !bch2_err_matches(ret, ENOENT)) goto err; + if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { + ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum); + goto do_update; + } + if (fsck_err_on(ret, c, inode_bi_subvol_missing, "inode %llu:%u bi_subvol points to missing subvolume %u", @@ -1081,7 +1209,7 @@ static int check_inode(struct btree_trans *trans, do_update = true; } } - +do_update: if (do_update) { ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck updating inode"); @@ -1130,8 +1258,8 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal i->count = count2; if (i->count != count2) { - bch_err(c, "fsck counted i_sectors wrong 
for inode %llu:%u: got %llu should be %llu", - w->last_pos.inode, i->snapshot, i->count, count2); + bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", + w->last_pos.inode, i->snapshot, i->count, count2); return -BCH_ERR_internal_fsck_err; } @@ -1371,10 +1499,6 @@ static int check_overlapping_extents(struct btree_trans *trans, goto err; } - ret = extent_ends_at(c, extent_ends, seen, k); - if (ret) - goto err; - extent_ends->last_pos = k.k->p; err: return ret; @@ -1438,6 +1562,17 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, goto err; if (k.k->type != KEY_TYPE_whiteout) { + if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { + ret = reconstruct_reg_inode(trans, k.k->p.snapshot, k.k->p.inode) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto err; + + inode->last_pos.inode--; + ret = -BCH_ERR_transaction_restart_nested; + goto err; + } + if (fsck_err_on(!i, c, extent_in_missing_inode, "extent in missing inode:\n %s", (printbuf_reset(&buf), @@ -1504,6 +1639,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, i->seen_this_pos = true; } + + if (k.k->type != KEY_TYPE_whiteout) { + ret = extent_ends_at(c, extent_ends, s, k); + if (ret) + goto err; + } out: err: fsck_err: @@ -1584,8 +1725,8 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ return count2; if (i->count != count2) { - bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu", - i->count, count2); + bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu", + w->last_pos.inode, i->snapshot, i->count, count2); i->count = count2; if (i->inode.bi_nlink == i->count) continue; @@ -1782,6 +1923,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol); u32 target_subvol = 
le32_to_cpu(d.v->d_child_subvol); u32 parent_snapshot; + u32 new_parent_subvol = 0; u64 parent_inum; struct printbuf buf = PRINTBUF; int ret = 0; @@ -1790,6 +1932,27 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * if (ret && !bch2_err_matches(ret, ENOENT)) return ret; + if (ret || + (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) { + int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); + if (ret2 && !bch2_err_matches(ret, ENOENT)) + return ret2; + } + + if (ret && + !new_parent_subvol && + (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { + /* + * Couldn't find a subvol for dirent's snapshot - but we lost + * subvols, so we need to reconstruct: + */ + ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0); + if (ret) + return ret; + + parent_snapshot = d.k->p.snapshot; + } + if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol, "dirent parent_subvol points to missing subvolume\n%s", (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || @@ -1798,10 +1961,10 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", parent_snapshot, (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { - u32 new_parent_subvol; - ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); - if (ret) - goto err; + if (!new_parent_subvol) { + bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot); + return -BCH_ERR_fsck_repair_unimplemented; + } struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); ret = PTR_ERR_OR_ZERO(new_dirent); @@ -1847,9 +2010,16 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); if (ret && !bch2_err_matches(ret, ENOENT)) - return ret; + goto err; + + if (ret) { + bch_err(c, "subvol %u 
points to missing inode root %llu", target_subvol, target_inum); + ret = -BCH_ERR_fsck_repair_unimplemented; + ret = 0; + goto err; + } - if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol, + if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol, c, inode_bi_parent_wrong, "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u", target_inum, @@ -1857,13 +2027,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * subvol_root.bi_parent_subvol = parent_subvol; ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); if (ret) - return ret; + goto err; } ret = check_dirent_target(trans, iter, d, &subvol_root, target_snapshot); if (ret) - return ret; + goto err; out: err: fsck_err: @@ -1880,7 +2050,6 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, struct snapshots_seen *s) { struct bch_fs *c = trans->c; - struct bkey_s_c_dirent d; struct inode_walker_entry *i; struct printbuf buf = PRINTBUF; struct bpos equiv; @@ -1919,6 +2088,17 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); dir->first_this_inode = false; + if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { + ret = reconstruct_inode(trans, k.k->p.snapshot, k.k->p.inode, 0, S_IFDIR) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto err; + + dir->last_pos.inode--; + ret = -BCH_ERR_transaction_restart_nested; + goto err; + } + if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, "dirent in nonexisting directory:\n%s", (printbuf_reset(&buf), @@ -1953,7 +2133,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (k.k->type != KEY_TYPE_dirent) goto out; - d = bkey_s_c_to_dirent(k); + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); if (d.v->d_type == DT_SUBVOL) { ret = check_dirent_to_subvol(trans, iter, d); @@ -2098,17 +2278,21 @@ 
static int check_root_trans(struct btree_trans *trans) if (mustfix_fsck_err_on(ret, c, root_subvol_missing, "root subvol missing")) { - struct bkey_i_subvolume root_subvol; + struct bkey_i_subvolume *root_subvol = + bch2_trans_kmalloc(trans, sizeof(*root_subvol)); + ret = PTR_ERR_OR_ZERO(root_subvol); + if (ret) + goto err; snapshot = U32_MAX; inum = BCACHEFS_ROOT_INO; - bkey_subvolume_init(&root_subvol.k_i); - root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL; - root_subvol.v.flags = 0; - root_subvol.v.snapshot = cpu_to_le32(snapshot); - root_subvol.v.inode = cpu_to_le64(inum); - ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0); + bkey_subvolume_init(&root_subvol->k_i); + root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL; + root_subvol->v.flags = 0; + root_subvol->v.snapshot = cpu_to_le32(snapshot); + root_subvol->v.inode = cpu_to_le64(inum); + ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0); bch_err_msg(c, ret, "writing root subvol"); if (ret) goto err; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 2b5e06770ab3..ca4a066e9a54 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -552,8 +552,8 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out, prt_printf(out, "bi_sectors=%llu", inode->bi_sectors); prt_newline(out); - prt_newline(out); prt_printf(out, "bi_version=%llu", inode->bi_version); + prt_newline(out); #define x(_name, _bits) \ prt_printf(out, #_name "=%llu", (u64) inode->_name); \ diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 1baf78594cca..82f9170dab3f 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -264,6 +264,7 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, ret = 0; err: bch2_logged_op_finish(trans, op_k); + bch_err_fn(c, ret); return ret; } @@ -476,6 +477,7 @@ case LOGGED_OP_FINSERT_finish: break; } err: + bch_err_fn(c, ret); bch2_logged_op_finish(trans, op_k); bch2_trans_iter_exit(trans, &iter); 
return ret; diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index b5303874fc35..37a024e034d4 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -95,8 +95,7 @@ out: return ret ?: bch2_blacklist_table_initialize(c); } -static int journal_seq_blacklist_table_cmp(const void *_l, - const void *_r, size_t size) +static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r) { const struct journal_seq_blacklist_table_entry *l = _l; const struct journal_seq_blacklist_table_entry *r = _r; diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 9fac838d123e..b82f8209041f 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -37,7 +37,6 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type); struct bkey_buf sk; u32 restart_count = trans->restart_count; - int ret; if (!fn) return 0; @@ -45,11 +44,11 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_buf_init(&sk); bch2_bkey_buf_reassemble(&sk, c, k); - ret = drop_locks_do(trans, (bch2_fs_lazy_rw(c), 0)) ?: - fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count); + fn->resume(trans, sk.k); bch2_bkey_buf_exit(&sk, c); - return ret; + + return trans_was_restarted(trans, restart_count); } int bch2_resume_logged_ops(struct bch_fs *c) diff --git a/fs/bcachefs/mean_and_variance_test.c b/fs/bcachefs/mean_and_variance_test.c index db63b3f3b338..4c298e74723d 100644 --- a/fs/bcachefs/mean_and_variance_test.c +++ b/fs/bcachefs/mean_and_variance_test.c @@ -136,20 +136,8 @@ static void mean_and_variance_test_1(struct kunit *test) d, mean, stddev, weighted_mean, weighted_stddev); } -static void mean_and_variance_test_2(struct kunit *test) -{ - s64 d[] = { 100, 10, 10, 10, 10, 10, 10 }; - s64 mean[] = { 10, 10, 10, 10, 10, 10, 10 }; - s64 stddev[] = { 9, 9, 9, 9, 9, 9, 9 }; - 
s64 weighted_mean[] = { 32, 27, 22, 19, 17, 15, 14 }; - s64 weighted_stddev[] = { 38, 35, 31, 27, 24, 21, 18 }; - - do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2, - d, mean, stddev, weighted_mean, weighted_stddev); -} - /* Test behaviour where we switch from one steady state to another: */ -static void mean_and_variance_test_3(struct kunit *test) +static void mean_and_variance_test_2(struct kunit *test) { s64 d[] = { 100, 100, 100, 100, 100 }; s64 mean[] = { 22, 32, 40, 46, 50 }; @@ -161,18 +149,6 @@ static void mean_and_variance_test_3(struct kunit *test) d, mean, stddev, weighted_mean, weighted_stddev); } -static void mean_and_variance_test_4(struct kunit *test) -{ - s64 d[] = { 100, 100, 100, 100, 100 }; - s64 mean[] = { 10, 11, 12, 13, 14 }; - s64 stddev[] = { 9, 13, 15, 17, 19 }; - s64 weighted_mean[] = { 32, 49, 61, 71, 78 }; - s64 weighted_stddev[] = { 38, 44, 44, 41, 38 }; - - do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2, - d, mean, stddev, weighted_mean, weighted_stddev); -} - static void mean_and_variance_fast_divpow2(struct kunit *test) { s64 i; @@ -230,8 +206,6 @@ static struct kunit_case mean_and_variance_test_cases[] = { KUNIT_CASE(mean_and_variance_weighted_advanced_test), KUNIT_CASE(mean_and_variance_test_1), KUNIT_CASE(mean_and_variance_test_2), - KUNIT_CASE(mean_and_variance_test_3), - KUNIT_CASE(mean_and_variance_test_4), {} }; diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 08ea0cfc4aef..e1800c4119b5 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -7,6 +7,7 @@ #include "disk_groups.h" #include "error.h" #include "opts.h" +#include "recovery_passes.h" #include "super-io.h" #include "util.h" @@ -205,6 +206,9 @@ const struct bch_option bch2_opt_table[] = { #define OPT_STR(_choices) .type = BCH_OPT_STR, \ .min = 0, .max = ARRAY_SIZE(_choices), \ .choices = _choices +#define OPT_STR_NOLIMIT(_choices) .type = BCH_OPT_STR, \ + .min = 0, .max = U64_MAX, \ + .choices = _choices #define OPT_FN(_fn) .type = 
BCH_OPT_FN, .fn = _fn #define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \ diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 136083c11f3a..1ac4135cca1c 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -362,12 +362,17 @@ enum fsck_err_opts { OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ - NULL, "Don't replay the journal") \ - x(keep_journal, u8, \ + NULL, "Exit recovery immediately prior to journal replay")\ + x(recovery_pass_last, u8, \ + OPT_FS|OPT_MOUNT, \ + OPT_STR_NOLIMIT(bch2_recovery_passes), \ + BCH2_NO_SB_OPT, 0, \ + NULL, "Exit recovery after specified pass") \ + x(retain_recovery_info, u8, \ 0, \ OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ - NULL, "Don't free journal entries/keys after startup")\ + NULL, "Don't free journal entries/keys, scanned btree nodes after startup")\ x(read_entire_journal, u8, \ 0, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 03f9d6afe467..b76c16152579 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1,35 +1,31 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "backpointers.h" -#include "bkey_buf.h" #include "alloc_background.h" -#include "btree_gc.h" +#include "bkey_buf.h" #include "btree_journal_iter.h" +#include "btree_node_scan.h" #include "btree_update.h" #include "btree_update_interior.h" #include "btree_io.h" #include "buckets.h" #include "dirent.h" -#include "ec.h" #include "errcode.h" #include "error.h" #include "fs-common.h" -#include "fsck.h" #include "journal_io.h" #include "journal_reclaim.h" #include "journal_seq_blacklist.h" -#include "lru.h" #include "logged_ops.h" #include "move.h" #include "quota.h" #include "rebalance.h" #include "recovery.h" +#include "recovery_passes.h" #include "replicas.h" #include "sb-clean.h" #include "sb-downgrade.h" #include "snapshot.h" -#include "subvolume.h" #include "super-io.h" #include <linux/sort.h> @@ -37,6 +33,20 @@ #define QSTR(n) { { { .len = strlen(n) 
} }, .name = n } +void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) +{ + u64 b = BIT_ULL(btree); + + if (!(c->sb.btrees_lost_data & b)) { + bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); + + mutex_lock(&c->sb_lock); + bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } +} + static bool btree_id_is_alloc(enum btree_id id) { switch (id) { @@ -52,7 +62,7 @@ static bool btree_id_is_alloc(enum btree_id id) } /* for -o reconstruct_alloc: */ -static void do_reconstruct_alloc(struct bch_fs *c) +static void bch2_reconstruct_alloc(struct bch_fs *c) { bch2_journal_log_msg(c, "dropping alloc info"); bch_info(c, "dropping and reconstructing all alloc info"); @@ -87,15 +97,17 @@ static void do_reconstruct_alloc(struct bch_fs *c) c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); - struct journal_keys *keys = &c->journal_keys; - size_t src, dst; - move_gap(keys, keys->nr); - - for (src = 0, dst = 0; src < keys->nr; src++) - if (!btree_id_is_alloc(keys->data[src].btree_id)) - keys->data[dst++] = keys->data[src]; - keys->nr = keys->gap = dst; + bch2_shoot_down_journal_keys(c, BTREE_ID_alloc, + 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers, + 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard, + 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + bch2_shoot_down_journal_keys(c, BTREE_ID_freespace, + 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens, + 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); } /* @@ -186,7 +198,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) return cmp_int(l->journal_seq, r->journal_seq); } -static int bch2_journal_replay(struct bch_fs *c) +int bch2_journal_replay(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; DARRAY(struct 
journal_key *) keys_sorted = { 0 }; @@ -194,6 +206,7 @@ static int bch2_journal_replay(struct bch_fs *c) u64 start_seq = c->journal_replay_seq_start; u64 end_seq = c->journal_replay_seq_start; struct btree_trans *trans = bch2_trans_get(c); + bool immediate_flush = false; int ret = 0; if (keys->nr) { @@ -215,6 +228,13 @@ static int bch2_journal_replay(struct bch_fs *c) darray_for_each(*keys, k) { cond_resched(); + /* + * k->allocated means the key wasn't read in from the journal, + * rather it was from early repair code + */ + if (k->allocated) + immediate_flush = true; + /* Skip fastpath if we're low on space in the journal */ ret = c->journal.watermark ? -1 : commit_do(trans, NULL, NULL, @@ -266,7 +286,8 @@ static int bch2_journal_replay(struct bch_fs *c) bch2_trans_put(trans); trans = NULL; - if (!c->opts.keep_journal) + if (!c->opts.retain_recovery_info && + c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) bch2_journal_keys_put_initial(c); replay_now_at(j, j->replay_journal_seq_end); @@ -274,6 +295,12 @@ static int bch2_journal_replay(struct bch_fs *c) bch2_journal_set_replay_done(j); + /* if we did any repair, flush it immediately */ + if (immediate_flush) { + bch2_journal_flush_all_pins(&c->journal); + ret = bch2_journal_meta(&c->journal); + } + if (keys->nr) bch2_journal_log_msg(c, "journal replay finished"); err: @@ -423,10 +450,9 @@ static int journal_replay_early(struct bch_fs *c, static int read_btree_roots(struct bch_fs *c) { - unsigned i; int ret = 0; - for (i = 0; i < btree_id_nr_alive(c); i++) { + for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); if (!r->alive) @@ -435,186 +461,46 @@ static int read_btree_roots(struct bch_fs *c) if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc) continue; - if (r->error) { - __fsck_err(c, - btree_id_is_alloc(i) - ? 
FSCK_CAN_IGNORE : 0, - btree_root_bkey_invalid, - "invalid btree root %s", - bch2_btree_id_str(i)); - if (i == BTREE_ID_alloc) + if (mustfix_fsck_err_on((ret = r->error), + c, btree_root_bkey_invalid, + "invalid btree root %s", + bch2_btree_id_str(i)) || + mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)), + c, btree_root_read_error, + "error reading btree root %s l=%u: %s", + bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { + if (btree_id_is_alloc(i)) { + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs); c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); - } + r->error = 0; + } else if (!(c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) { + bch_info(c, "will run btree node scan"); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); + } - ret = bch2_btree_root_read(c, i, &r->key, r->level); - if (ret) { - fsck_err(c, - btree_root_read_error, - "error reading btree root %s", - bch2_btree_id_str(i)); - if (btree_id_is_alloc(i)) - c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); ret = 0; + bch2_btree_lost_data(c, i); } } - for (i = 0; i < BTREE_ID_NR; i++) { + for (unsigned i = 0; i < BTREE_ID_NR; i++) { struct btree_root *r = bch2_btree_id_root(c, i); - if (!r->b) { + if (!r->b && !r->error) { r->alive = false; r->level = 0; - bch2_btree_root_alloc(c, i); + bch2_btree_root_alloc_fake(c, i, 0); } } fsck_err: return ret; } -static int bch2_initialize_subvolumes(struct bch_fs *c) -{ - struct bkey_i_snapshot_tree root_tree; - struct bkey_i_snapshot 
root_snapshot; - struct bkey_i_subvolume root_volume; - int ret; - - bkey_snapshot_tree_init(&root_tree.k_i); - root_tree.k.p.offset = 1; - root_tree.v.master_subvol = cpu_to_le32(1); - root_tree.v.root_snapshot = cpu_to_le32(U32_MAX); - - bkey_snapshot_init(&root_snapshot.k_i); - root_snapshot.k.p.offset = U32_MAX; - root_snapshot.v.flags = 0; - root_snapshot.v.parent = 0; - root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL); - root_snapshot.v.tree = cpu_to_le32(1); - SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true); - - bkey_subvolume_init(&root_volume.k_i); - root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL; - root_volume.v.flags = 0; - root_volume.v.snapshot = cpu_to_le32(U32_MAX); - root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO); - - ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?: - bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?: - bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0); - bch_err_fn(c, ret); - return ret; -} - -static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) -{ - struct btree_iter iter; - struct bkey_s_c k; - struct bch_inode_unpacked inode; - int ret; - - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, - SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0); - ret = bkey_err(k); - if (ret) - return ret; - - if (!bkey_is_inode(k.k)) { - bch_err(trans->c, "root inode not found"); - ret = -BCH_ERR_ENOENT_inode; - goto err; - } - - ret = bch2_inode_unpack(k, &inode); - BUG_ON(ret); - - inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; - - ret = bch2_inode_write(trans, &iter, &inode); -err: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -/* set bi_subvol on root inode */ -noinline_for_stack -static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) -{ - int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, - __bch2_fs_upgrade_for_subvolumes(trans)); - bch_err_fn(c, ret); - return ret; -} - -const char * const bch2_recovery_passes[] = { 
-#define x(_fn, ...) #_fn, - BCH_RECOVERY_PASSES() -#undef x - NULL -}; - -static int bch2_check_allocations(struct bch_fs *c) -{ - return bch2_gc(c, true, c->opts.norecovery); -} - -static int bch2_set_may_go_rw(struct bch_fs *c) -{ - struct journal_keys *keys = &c->journal_keys; - - /* - * After we go RW, the journal keys buffer can't be modified (except for - * setting journal_key->overwritten: it will be accessed by multiple - * threads - */ - move_gap(keys, keys->nr); - - set_bit(BCH_FS_may_go_rw, &c->flags); - - if (keys->nr || c->opts.fsck || !c->sb.clean) - return bch2_fs_read_write_early(c); - return 0; -} - -struct recovery_pass_fn { - int (*fn)(struct bch_fs *); - unsigned when; -}; - -static struct recovery_pass_fn recovery_pass_fns[] = { -#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when }, - BCH_RECOVERY_PASSES() -#undef x -}; - -u64 bch2_recovery_passes_to_stable(u64 v) -{ - static const u8 map[] = { -#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n, - BCH_RECOVERY_PASSES() -#undef x - }; - - u64 ret = 0; - for (unsigned i = 0; i < ARRAY_SIZE(map); i++) - if (v & BIT_ULL(i)) - ret |= BIT_ULL(map[i]); - return ret; -} - -u64 bch2_recovery_passes_from_stable(u64 v) -{ - static const u8 map[] = { -#define x(n, id, ...) 
[BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n, - BCH_RECOVERY_PASSES() -#undef x - }; - - u64 ret = 0; - for (unsigned i = 0; i < ARRAY_SIZE(map); i++) - if (v & BIT_ULL(i)) - ret |= BIT_ULL(map[i]); - return ret; -} - static bool check_version_upgrade(struct bch_fs *c) { unsigned latest_version = bcachefs_metadata_version_current; @@ -687,96 +573,6 @@ static bool check_version_upgrade(struct bch_fs *c) return false; } -u64 bch2_fsck_recovery_passes(void) -{ - u64 ret = 0; - - for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) - if (recovery_pass_fns[i].when & PASS_FSCK) - ret |= BIT_ULL(i); - return ret; -} - -static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) -{ - struct recovery_pass_fn *p = recovery_pass_fns + pass; - - if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read) - return false; - if (c->recovery_passes_explicit & BIT_ULL(pass)) - return true; - if ((p->when & PASS_FSCK) && c->opts.fsck) - return true; - if ((p->when & PASS_UNCLEAN) && !c->sb.clean) - return true; - if (p->when & PASS_ALWAYS) - return true; - return false; -} - -static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) -{ - struct recovery_pass_fn *p = recovery_pass_fns + pass; - int ret; - - if (!(p->when & PASS_SILENT)) - bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."), - bch2_recovery_passes[pass]); - ret = p->fn(c); - if (ret) - return ret; - if (!(p->when & PASS_SILENT)) - bch2_print(c, KERN_CONT " done\n"); - - return 0; -} - -static int bch2_run_recovery_passes(struct bch_fs *c) -{ - int ret = 0; - - while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { - if (should_run_recovery_pass(c, c->curr_recovery_pass)) { - unsigned pass = c->curr_recovery_pass; - - ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || - (ret && c->curr_recovery_pass < pass)) - continue; - if (ret) - break; - - c->recovery_passes_complete |= 
BIT_ULL(c->curr_recovery_pass); - } - c->curr_recovery_pass++; - c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); - } - - return ret; -} - -int bch2_run_online_recovery_passes(struct bch_fs *c) -{ - int ret = 0; - - for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) { - struct recovery_pass_fn *p = recovery_pass_fns + i; - - if (!(p->when & PASS_ONLINE)) - continue; - - ret = bch2_run_recovery_pass(c, i); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) { - i = c->curr_recovery_pass; - continue; - } - if (ret) - break; - } - - return ret; -} - int bch2_fs_recovery(struct bch_fs *c) { struct bch_sb_field_clean *clean = NULL; @@ -809,24 +605,14 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (c->opts.fsck && c->opts.norecovery) { - bch_err(c, "cannot select both norecovery and fsck"); - ret = -EINVAL; - goto err; - } + if (c->opts.norecovery) + c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1; if (!c->opts.nochanges) { mutex_lock(&c->sb_lock); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; - struct bch_sb_field_ext *ext = - bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64)); - if (!ext) { - ret = -BCH_ERR_ENOSPC_sb; - mutex_unlock(&c->sb_lock); - goto err; - } - if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) { ext->recovery_passes_required[0] |= cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology))); @@ -885,7 +671,7 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { + if (!c->sb.clean || c->opts.fsck || c->opts.retain_recovery_info) { struct genradix_iter iter; struct journal_replay **i; @@ -965,7 +751,7 @@ use_clean: c->journal_replay_seq_end = blacklist_seq - 1; if (c->opts.reconstruct_alloc) - do_reconstruct_alloc(c); + bch2_reconstruct_alloc(c); zero_out_btree_mem_ptr(&c->journal_keys); @@ -1017,6 +803,12 @@ use_clean: 
clear_bit(BCH_FS_fsck_running, &c->flags); + /* fsync if we fixed errors */ + if (test_bit(BCH_FS_errors_fixed, &c->flags)) { + bch2_journal_flush_all_pins(&c->journal); + bch2_journal_meta(&c->journal); + } + /* If we fixed errors, verify that fs is actually clean now: */ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && test_bit(BCH_FS_errors_fixed, &c->flags) && @@ -1051,6 +843,7 @@ use_clean: } mutex_lock(&c->sb_lock); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) { @@ -1064,15 +857,18 @@ use_clean: write_sb = true; } - if (!test_bit(BCH_FS_error, &c->flags)) { - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - if (ext && - (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) || - !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) { - memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required)); - memset(ext->errors_silent, 0, sizeof(ext->errors_silent)); - write_sb = true; - } + if (!test_bit(BCH_FS_error, &c->flags) && + !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent))) { + memset(ext->errors_silent, 0, sizeof(ext->errors_silent)); + write_sb = true; + } + + if (c->opts.fsck && + !test_bit(BCH_FS_error, &c->flags) && + c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 && + ext->btrees_lost_data) { + ext->btrees_lost_data = 0; + write_sb = true; } if (c->opts.fsck && @@ -1113,9 +909,10 @@ use_clean: out: bch2_flush_fsck_errs(c); - if (!c->opts.keep_journal && - test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) + if (!c->opts.retain_recovery_info) { bch2_journal_keys_put_initial(c); + bch2_find_btree_nodes_exit(&c->found_btree_nodes); + } kfree(clean); if (!ret && @@ -1141,6 +938,7 @@ int bch2_fs_initialize(struct bch_fs *c) int ret; bch_notice(c, "initializing new filesystem"); + set_bit(BCH_FS_new_fs, &c->flags); 
mutex_lock(&c->sb_lock); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); @@ -1155,11 +953,11 @@ int bch2_fs_initialize(struct bch_fs *c) } mutex_unlock(&c->sb_lock); - c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns); + c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; set_bit(BCH_FS_may_go_rw, &c->flags); for (unsigned i = 0; i < BTREE_ID_NR; i++) - bch2_btree_root_alloc(c, i); + bch2_btree_root_alloc_fake(c, i, 0); for_each_member_device(c, ca) bch2_dev_usage_init(ca); @@ -1230,7 +1028,7 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1; + c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1; if (enabled_qtypes(c)) { ret = bch2_fs_quota_read(c); diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index 4e9d24719b2e..4bf818de1f2f 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -2,37 +2,9 @@ #ifndef _BCACHEFS_RECOVERY_H #define _BCACHEFS_RECOVERY_H -extern const char * const bch2_recovery_passes[]; +void bch2_btree_lost_data(struct bch_fs *, enum btree_id); -u64 bch2_recovery_passes_to_stable(u64 v); -u64 bch2_recovery_passes_from_stable(u64 v); - -/* - * For when we need to rewind recovery passes and run a pass we skipped: - */ -static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c, - enum bch_recovery_pass pass) -{ - if (c->recovery_passes_explicit & BIT_ULL(pass)) - return 0; - - bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", - bch2_recovery_passes[pass], pass, - bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); - - c->recovery_passes_explicit |= BIT_ULL(pass); - - if (c->curr_recovery_pass >= pass) { - c->curr_recovery_pass = pass; - c->recovery_passes_complete &= (1ULL << pass) >> 1; - return -BCH_ERR_restart_recovery; - } else { - return 0; - } -} - -int bch2_run_online_recovery_passes(struct bch_fs *); -u64 bch2_fsck_recovery_passes(void); +int 
bch2_journal_replay(struct bch_fs *); int bch2_fs_recovery(struct bch_fs *); int bch2_fs_initialize(struct bch_fs *); diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c new file mode 100644 index 000000000000..cb501460d615 --- /dev/null +++ b/fs/bcachefs/recovery_passes.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "alloc_background.h" +#include "backpointers.h" +#include "btree_gc.h" +#include "btree_node_scan.h" +#include "ec.h" +#include "fsck.h" +#include "inode.h" +#include "journal.h" +#include "lru.h" +#include "logged_ops.h" +#include "rebalance.h" +#include "recovery.h" +#include "recovery_passes.h" +#include "snapshot.h" +#include "subvolume.h" +#include "super.h" +#include "super-io.h" + +const char * const bch2_recovery_passes[] = { +#define x(_fn, ...) #_fn, + BCH_RECOVERY_PASSES() +#undef x + NULL +}; + +static int bch2_check_allocations(struct bch_fs *c) +{ + return bch2_gc(c, true, false); +} + +static int bch2_set_may_go_rw(struct bch_fs *c) +{ + struct journal_keys *keys = &c->journal_keys; + + /* + * After we go RW, the journal keys buffer can't be modified (except for + * setting journal_key->overwritten: it will be accessed by multiple + * threads + */ + move_gap(keys, keys->nr); + + set_bit(BCH_FS_may_go_rw, &c->flags); + + if (keys->nr || c->opts.fsck || !c->sb.clean) + return bch2_fs_read_write_early(c); + return 0; +} + +struct recovery_pass_fn { + int (*fn)(struct bch_fs *); + unsigned when; +}; + +static struct recovery_pass_fn recovery_pass_fns[] = { +#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when }, + BCH_RECOVERY_PASSES() +#undef x +}; + +static const u8 passes_to_stable_map[] = { +#define x(n, id, ...) 
[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n, + BCH_RECOVERY_PASSES() +#undef x +}; + +static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass) +{ + return passes_to_stable_map[pass]; +} + +u64 bch2_recovery_passes_to_stable(u64 v) +{ + u64 ret = 0; + for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++) + if (v & BIT_ULL(i)) + ret |= BIT_ULL(passes_to_stable_map[i]); + return ret; +} + +u64 bch2_recovery_passes_from_stable(u64 v) +{ + static const u8 map[] = { +#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n, + BCH_RECOVERY_PASSES() +#undef x + }; + + u64 ret = 0; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) + if (v & BIT_ULL(i)) + ret |= BIT_ULL(map[i]); + return ret; +} + +/* + * For when we need to rewind recovery passes and run a pass we skipped: + */ +int bch2_run_explicit_recovery_pass(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + if (c->recovery_passes_explicit & BIT_ULL(pass)) + return 0; + + bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", + bch2_recovery_passes[pass], pass, + bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); + + c->recovery_passes_explicit |= BIT_ULL(pass); + + if (c->curr_recovery_pass >= pass) { + c->curr_recovery_pass = pass; + c->recovery_passes_complete &= (1ULL << pass) >> 1; + return -BCH_ERR_restart_recovery; + } else { + return 0; + } +} + +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass); + + mutex_lock(&c->sb_lock); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + + if (!test_bit_le64(s, ext->recovery_passes_required)) { + __set_bit_le64(s, ext->recovery_passes_required); + bch2_write_super(c); + } + mutex_unlock(&c->sb_lock); + + return bch2_run_explicit_recovery_pass(c, pass); +} + +static void bch2_clear_recovery_pass_required(struct 
bch_fs *c, + enum bch_recovery_pass pass) +{ + enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass); + + mutex_lock(&c->sb_lock); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + + if (test_bit_le64(s, ext->recovery_passes_required)) { + __clear_bit_le64(s, ext->recovery_passes_required); + bch2_write_super(c); + } + mutex_unlock(&c->sb_lock); +} + +u64 bch2_fsck_recovery_passes(void) +{ + u64 ret = 0; + + for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) + if (recovery_pass_fns[i].when & PASS_FSCK) + ret |= BIT_ULL(i); + return ret; +} + +static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) +{ + struct recovery_pass_fn *p = recovery_pass_fns + pass; + + if (c->recovery_passes_explicit & BIT_ULL(pass)) + return true; + if ((p->when & PASS_FSCK) && c->opts.fsck) + return true; + if ((p->when & PASS_UNCLEAN) && !c->sb.clean) + return true; + if (p->when & PASS_ALWAYS) + return true; + return false; +} + +static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) +{ + struct recovery_pass_fn *p = recovery_pass_fns + pass; + int ret; + + if (!(p->when & PASS_SILENT)) + bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."), + bch2_recovery_passes[pass]); + ret = p->fn(c); + if (ret) + return ret; + if (!(p->when & PASS_SILENT)) + bch2_print(c, KERN_CONT " done\n"); + + return 0; +} + +int bch2_run_online_recovery_passes(struct bch_fs *c) +{ + int ret = 0; + + for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) { + struct recovery_pass_fn *p = recovery_pass_fns + i; + + if (!(p->when & PASS_ONLINE)) + continue; + + ret = bch2_run_recovery_pass(c, i); + if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) { + i = c->curr_recovery_pass; + continue; + } + if (ret) + break; + } + + return ret; +} + +int bch2_run_recovery_passes(struct bch_fs *c) +{ + int ret = 0; + + while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { + if (c->opts.recovery_pass_last && + 
c->curr_recovery_pass > c->opts.recovery_pass_last) + break; + + if (should_run_recovery_pass(c, c->curr_recovery_pass)) { + unsigned pass = c->curr_recovery_pass; + + ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); + if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || + (ret && c->curr_recovery_pass < pass)) + continue; + if (ret) + break; + + c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); + } + + c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); + + if (!test_bit(BCH_FS_error, &c->flags)) + bch2_clear_recovery_pass_required(c, c->curr_recovery_pass); + + c->curr_recovery_pass++; + } + + return ret; +} diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h new file mode 100644 index 000000000000..99b464e127b8 --- /dev/null +++ b/fs/bcachefs/recovery_passes.h @@ -0,0 +1,17 @@ +#ifndef _BCACHEFS_RECOVERY_PASSES_H +#define _BCACHEFS_RECOVERY_PASSES_H + +extern const char * const bch2_recovery_passes[]; + +u64 bch2_recovery_passes_to_stable(u64 v); +u64 bch2_recovery_passes_from_stable(u64 v); + +u64 bch2_fsck_recovery_passes(void); + +int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); + +int bch2_run_online_recovery_passes(struct bch_fs *); +int bch2_run_recovery_passes(struct bch_fs *); + +#endif /* _BCACHEFS_RECOVERY_PASSES_H */ diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_passes_types.h index 4959e95e7c74..773aea9a0080 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_passes_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BCACHEFS_RECOVERY_TYPES_H -#define _BCACHEFS_RECOVERY_TYPES_H +#ifndef _BCACHEFS_RECOVERY_PASSES_TYPES_H +#define _BCACHEFS_RECOVERY_PASSES_TYPES_H #define PASS_SILENT BIT(0) #define PASS_FSCK BIT(1) @@ -13,6 +13,7 @@ * must never change: */ #define BCH_RECOVERY_PASSES() \ + x(scan_for_btree_nodes, 
37, 0) \ x(check_topology, 4, 0) \ x(alloc_read, 0, PASS_ALWAYS) \ x(stripes_read, 1, PASS_ALWAYS) \ @@ -31,13 +32,13 @@ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ x(bucket_gens_init, 17, 0) \ + x(reconstruct_snapshots, 38, 0) \ x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \ x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ x(fs_upgrade_for_subvolumes, 22, 0) \ - x(resume_logged_ops, 23, PASS_ALWAYS) \ x(check_inodes, 24, PASS_FSCK) \ x(check_extents, 25, PASS_FSCK) \ x(check_indirect_extents, 26, PASS_FSCK) \ @@ -47,6 +48,7 @@ x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ x(check_nlinks, 31, PASS_FSCK) \ + x(resume_logged_ops, 23, PASS_ALWAYS) \ x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \ x(fix_reflink_p, 33, 0) \ x(set_fs_needs_rebalance, 34, 0) \ @@ -56,6 +58,7 @@ enum bch_recovery_pass { #define x(n, id, when) BCH_RECOVERY_PASS_##n, BCH_RECOVERY_PASSES() #undef x + BCH_RECOVERY_PASS_NR }; /* But we also need stable identifiers that can be used in the superblock */ @@ -65,4 +68,4 @@ enum bch_recovery_pass_stable { #undef x }; -#endif /* _BCACHEFS_RECOVERY_TYPES_H */ +#endif /* _BCACHEFS_RECOVERY_PASSES_TYPES_H */ diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index c47c66c2b394..ff7864731a07 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -185,8 +185,7 @@ not_found: } else { bkey_error_init(update); update->k.p = p.k->p; - update->k.p.offset = next_idx; - update->k.size = next_idx - *idx; + update->k.size = p.k->size; set_bkey_val_u64s(&update->k, 0); } diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index cc2672c12031..678b9c20e251 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -6,12 +6,15 @@ #include 
"replicas.h" #include "super-io.h" +#include <linux/sort.h> + static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, struct bch_replicas_cpu *); /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */ -static int bch2_memcmp(const void *l, const void *r, size_t size) +static int bch2_memcmp(const void *l, const void *r, const void *priv) { + size_t size = (size_t) priv; return memcmp(l, r, size); } @@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e) static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) { - eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL); + eytzinger0_sort_r(r->entries, r->nr, r->entry_size, + bch2_memcmp, NULL, (void *)(size_t)r->entry_size); } static void bch2_replicas_entry_v0_to_text(struct printbuf *out, @@ -228,7 +232,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, verify_replicas_entry(search); -#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size) +#define entry_cmp(_l, _r) memcmp(_l, _r, entry_size) idx = eytzinger0_find(r->entries, r->nr, r->entry_size, entry_cmp, search); #undef entry_cmp @@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, { unsigned i; - sort_cmp_size(cpu_r->entries, - cpu_r->nr, - cpu_r->entry_size, - bch2_memcmp, NULL); + sort_r(cpu_r->entries, + cpu_r->nr, + cpu_r->entry_size, + bch2_memcmp, NULL, + (void *)(size_t)cpu_r->entry_size); for (i = 0; i < cpu_r->nr; i++) { struct bch_replicas_entry_v1 *e = diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index e4396cb0bacb..d6f81179c3a2 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -7,7 +7,7 @@ #include "bcachefs.h" #include "darray.h" -#include "recovery.h" +#include "recovery_passes.h" #include "sb-downgrade.h" #include "sb-errors.h" #include "super-io.h" diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h index 5178bf579f7c..d7d609131030 100644 --- 
a/fs/bcachefs/sb-errors_types.h +++ b/fs/bcachefs/sb-errors_types.h @@ -265,7 +265,12 @@ x(subvol_children_bad, 257) \ x(subvol_loop, 258) \ x(subvol_unreachable, 259) \ - x(btree_node_bkey_bad_u64s, 260) + x(btree_node_bkey_bad_u64s, 260) \ + x(btree_node_topology_empty_interior_node, 261) \ + x(btree_ptr_v2_min_key_bad, 262) \ + x(btree_root_unreadable_and_scan_found_nothing, 263) \ + x(snapshot_node_missing, 264) \ + x(dup_backpointer_to_bad_csum_extent, 265) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 39debe814bf3..0e806f04f3d7 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -8,6 +8,7 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "recovery_passes.h" #include "snapshot.h" #include <linux/random.h> @@ -93,8 +94,10 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans, static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor) { - while (id && id < ancestor) - id = __snapshot_t(t, id)->parent; + while (id && id < ancestor) { + const struct snapshot_t *s = __snapshot_t(t, id); + id = s ? 
s->parent : 0; + } return id == ancestor; } @@ -110,6 +113,8 @@ static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancest static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) { const struct snapshot_t *s = __snapshot_t(t, id); + if (!s) + return 0; if (s->skip[2] <= ancestor) return s->skip[2]; @@ -127,7 +132,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) rcu_read_lock(); struct snapshot_table *t = rcu_dereference(c->snapshots); - if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) { + if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) { ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor); goto out; } @@ -151,36 +156,39 @@ out: static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) { size_t idx = U32_MAX - id; - size_t new_size; struct snapshot_table *new, *old; - new_size = max(16UL, roundup_pow_of_two(idx + 1)); + size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1)); + size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]); - new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL); + new = kvzalloc(new_bytes, GFP_KERNEL); if (!new) return NULL; + new->nr = new_size; + old = rcu_dereference_protected(c->snapshots, true); if (old) - memcpy(new->s, - rcu_dereference_protected(c->snapshots, true)->s, - sizeof(new->s[0]) * c->snapshot_table_size); + memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr); rcu_assign_pointer(c->snapshots, new); - c->snapshot_table_size = new_size; - kvfree_rcu_mightsleep(old); + kvfree_rcu(old, rcu); - return &rcu_dereference_protected(c->snapshots, true)->s[idx]; + return &rcu_dereference_protected(c->snapshots, + lockdep_is_held(&c->snapshot_table_lock))->s[idx]; } static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id) { size_t idx = U32_MAX - id; + struct snapshot_table *table = + rcu_dereference_protected(c->snapshots, + 
lockdep_is_held(&c->snapshot_table_lock)); lockdep_assert_held(&c->snapshot_table_lock); - if (likely(idx < c->snapshot_table_size)) - return &rcu_dereference_protected(c->snapshots, true)->s[idx]; + if (likely(table && idx < table->nr)) + return &table->s[idx]; return __snapshot_t_mut(c, id); } @@ -567,6 +575,13 @@ static int check_snapshot_tree(struct btree_trans *trans, u32 subvol_id; ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id); + bch_err_fn(c, ret); + + if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */ + ret = 0; + goto err; + } + if (ret) goto err; @@ -724,7 +739,6 @@ static int check_snapshot(struct btree_trans *trans, u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); u32 real_depth; struct printbuf buf = PRINTBUF; - bool should_have_subvol; u32 i, id; int ret = 0; @@ -770,7 +784,7 @@ static int check_snapshot(struct btree_trans *trans, } } - should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && + bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && !BCH_SNAPSHOT_DELETED(&s); if (should_have_subvol) { @@ -872,6 +886,154 @@ int bch2_check_snapshots(struct bch_fs *c) return ret; } +static int check_snapshot_exists(struct btree_trans *trans, u32 id) +{ + struct bch_fs *c = trans->c; + + if (bch2_snapshot_equiv(c, id)) + return 0; + + u32 tree_id; + int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id); + if (ret) + return ret; + + struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot)); + ret = PTR_ERR_OR_ZERO(snapshot); + if (ret) + return ret; + + bkey_snapshot_init(&snapshot->k_i); + snapshot->k.p = POS(0, id); + snapshot->v.tree = cpu_to_le32(tree_id); + snapshot->v.btime.lo = cpu_to_le64(bch2_current_time(c)); + + return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?: + bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, + bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?: + bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i)); +} + +/* Figure out which 
snapshot nodes belong in the same tree: */ +struct snapshot_tree_reconstruct { + enum btree_id btree; + struct bpos cur_pos; + snapshot_id_list cur_ids; + DARRAY(snapshot_id_list) trees; +}; + +static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r) +{ + darray_for_each(r->trees, i) + darray_exit(i); + darray_exit(&r->trees); + darray_exit(&r->cur_ids); +} + +static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos) +{ + return r->btree == BTREE_ID_inodes + ? r->cur_pos.offset == pos.offset + : r->cur_pos.inode == pos.inode; +} + +static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r) +{ + darray_for_each(*l, i) + if (snapshot_list_has_id(r, *i)) + return true; + return false; +} + +static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s) +{ + bool first = true; + darray_for_each(*s, i) { + if (!first) + prt_char(out, ' '); + first = false; + prt_printf(out, "%u", *i); + } +} + +static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r) +{ + if (r->cur_ids.nr) { + darray_for_each(r->trees, i) + if (snapshot_id_lists_have_common(i, &r->cur_ids)) { + int ret = snapshot_list_merge(c, i, &r->cur_ids); + if (ret) + return ret; + goto out; + } + darray_push(&r->trees, r->cur_ids); + darray_init(&r->cur_ids); + } +out: + r->cur_ids.nr = 0; + return 0; +} + +static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos) +{ + if (!same_snapshot(r, pos)) + snapshot_tree_reconstruct_next(c, r); + r->cur_pos = pos; + return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot); +} + +int bch2_reconstruct_snapshots(struct bch_fs *c) +{ + struct btree_trans *trans = bch2_trans_get(c); + struct printbuf buf = PRINTBUF; + struct snapshot_tree_reconstruct r = {}; + int ret = 0; + + for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) { + if (btree_type_has_snapshots(btree)) { + r.btree = btree; 
+ + ret = for_each_btree_key(trans, iter, btree, POS_MIN, + BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({ + get_snapshot_trees(c, &r, k.k->p); + })); + if (ret) + goto err; + + snapshot_tree_reconstruct_next(c, &r); + } + } + + darray_for_each(r.trees, t) { + printbuf_reset(&buf); + snapshot_id_list_to_text(&buf, t); + + darray_for_each(*t, id) { + if (fsck_err_on(!bch2_snapshot_equiv(c, *id), + c, snapshot_node_missing, + "snapshot node %u from tree %s missing", *id, buf.buf)) { + if (t->nr > 1) { + bch_err(c, "cannot reconstruct snapshot trees with multiple nodes"); + ret = -BCH_ERR_fsck_repair_unimplemented; + goto err; + } + + ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_snapshot_exists(trans, *id)); + if (ret) + goto err; + } + } + } +fsck_err: +err: + bch2_trans_put(trans); + snapshot_tree_reconstruct_exit(&r); + printbuf_exit(&buf); + bch_err_fn(c, ret); + return ret; +} + /* * Mark a snapshot as deleted, for future cleanup: */ @@ -1682,6 +1844,20 @@ int bch2_snapshots_read(struct bch_fs *c) POS_MIN, 0, k, (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); bch_err_fn(c, ret); + + /* + * It's important that we check if we need to reconstruct snapshots + * before going RW, so we mark that pass as required in the superblock - + * otherwise, we could end up deleting keys with missing snapshot nodes + * instead + */ + BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) && + test_bit(BCH_FS_may_go_rw, &c->flags)); + + if (bch2_err_matches(ret, EIO) || + (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots))) + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots); + return ret; } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 7c66ffc06385..b7d2fed37c4f 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -33,7 +33,11 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id) { - 
return &t->s[U32_MAX - id]; + u32 idx = U32_MAX - id; + + return likely(t && idx < t->nr) + ? &t->s[idx] + : NULL; } static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) @@ -44,7 +48,8 @@ static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) { rcu_read_lock(); - id = snapshot_t(c, id)->tree; + const struct snapshot_t *s = snapshot_t(c, id); + id = s ? s->tree : 0; rcu_read_unlock(); return id; @@ -52,7 +57,8 @@ static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id) { - return snapshot_t(c, id)->parent; + const struct snapshot_t *s = snapshot_t(c, id); + return s ? s->parent : 0; } static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) @@ -66,19 +72,19 @@ static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) { -#ifdef CONFIG_BCACHEFS_DEBUG - u32 parent = snapshot_t(c, id)->parent; + const struct snapshot_t *s = snapshot_t(c, id); + if (!s) + return 0; - if (parent && - snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1) + u32 parent = s->parent; + if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && + parent && + s->depth != snapshot_t(c, parent)->depth + 1) panic("id %u depth=%u parent %u depth=%u\n", id, snapshot_t(c, id)->depth, parent, snapshot_t(c, parent)->depth); return parent; -#else - return snapshot_t(c, id)->parent; -#endif } static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) @@ -116,7 +122,8 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id) { - return snapshot_t(c, id)->equiv; + const struct snapshot_t *s = snapshot_t(c, id); + return s ?
s->equiv : 0; } static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) @@ -133,38 +140,22 @@ static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id) return id == bch2_snapshot_equiv(c, id); } -static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) +static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) { - const struct snapshot_t *s; - bool ret; - rcu_read_lock(); - s = snapshot_t(c, id); - ret = s->children[0]; + const struct snapshot_t *s = snapshot_t(c, id); + int ret = s ? s->children[0] : -BCH_ERR_invalid_snapshot_node; rcu_read_unlock(); return ret; } -static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) -{ - return !bch2_snapshot_is_internal_node(c, id); -} - -static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id) +static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) { - const struct snapshot_t *s; - u32 parent = __bch2_snapshot_parent(c, id); - - if (!parent) - return 0; - - s = snapshot_t(c, __bch2_snapshot_parent(c, id)); - if (id == s->children[0]) - return s->children[1]; - if (id == s->children[1]) - return s->children[0]; - return 0; + int ret = bch2_snapshot_is_internal_node(c, id); + if (ret < 0) + return ret; + return !ret; } static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent) @@ -218,15 +209,34 @@ static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id) { - int ret; - BUG_ON(snapshot_list_has_id(s, id)); - ret = darray_push(s, id); + int ret = darray_push(s, id); if (ret) bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size); return ret; } +static inline int snapshot_list_add_nodup(struct bch_fs *c, snapshot_id_list *s, u32 id) +{ + int ret = snapshot_list_has_id(s, id) + ? 
0 + : darray_push(s, id); + if (ret) + bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size); + return ret; +} + +static inline int snapshot_list_merge(struct bch_fs *c, snapshot_id_list *dst, snapshot_id_list *src) +{ + darray_for_each(*src, i) { + int ret = snapshot_list_add_nodup(c, dst, *i); + if (ret) + return ret; + } + + return 0; +} + int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, struct bch_snapshot *s); int bch2_snapshot_get_subvol(struct btree_trans *, u32, @@ -238,6 +248,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32, int bch2_check_snapshot_trees(struct bch_fs *); int bch2_check_snapshots(struct bch_fs *); +int bch2_reconstruct_snapshots(struct bch_fs *); int bch2_snapshot_node_set_deleted(struct btree_trans *, u32); void bch2_delete_dead_snapshots_work(struct work_struct *); @@ -249,7 +260,7 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans, struct bpos pos) { if (!btree_type_has_snapshots(id) || - bch2_snapshot_is_leaf(trans->c, pos.snapshot)) + bch2_snapshot_is_leaf(trans->c, pos.snapshot) > 0) return 0; return __bch2_key_has_snapshot_overwrites(trans, id, pos); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index ce7aed121942..88a79c823276 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -595,6 +595,78 @@ err: return ret; } +int bch2_initialize_subvolumes(struct bch_fs *c) +{ + struct bkey_i_snapshot_tree root_tree; + struct bkey_i_snapshot root_snapshot; + struct bkey_i_subvolume root_volume; + int ret; + + bkey_snapshot_tree_init(&root_tree.k_i); + root_tree.k.p.offset = 1; + root_tree.v.master_subvol = cpu_to_le32(1); + root_tree.v.root_snapshot = cpu_to_le32(U32_MAX); + + bkey_snapshot_init(&root_snapshot.k_i); + root_snapshot.k.p.offset = U32_MAX; + root_snapshot.v.flags = 0; + root_snapshot.v.parent = 0; + root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL); + root_snapshot.v.tree = cpu_to_le32(1); + 
SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true); + + bkey_subvolume_init(&root_volume.k_i); + root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL; + root_volume.v.flags = 0; + root_volume.v.snapshot = cpu_to_le32(U32_MAX); + root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO); + + ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?: + bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?: + bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0); + bch_err_fn(c, ret); + return ret; +} + +static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bch_inode_unpacked inode; + int ret; + + k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, + SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0); + ret = bkey_err(k); + if (ret) + return ret; + + if (!bkey_is_inode(k.k)) { + bch_err(trans->c, "root inode not found"); + ret = -BCH_ERR_ENOENT_inode; + goto err; + } + + ret = bch2_inode_unpack(k, &inode); + BUG_ON(ret); + + inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; + + ret = bch2_inode_write(trans, &iter, &inode); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +/* set bi_subvol on root inode */ +int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) +{ + int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, + __bch2_fs_upgrade_for_subvolumes(trans)); + bch_err_fn(c, ret); + return ret; +} + int bch2_fs_subvolumes_init(struct bch_fs *c) { INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work); diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index 903c05162c06..d2015d549bd2 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -37,6 +37,9 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *); int bch2_subvolume_unlink(struct btree_trans *, u32); int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool); +int bch2_initialize_subvolumes(struct bch_fs *); +int 
bch2_fs_upgrade_for_subvolumes(struct bch_fs *); + int bch2_fs_subvolumes_init(struct bch_fs *); #endif /* _BCACHEFS_SUBVOLUME_H */ diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h index ae644adfc391..9b10c8947828 100644 --- a/fs/bcachefs/subvolume_types.h +++ b/fs/bcachefs/subvolume_types.h @@ -20,6 +20,8 @@ struct snapshot_t { }; struct snapshot_table { + struct rcu_head rcu; + size_t nr; #ifndef RUST_BINDGEN DECLARE_FLEX_ARRAY(struct snapshot_t, s); #else diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index ad28e370b640..5eee055ee272 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -8,7 +8,7 @@ #include "journal.h" #include "journal_sb.h" #include "journal_seq_blacklist.h" -#include "recovery.h" +#include "recovery_passes.h" #include "replicas.h" #include "quota.h" #include "sb-clean.h" @@ -143,7 +143,7 @@ void bch2_free_super(struct bch_sb_handle *sb) { kfree(sb->bio); if (!IS_ERR_OR_NULL(sb->s_bdev_file)) - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); kfree(sb->holder); kfree(sb->sb_name); @@ -527,9 +527,11 @@ static void bch2_sb_update(struct bch_fs *c) memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent)); struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext); - if (ext) + if (ext) { le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent, sizeof(c->sb.errors_silent) * 8); + c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data); + } for_each_member_device(c, ca) { struct bch_member m = bch2_sb_member_get(src, ca->dev_idx); @@ -1162,6 +1164,11 @@ static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb, kfree(errors_silent); } + + prt_printf(out, "Btrees with missing data:"); + prt_tab(out); + prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data)); + prt_newline(out); } static const struct bch_sb_field_ops bch_sb_field_ops_ext = { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 1ad6e5cd9476..ed63018f21be 100644 --- 
a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -15,6 +15,7 @@ #include "btree_gc.h" #include "btree_journal_iter.h" #include "btree_key_cache.h" +#include "btree_node_scan.h" #include "btree_update_interior.h" #include "btree_io.h" #include "btree_write_buffer.h" @@ -365,7 +366,7 @@ void bch2_fs_read_only(struct bch_fs *c) !test_bit(BCH_FS_emergency_ro, &c->flags) && test_bit(BCH_FS_started, &c->flags) && test_bit(BCH_FS_clean_shutdown, &c->flags) && - !c->opts.norecovery) { + c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) { BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal)); BUG_ON(atomic_read(&c->btree_cache.dirty)); BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty)); @@ -510,7 +511,8 @@ err: int bch2_fs_read_write(struct bch_fs *c) { - if (c->opts.norecovery) + if (c->opts.recovery_pass_last && + c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay) return -BCH_ERR_erofs_norecovery; if (c->opts.nochanges) @@ -535,6 +537,7 @@ static void __bch2_fs_free(struct bch_fs *c) for (i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_exit(&c->times[i]); + bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); @@ -559,6 +562,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_io_clock_exit(&c->io_clock[READ]); bch2_fs_compress_exit(c); bch2_journal_keys_put_initial(c); + bch2_find_btree_nodes_exit(&c->found_btree_nodes); BUG_ON(atomic_read(&c->journal_keys.ref)); bch2_fs_btree_write_buffer_exit(c); percpu_free_rwsem(&c->mark_lock); @@ -1015,8 +1019,16 @@ int bch2_fs_start(struct bch_fs *c) for_each_online_member(c, ca) bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now); + struct bch_sb_field_ext *ext = + bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64)); mutex_unlock(&c->sb_lock); + if (!ext) { + bch_err(c, "insufficient space in superblock for sb_field_ext"); + ret = -BCH_ERR_ENOSPC_sb; + 
goto err; + } + for_each_rw_member(c, ca) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 216fadf16928..92c6ad75e702 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -707,149 +707,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) } } -static int alignment_ok(const void *base, size_t align) -{ - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || - ((unsigned long)base & (align - 1)) == 0; -} - -static void u32_swap(void *a, void *b, size_t size) -{ - u32 t = *(u32 *)a; - *(u32 *)a = *(u32 *)b; - *(u32 *)b = t; -} - -static void u64_swap(void *a, void *b, size_t size) -{ - u64 t = *(u64 *)a; - *(u64 *)a = *(u64 *)b; - *(u64 *)b = t; -} - -static void generic_swap(void *a, void *b, size_t size) -{ - char t; - - do { - t = *(char *)a; - *(char *)a++ = *(char *)b; - *(char *)b++ = t; - } while (--size > 0); -} - -static inline int do_cmp(void *base, size_t n, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - size_t l, size_t r) -{ - return cmp_func(base + inorder_to_eytzinger0(l, n) * size, - base + inorder_to_eytzinger0(r, n) * size, - size); -} - -static inline void do_swap(void *base, size_t n, size_t size, - void (*swap_func)(void *, void *, size_t), - size_t l, size_t r) -{ - swap_func(base + inorder_to_eytzinger0(l, n) * size, - base + inorder_to_eytzinger0(r, n) * size, - size); -} - -void eytzinger0_sort(void *base, size_t n, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, void *, size_t)) -{ - int i, c, r; - - if (!swap_func) { - if (size == 4 && alignment_ok(base, 4)) - swap_func = u32_swap; - else if (size == 8 && alignment_ok(base, 8)) - swap_func = u64_swap; - else - swap_func = generic_swap; - } - - /* heapify */ - for (i = n / 2 - 1; i >= 0; --i) { - for (r = i; r * 2 + 1 < n; r = c) { - c = r * 2 + 1; - - if (c + 1 < n && - do_cmp(base, n, size, cmp_func, c, c + 1) < 0) 
- c++; - - if (do_cmp(base, n, size, cmp_func, r, c) >= 0) - break; - - do_swap(base, n, size, swap_func, r, c); - } - } - - /* sort */ - for (i = n - 1; i > 0; --i) { - do_swap(base, n, size, swap_func, 0, i); - - for (r = 0; r * 2 + 1 < i; r = c) { - c = r * 2 + 1; - - if (c + 1 < i && - do_cmp(base, n, size, cmp_func, c, c + 1) < 0) - c++; - - if (do_cmp(base, n, size, cmp_func, r, c) >= 0) - break; - - do_swap(base, n, size, swap_func, r, c); - } - } -} - -void sort_cmp_size(void *base, size_t num, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, void *, size_t size)) -{ - /* pre-scale counters for performance */ - int i = (num/2 - 1) * size, n = num * size, c, r; - - if (!swap_func) { - if (size == 4 && alignment_ok(base, 4)) - swap_func = u32_swap; - else if (size == 8 && alignment_ok(base, 8)) - swap_func = u64_swap; - else - swap_func = generic_swap; - } - - /* heapify */ - for ( ; i >= 0; i -= size) { - for (r = i; r * 2 + size < n; r = c) { - c = r * 2 + size; - if (c < n - size && - cmp_func(base + c, base + c + size, size) < 0) - c += size; - if (cmp_func(base + r, base + c, size) >= 0) - break; - swap_func(base + r, base + c, size); - } - } - - /* sort */ - for (i = n - size; i > 0; i -= size) { - swap_func(base, base + i, size); - for (r = 0; r * 2 + size < i; r = c) { - c = r * 2 + size; - if (c < i - size && - cmp_func(base + c, base + c + size, size) < 0) - c += size; - if (cmp_func(base + r, base + c, size) >= 0) - break; - swap_func(base + r, base + c, size); - } - } -} - #if 0 void eytzinger1_test(void) { diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 175aee3074c7..b7e7c29278fc 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -631,10 +631,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes) memset(s + bytes, c, rem); } -void sort_cmp_size(void *base, size_t num, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, 
void *, size_t)); - /* just the memmove, doesn't update @_nr */ #define __array_insert_item(_array, _nr, _pos) \ memmove(&(_array)[(_pos) + 1], \ @@ -797,4 +793,14 @@ static inline void __set_bit_le64(size_t bit, __le64 *addr) addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64)); } +static inline void __clear_bit_le64(size_t bit, __le64 *addr) +{ + addr[bit / 64] &= ~cpu_to_le64(BIT_ULL(bit % 64)); +} + +static inline bool test_bit_le64(size_t bit, __le64 *addr) +{ + return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0; +} + #endif /* _BCACHEFS_UTIL_H */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 1920ed69279b..3314249e8674 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1359,7 +1359,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid)); SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid)); rcu_read_unlock(); - strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); + get_task_comm(psinfo->pr_fname, p); return 0; } diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 5f7587ca1ca7..1e09aeea69c2 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1559,7 +1559,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * needing to allocate extents from the block group.
*/ used = btrfs_space_info_used(space_info, true); - if (space_info->total_bytes - block_group->length < used) { + if (space_info->total_bytes - block_group->length < used && + block_group->zone_unusable < block_group->length) { /* * Add a reference for the list, compensate for the ref * drop under the "next" label for the diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7441245b1ceb..61594eaf1f89 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4333,6 +4333,19 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags)) goto done; + /* + * Between the initial test_bit(EXTENT_BUFFER_UPTODATE) and the above + * test_and_set_bit(EXTENT_BUFFER_READING), someone else could have + * started and finished reading the same eb. In this case, UPTODATE + * will now be set, and we shouldn't read it in again. + */ + if (unlikely(test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))) { + clear_bit(EXTENT_BUFFER_READING, &eb->bflags); + smp_mb__after_atomic(); + wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING); + return 0; + } + clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; check_buffer_tree_ref(eb); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 347ca13d15a9..445f7716f1e2 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -309,7 +309,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) btrfs_warn(fs_info, "no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu", btrfs_ino(inode), btrfs_root_id(inode->root), - start, len, gen); + start, start + len, gen); ret = -ENOENT; goto out; } @@ -318,7 +318,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) btrfs_warn(fs_info, "found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu", 
btrfs_ino(inode), btrfs_root_id(inode->root), - em->start, start, len, gen); + em->start, start, start + len, gen); ret = -EUCLEAN; goto out; } @@ -340,9 +340,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) em->mod_len = em->len; } - free_extent_map(em); out: write_unlock(&tree->lock); + free_extent_map(em); return ret; } @@ -629,13 +629,13 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info, */ ret = merge_extent_mapping(em_tree, existing, em, start); - if (ret) { + if (WARN_ON(ret)) { free_extent_map(em); *em_in = NULL; - WARN_ONCE(ret, -"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu\n", - existing->start, existing->len, - orig_start, orig_len, start); + btrfs_warn(fs_info, +"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu", + existing->start, extent_map_end(existing), + orig_start, orig_start + orig_len, start); } free_extent_map(existing); } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index c4bd0e60db59..fa25004ab04e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2812,7 +2812,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, gen = btrfs_get_last_trans_committed(fs_info); for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { - bytenr = btrfs_sb_offset(i); + ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr); + if (ret == -ENOENT) + break; + + if (ret) { + spin_lock(&sctx->stat_lock); + sctx->stat.super_errors++; + spin_unlock(&sctx->stat_lock); + continue; + } + if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->commit_total_bytes) break; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1dc1f1946ae0..f15591f3e54f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -692,6 +692,16 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, device->bdev = file_bdev(bdev_file); clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); + if (device->devt != device->bdev->bd_dev) { + 
btrfs_warn(NULL, + "device %s maj:min changed from %d:%d to %d:%d", + device->name->str, MAJOR(device->devt), + MINOR(device->devt), MAJOR(device->bdev->bd_dev), + MINOR(device->bdev->bd_dev)); + + device->devt = device->bdev->bd_dev; + } + fs_devices->open_devices++; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && device->devid != BTRFS_DEV_REPLACE_DEVID) { @@ -1174,23 +1184,30 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, struct btrfs_device *device; struct btrfs_device *latest_dev = NULL; struct btrfs_device *tmp_device; + int ret = 0; list_for_each_entry_safe(device, tmp_device, &fs_devices->devices, dev_list) { - int ret; + int ret2; - ret = btrfs_open_one_device(fs_devices, device, flags, holder); - if (ret == 0 && + ret2 = btrfs_open_one_device(fs_devices, device, flags, holder); + if (ret2 == 0 && (!latest_dev || device->generation > latest_dev->generation)) { latest_dev = device; - } else if (ret == -ENODATA) { + } else if (ret2 == -ENODATA) { fs_devices->num_devices--; list_del(&device->dev_list); btrfs_free_device(device); } + if (ret == 0 && ret2 != 0) + ret = ret2; } - if (fs_devices->open_devices == 0) + + if (fs_devices->open_devices == 0) { + if (ret) + return ret; return -EINVAL; + } fs_devices->opened = 1; fs_devices->latest_dev = latest_dev; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 5a3d5ec75c5a..4cba80b34387 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1574,11 +1574,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) if (!map) return -EINVAL; - cache->physical_map = btrfs_clone_chunk_map(map, GFP_NOFS); - if (!cache->physical_map) { - ret = -ENOMEM; - goto out; - } + cache->physical_map = map; zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS); if (!zone_info) { @@ -1690,7 +1686,6 @@ out: } bitmap_free(active); kfree(zone_info); - btrfs_free_chunk_map(map); return ret; } @@ -2175,6 +2170,7 @@ static int do_zone_finish(struct 
btrfs_block_group *block_group, bool fully_writ struct btrfs_chunk_map *map; const bool is_metadata = (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)); + struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int ret = 0; int i; @@ -2250,6 +2246,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ btrfs_clear_data_reloc_bg(block_group); spin_unlock(&block_group->lock); + down_read(&dev_replace->rwsem); map = block_group->physical_map; for (i = 0; i < map->num_stripes; i++) { struct btrfs_device *device = map->stripes[i].dev; @@ -2266,13 +2263,16 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ zinfo->zone_size >> SECTOR_SHIFT); memalloc_nofs_restore(nofs_flags); - if (ret) + if (ret) { + up_read(&dev_replace->rwsem); return ret; + } if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) zinfo->reserved_active_zones++; btrfs_dev_clear_active_zone(device, physical); } + up_read(&dev_replace->rwsem); if (!fully_written) btrfs_dec_block_group_ro(block_group); diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 39e75131fd5a..9901057a15ba 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb) sb->s_mtd = NULL; } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) { sync_blockdev(sb->s_bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } kfree(sbi); } diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 69308fd73e4a..c0eb139adb07 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -430,7 +430,6 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) switch (mode) { case EROFS_MOUNT_DAX_ALWAYS: - warnfc(fc, "DAX enabled. 
Warning: EXPERIMENTAL, use at your own risk"); set_opt(&ctx->opt, DAX_ALWAYS); clear_opt(&ctx->opt, DAX_NEVER); return true; diff --git a/fs/exec.c b/fs/exec.c index ff6f26671cfc..cf1df7f16e55 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -895,6 +895,7 @@ int transfer_args_to_stack(struct linux_binprm *bprm, goto out; } + bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE; *sp_location = sp; out: diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cfb8449c731f..044135796f2b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5668,7 +5668,7 @@ failed_mount: brelse(sbi->s_sbh); if (sbi->s_journal_bdev_file) { invalidate_bdev(file_bdev(sbi->s_journal_bdev_file)); - fput(sbi->s_journal_bdev_file); + bdev_fput(sbi->s_journal_bdev_file); } out_fail: invalidate_bdev(sb->s_bdev); @@ -5913,7 +5913,7 @@ static struct file *ext4_get_journal_blkdev(struct super_block *sb, out_bh: brelse(bh); out_bdev: - fput(bdev_file); + bdev_fput(bdev_file); return ERR_PTR(errno); } @@ -5952,7 +5952,7 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb, out_journal: jbd2_journal_destroy(journal); out_bdev: - fput(bdev_file); + bdev_fput(bdev_file); return ERR_PTR(errno); } @@ -7327,7 +7327,7 @@ static void ext4_kill_sb(struct super_block *sb) kill_block_super(sb); if (bdev_file) - fput(bdev_file); + bdev_fput(bdev_file); } static struct file_system_type ext4_fs_type = { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a6867f26f141..a4bc26dfdb1a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1558,7 +1558,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->s_ndevs; i++) { if (i > 0) - fput(FDEV(i).bdev_file); + bdev_fput(FDEV(i).bdev_file); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); #endif diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 789af5c8fade..aa1626955b2c 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1718,7 +1718,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) struct buffer_head 
*dibh, *bh; struct gfs2_holder rd_gh; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; - u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift; + unsigned int bsize = 1 << bsize_shift; + u64 lblock = (offset + bsize - 1) >> bsize_shift; __u16 start_list[GFS2_MAX_META_HEIGHT]; __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; unsigned int start_aligned, end_aligned; @@ -1729,7 +1730,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) u64 prev_bnr = 0; __be64 *start, *end; - if (offset >= maxsize) { + if (offset + bsize - 1 >= maxsize) { /* * The starting point lies beyond the allocated metadata; * there are no blocks to deallocate. diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 73389c68e251..9609349e92e5 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1141,7 +1141,7 @@ journal_found: lbmLogShutdown(log); close: /* close external log device */ - fput(bdev_file); + bdev_fput(bdev_file); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1485,7 +1485,7 @@ int lmLogClose(struct super_block *sb) bdev_file = log->bdev_file; rc = lmLogShutdown(log); - fput(bdev_file); + bdev_fput(bdev_file); kfree(log); diff --git a/fs/namei.c b/fs/namei.c index ceb9ddf8dfdd..c5b2a25be7d0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4050,6 +4050,8 @@ retry: case 0: case S_IFREG: error = vfs_create(idmap, path.dentry->d_inode, dentry, mode, true); + if (!error) + security_path_post_mknod(idmap, dentry); break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(idmap, path.dentry->d_inode, @@ -4060,11 +4062,6 @@ retry: dentry, mode, 0); break; } - - if (error) - goto out2; - - security_path_post_mknod(idmap, dentry); out2: done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1a93c7fcf76c..84d4093ca713 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3042,12 +3042,9 @@ static void nfsd4_cb_recall_any_release(struct 
nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - spin_lock(&nn->client_lock); clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); - put_client_renew_locked(clp); - spin_unlock(&nn->client_lock); + drop_client(clp); } static int @@ -3831,15 +3828,20 @@ nfsd4_create_session(struct svc_rqst *rqstp, else cs_slot = &unconf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); - if (status) { - if (status == nfserr_replay_cache) { - status = nfsd4_replay_create_session(cr_ses, cs_slot); - goto out_free_conn; - } + switch (status) { + case nfs_ok: + cs_slot->sl_seqid++; + cr_ses->seqid = cs_slot->sl_seqid; + break; + case nfserr_replay_cache: + status = nfsd4_replay_create_session(cr_ses, cs_slot); + fallthrough; + case nfserr_jukebox: + /* The server MUST NOT cache NFS4ERR_DELAY */ + goto out_free_conn; + default: goto out_cache_error; } - cs_slot->sl_seqid++; - cr_ses->seqid = cs_slot->sl_seqid; /* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */ if (conf) { @@ -3859,10 +3861,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = mark_client_expired_locked(old); - if (status) { - old = NULL; - goto out_cache_error; - } + if (status) + goto out_expired_error; trace_nfsd_clid_replaced(&old->cl_clientid); } move_to_confirmed(unconf); @@ -3894,6 +3894,17 @@ nfsd4_create_session(struct svc_rqst *rqstp, expire_client(old); return status; +out_expired_error: + old = NULL; + /* + * Revert the slot seq_nr change so the server will process + * the client's resend instead of returning a cached response. 
+ */ + if (status == nfserr_jukebox) { + cs_slot->sl_seqid--; + cr_ses->seqid = cs_slot->sl_seqid; + goto out_free_conn; + } out_cache_error: nfsd4_cache_create_session(cr_ses, cs_slot, status); out_free_conn: @@ -6602,7 +6613,7 @@ deleg_reaper(struct nfsd_net *nn) list_add(&clp->cl_ra_cblist, &cblist); /* release in nfsd4_cb_recall_any_release */ - atomic_inc(&clp->cl_rpc_users); + kref_get(&clp->cl_nfsdfs.cl_ref); set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); clp->cl_ra_time = ktime_get_boottime_seconds(); } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6a9464262fae..2e41eb4c3cec 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1852,7 +1852,7 @@ retry: trap = lock_rename(tdentry, fdentry); if (IS_ERR(trap)) { err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; - goto out; + goto out_want_write; } err = fh_fill_pre_attrs(ffhp); if (err != nfs_ok) @@ -1922,6 +1922,7 @@ retry: } out_unlock: unlock_rename(tdentry, fdentry); +out_want_write: fh_drop_write(ffhp); /* diff --git a/fs/proc/Makefile b/fs/proc/Makefile index bd08616ed8ba..7b4db9c56e6a 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -5,7 +5,7 @@ obj-y += proc.o -CFLAGS_task_mmu.o += $(call cc-option,-Wno-override-init,) +CFLAGS_task_mmu.o += -Wno-override-init proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := task_mmu.o diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 6474529c4253..e539ccd39e1e 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2589,7 +2589,7 @@ static void journal_list_init(struct super_block *sb) static void release_journal_dev(struct reiserfs_journal *journal) { if (journal->j_bdev_file) { - fput(journal->j_bdev_file); + bdev_fput(journal->j_bdev_file); journal->j_bdev_file = NULL; } } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 2be227532f39..2cbb92462074 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb) #ifdef CONFIG_ROMFS_ON_BLOCK if 
(sb->s_bdev) { sync_blockdev(sb->s_bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } #endif } diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index a0017724d523..13a9d7acf8f8 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -417,6 +417,7 @@ smb2_close_cached_fid(struct kref *ref) { struct cached_fid *cfid = container_of(ref, struct cached_fid, refcount); + int rc; spin_lock(&cfid->cfids->cfid_list_lock); if (cfid->on_list) { @@ -430,9 +431,10 @@ smb2_close_cached_fid(struct kref *ref) cfid->dentry = NULL; if (cfid->is_open) { - SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, + rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, cfid->fid.volatile_fid); - atomic_dec(&cfid->tcon->num_remote_opens); + if (rc != -EBUSY && rc != -EAGAIN) + atomic_dec(&cfid->tcon->num_remote_opens); } free_cached_dir(cfid); diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 226d4835c92d..c71ae5c04306 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -250,6 +250,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); list_for_each_entry(cfile, &tcon->openFileList, tlist) { @@ -676,6 +678,8 @@ static ssize_t cifs_stats_proc_write(struct file *file, } #endif /* CONFIG_CIFS_STATS2 */ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { atomic_set(&tcon->num_smbs_sent, 0); spin_lock(&tcon->stat_lock); @@ -755,6 +759,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) } #endif /* STATS2 */ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + 
if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { i++; seq_printf(m, "\n%d) %s", i, tcon->tree_name); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index aa6f1ecb7c0e..d41eedbff674 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -156,6 +156,7 @@ struct workqueue_struct *decrypt_wq; struct workqueue_struct *fileinfo_put_wq; struct workqueue_struct *cifsoplockd_wq; struct workqueue_struct *deferredclose_wq; +struct workqueue_struct *serverclose_wq; __u32 cifs_lock_secret; /* @@ -1888,6 +1889,13 @@ init_cifs(void) goto out_destroy_cifsoplockd_wq; } + serverclose_wq = alloc_workqueue("serverclose", + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!serverclose_wq) { + rc = -ENOMEM; + goto out_destroy_serverclose_wq; + } + rc = cifs_init_inodecache(); if (rc) goto out_destroy_deferredclose_wq; @@ -1962,6 +1970,8 @@ out_destroy_decrypt_wq: destroy_workqueue(decrypt_wq); out_destroy_cifsiod_wq: destroy_workqueue(cifsiod_wq); +out_destroy_serverclose_wq: + destroy_workqueue(serverclose_wq); out_clean_proc: cifs_proc_clean(); return rc; @@ -1991,6 +2001,7 @@ exit_cifs(void) destroy_workqueue(cifsoplockd_wq); destroy_workqueue(decrypt_wq); destroy_workqueue(fileinfo_put_wq); + destroy_workqueue(serverclose_wq); destroy_workqueue(cifsiod_wq); cifs_proc_clean(); } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 7ed9d05f6890..f6a302205f89 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -442,10 +442,10 @@ struct smb_version_operations { /* set fid protocol-specific info */ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); /* close a file */ - void (*close)(const unsigned int, struct cifs_tcon *, + int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* close a file, returning file attributes and timestamps */ - void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, + int (*close_getattr)(const unsigned int xid, 
struct cifs_tcon *tcon, struct cifsFileInfo *pfile_info); /* send a flush request to the server */ int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); @@ -1281,7 +1281,6 @@ struct cifs_tcon { struct cached_fids *cfids; /* BB add field for back pointer to sb struct(s)? */ #ifdef CONFIG_CIFS_DFS_UPCALL - struct list_head dfs_ses_list; struct delayed_work dfs_cache_work; #endif struct delayed_work query_interfaces; /* query interfaces workqueue job */ @@ -1440,6 +1439,7 @@ struct cifsFileInfo { bool swapfile:1; bool oplock_break_cancelled:1; bool status_file_deleted:1; /* file has been deleted */ + bool offload:1; /* offload final part of _put to a wq */ unsigned int oplock_epoch; /* epoch from the lease break */ __u32 oplock_level; /* oplock/lease level from the lease break */ int count; @@ -1448,6 +1448,7 @@ struct cifsFileInfo { struct cifs_search_info srch_inf; struct work_struct oplock_break; /* work for oplock breaks */ struct work_struct put; /* work for the final part of _put */ + struct work_struct serverclose; /* work for serverclose */ struct delayed_work deferred; bool deferred_close_scheduled; /* Flag to indicate close is scheduled */ char *symlink_target; @@ -1804,7 +1805,6 @@ struct cifs_mount_ctx { struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; - struct list_head dfs_ses_list; }; static inline void __free_dfs_info_param(struct dfs_info3_param *param) @@ -2105,6 +2105,7 @@ extern struct workqueue_struct *decrypt_wq; extern struct workqueue_struct *fileinfo_put_wq; extern struct workqueue_struct *cifsoplockd_wq; extern struct workqueue_struct *deferredclose_wq; +extern struct workqueue_struct *serverclose_wq; extern __u32 cifs_lock_secret; extern mempool_t *cifs_sm_req_poolp; @@ -2324,4 +2325,14 @@ struct smb2_compound_vars { struct kvec ea_iov; }; +static inline bool cifs_ses_exiting(struct cifs_ses *ses) +{ + bool ret; + + spin_lock(&ses->ses_lock); + ret = ses->ses_status == SES_EXITING; + 
spin_unlock(&ses->ses_lock); + return ret; +} + #endif /* _CIFS_GLOB_H */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 0723e1b57256..8e0a348f1f66 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -725,31 +725,31 @@ struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon); void cifs_put_tcon_super(struct super_block *sb); int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry); -/* Put references of @ses and @ses->dfs_root_ses */ +/* Put references of @ses and its children */ static inline void cifs_put_smb_ses(struct cifs_ses *ses) { - struct cifs_ses *rses = ses->dfs_root_ses; + struct cifs_ses *next; - __cifs_put_smb_ses(ses); - if (rses) - __cifs_put_smb_ses(rses); + do { + next = ses->dfs_root_ses; + __cifs_put_smb_ses(ses); + } while ((ses = next)); } -/* Get an active reference of @ses and @ses->dfs_root_ses. +/* Get an active reference of @ses and its children. * * NOTE: make sure to call this function when incrementing reference count of * @ses to ensure that any DFS root session attached to it (@ses->dfs_root_ses) * will also get its reference count incremented. * - * cifs_put_smb_ses() will put both references, so call it when you're done. + * cifs_put_smb_ses() will put all references, so call it when you're done. 
*/ static inline void cifs_smb_ses_inc_refcount(struct cifs_ses *ses) { lockdep_assert_held(&cifs_tcp_ses_lock); - ses->ses_count++; - if (ses->dfs_root_ses) - ses->dfs_root_ses->ses_count++; + for (; ses; ses = ses->dfs_root_ses) + ses->ses_count++; } static inline bool dfs_src_pathname_equal(const char *s1, const char *s2) diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 5aee55551573..23b5709ddc31 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -5854,10 +5854,8 @@ SetEARetry: parm_data->list.EA_flags = 0; /* we checked above that name len is less than 255 */ parm_data->list.name_len = (__u8)name_len; - /* EA names are always ASCII */ - if (ea_name) - strncpy(parm_data->list.name, ea_name, name_len); - parm_data->list.name[name_len] = '\0'; + /* EA names are always ASCII and NUL-terminated */ + strscpy(parm_data->list.name, ea_name ?: "", name_len + 1); parm_data->list.value_len = cpu_to_le16(ea_value_len); /* caller ensures that ea_value_len is less than 64K but we need to ensure that it fits within the smb */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 9b85b5341822..85679ae106fd 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -175,6 +175,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { if (!ses->chans[i].server) @@ -232,7 +234,13 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) { - /* check if iface is still active */ + spin_lock(&ses->ses_lock); + if (ses->ses_status == SES_EXITING) { + spin_unlock(&ses->ses_lock); + continue; + } + spin_unlock(&ses->ses_lock); + spin_lock(&ses->chan_lock); if (cifs_ses_get_chan_index(ses, 
server) == CIFS_INVAL_CHAN_INDEX) { @@ -1860,6 +1868,9 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) ctx->sectype != ses->sectype) return 0; + if (ctx->dfs_root_ses != ses->dfs_root_ses) + return 0; + /* * If an existing session is limited to less channels than * requested, it should not be reused @@ -1963,31 +1974,6 @@ out: return rc; } -/** - * cifs_free_ipc - helper to release the session IPC tcon - * @ses: smb session to unmount the IPC from - * - * Needs to be called everytime a session is destroyed. - * - * On session close, the IPC is closed and the server must release all tcons of the session. - * No need to send a tree disconnect here. - * - * Besides, it will make the server to not close durable and resilient files on session close, as - * specified in MS-SMB2 3.3.5.6 Receiving an SMB2 LOGOFF Request. - */ -static int -cifs_free_ipc(struct cifs_ses *ses) -{ - struct cifs_tcon *tcon = ses->tcon_ipc; - - if (tcon == NULL) - return 0; - - tconInfoFree(tcon); - ses->tcon_ipc = NULL; - return 0; -} - static struct cifs_ses * cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { @@ -2019,48 +2005,52 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) void __cifs_put_smb_ses(struct cifs_ses *ses) { struct TCP_Server_Info *server = ses->server; + struct cifs_tcon *tcon; unsigned int xid; size_t i; + bool do_logoff; int rc; + spin_lock(&cifs_tcp_ses_lock); spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_EXITING) { + cifs_dbg(FYI, "%s: id=0x%llx ses_count=%d ses_status=%u ipc=%s\n", + __func__, ses->Suid, ses->ses_count, ses->ses_status, + ses->tcon_ipc ? 
ses->tcon_ipc->tree_name : "none"); + if (ses->ses_status == SES_EXITING || --ses->ses_count > 0) { spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); return; } - spin_unlock(&ses->ses_lock); + /* ses_count can never go negative */ + WARN_ON(ses->ses_count < 0); - cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count); - cifs_dbg(FYI, - "%s: ses ipc: %s\n", __func__, ses->tcon_ipc ? ses->tcon_ipc->tree_name : "NONE"); + spin_lock(&ses->chan_lock); + cifs_chan_clear_need_reconnect(ses, server); + spin_unlock(&ses->chan_lock); - spin_lock(&cifs_tcp_ses_lock); - if (--ses->ses_count > 0) { - spin_unlock(&cifs_tcp_ses_lock); - return; - } - spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_GOOD) - ses->ses_status = SES_EXITING; + do_logoff = ses->ses_status == SES_GOOD && server->ops->logoff; + ses->ses_status = SES_EXITING; + tcon = ses->tcon_ipc; + ses->tcon_ipc = NULL; spin_unlock(&ses->ses_lock); spin_unlock(&cifs_tcp_ses_lock); - /* ses_count can never go negative */ - WARN_ON(ses->ses_count < 0); - - spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_EXITING && server->ops->logoff) { - spin_unlock(&ses->ses_lock); - cifs_free_ipc(ses); + /* + * On session close, the IPC is closed and the server must release all + * tcons of the session. No need to send a tree disconnect here. + * + * Besides, it will make the server to not close durable and resilient + * files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an + * SMB2 LOGOFF Request. + */ + tconInfoFree(tcon); + if (do_logoff) { xid = get_xid(); rc = server->ops->logoff(xid, ses); if (rc) cifs_server_dbg(VFS, "%s: Session Logoff failure rc=%d\n", __func__, rc); _free_xid(xid); - } else { - spin_unlock(&ses->ses_lock); - cifs_free_ipc(ses); } spin_lock(&cifs_tcp_ses_lock); @@ -2373,9 +2363,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) * need to lock before changing something in the session. 
*/ spin_lock(&cifs_tcp_ses_lock); + if (ctx->dfs_root_ses) + cifs_smb_ses_inc_refcount(ctx->dfs_root_ses); ses->dfs_root_ses = ctx->dfs_root_ses; - if (ses->dfs_root_ses) - ses->dfs_root_ses->ses_count++; list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); @@ -3326,6 +3316,9 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) cifs_put_smb_ses(mnt_ctx->ses); else if (mnt_ctx->server) cifs_put_tcp_session(mnt_ctx->server, 0); + mnt_ctx->ses = NULL; + mnt_ctx->tcon = NULL; + mnt_ctx->server = NULL; mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS; free_xid(mnt_ctx->xid); } @@ -3604,8 +3597,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) bool isdfs; int rc; - INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list); - rc = dfs_mount_share(&mnt_ctx, &isdfs); if (rc) goto error; @@ -3636,7 +3627,6 @@ out: return rc; error: - dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list); cifs_mount_put_conns(&mnt_ctx); return rc; } @@ -3651,6 +3641,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) goto error; rc = cifs_mount_get_tcon(&mnt_ctx); + if (!rc) { + /* + * Prevent superblock from being created with any missing + * connections. 
+ */ + if (WARN_ON(!mnt_ctx.server)) + rc = -EHOSTDOWN; + else if (WARN_ON(!mnt_ctx.ses)) + rc = -EACCES; + else if (WARN_ON(!mnt_ctx.tcon)) + rc = -ENOENT; + } if (rc) goto error; @@ -3988,13 +3990,14 @@ cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses) } static struct cifs_tcon * -cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) +__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) { int rc; struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); struct cifs_ses *ses; struct cifs_tcon *tcon = NULL; struct smb3_fs_context *ctx; + char *origin_fullpath = NULL; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (ctx == NULL) @@ -4018,6 +4021,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ctx->sign = master_tcon->ses->sign; ctx->seal = master_tcon->seal; ctx->witness = master_tcon->use_witness; + ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses; rc = cifs_set_vol_auth(ctx, master_tcon->ses); if (rc) { @@ -4037,12 +4041,39 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) goto out; } +#ifdef CONFIG_CIFS_DFS_UPCALL + spin_lock(&master_tcon->tc_lock); + if (master_tcon->origin_fullpath) { + spin_unlock(&master_tcon->tc_lock); + origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source); + if (IS_ERR(origin_fullpath)) { + tcon = ERR_CAST(origin_fullpath); + origin_fullpath = NULL; + cifs_put_smb_ses(ses); + goto out; + } + } else { + spin_unlock(&master_tcon->tc_lock); + } +#endif + tcon = cifs_get_tcon(ses, ctx); if (IS_ERR(tcon)) { cifs_put_smb_ses(ses); goto out; } +#ifdef CONFIG_CIFS_DFS_UPCALL + if (origin_fullpath) { + spin_lock(&tcon->tc_lock); + tcon->origin_fullpath = origin_fullpath; + spin_unlock(&tcon->tc_lock); + origin_fullpath = NULL; + queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, + dfs_cache_get_ttl() * HZ); + } +#endif + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (cap_unix(ses)) reset_cifs_unix_caps(0, tcon, NULL, ctx); @@ -4051,11 +4082,23 @@ 
cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) out: kfree(ctx->username); kfree_sensitive(ctx->password); + kfree(origin_fullpath); kfree(ctx); return tcon; } +static struct cifs_tcon * +cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) +{ + struct cifs_tcon *ret; + + cifs_mount_lock(); + ret = __cifs_construct_tcon(cifs_sb, fsuid); + cifs_mount_unlock(); + return ret; +} + struct cifs_tcon * cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) { diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index 449c59830039..3ec965547e3d 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -66,33 +66,20 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path) } /* - * Track individual DFS referral servers used by new DFS mount. - * - * On success, their lifetime will be shared by final tcon (dfs_ses_list). - * Otherwise, they will be put by dfs_put_root_smb_sessions() in cifs_mount(). + * Get an active reference of @ses so that next call to cifs_put_tcon() won't + * release it as any new DFS referrals must go through its IPC tcon. 
*/ -static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx) +static void add_root_smb_session(struct cifs_mount_ctx *mnt_ctx) { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - struct dfs_root_ses *root_ses; struct cifs_ses *ses = mnt_ctx->ses; if (ses) { - root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL); - if (!root_ses) - return -ENOMEM; - - INIT_LIST_HEAD(&root_ses->list); - spin_lock(&cifs_tcp_ses_lock); cifs_smb_ses_inc_refcount(ses); spin_unlock(&cifs_tcp_ses_lock); - root_ses->ses = ses; - list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list); } - /* Select new DFS referral server so that new referrals go through it */ ctx->dfs_root_ses = ses; - return 0; } static inline int parse_dfs_target(struct smb3_fs_context *ctx, @@ -185,11 +172,8 @@ again: continue; } - if (is_refsrv) { - rc = add_root_smb_session(mnt_ctx); - if (rc) - goto out; - } + if (is_refsrv) + add_root_smb_session(mnt_ctx); rc = ref_walk_advance(rw); if (!rc) { @@ -232,6 +216,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct cifs_tcon *tcon; char *origin_fullpath; + bool new_tcon = true; int rc; origin_fullpath = dfs_get_path(cifs_sb, ctx->source); @@ -239,6 +224,18 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) return PTR_ERR(origin_fullpath); rc = dfs_referral_walk(mnt_ctx); + if (!rc) { + /* + * Prevent superblock from being created with any missing + * connections. 
+ */ + if (WARN_ON(!mnt_ctx->server)) + rc = -EHOSTDOWN; + else if (WARN_ON(!mnt_ctx->ses)) + rc = -EACCES; + else if (WARN_ON(!mnt_ctx->tcon)) + rc = -ENOENT; + } if (rc) goto out; @@ -247,15 +244,14 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) if (!tcon->origin_fullpath) { tcon->origin_fullpath = origin_fullpath; origin_fullpath = NULL; + } else { + new_tcon = false; } spin_unlock(&tcon->tc_lock); - if (list_empty(&tcon->dfs_ses_list)) { - list_replace_init(&mnt_ctx->dfs_ses_list, &tcon->dfs_ses_list); + if (new_tcon) { queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, dfs_cache_get_ttl() * HZ); - } else { - dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list); } out: @@ -298,7 +294,6 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) if (rc) return rc; - ctx->dfs_root_ses = mnt_ctx->ses; /* * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally * try to get an DFS referral (even cached) to determine whether it is an DFS mount. 
@@ -324,7 +319,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) *isdfs = true; add_root_smb_session(mnt_ctx); - return __dfs_mount_share(mnt_ctx); + rc = __dfs_mount_share(mnt_ctx); + dfs_put_root_smb_sessions(mnt_ctx); + return rc; } /* Update dfs referral path of superblock */ diff --git a/fs/smb/client/dfs.h b/fs/smb/client/dfs.h index 875ab7ae57fc..e5c4dcf83750 100644 --- a/fs/smb/client/dfs.h +++ b/fs/smb/client/dfs.h @@ -7,7 +7,9 @@ #define _CIFS_DFS_H #include "cifsglob.h" +#include "cifsproto.h" #include "fs_context.h" +#include "dfs_cache.h" #include "cifs_unicode.h" #include <linux/namei.h> @@ -114,11 +116,6 @@ static inline void ref_walk_set_tgt_hint(struct dfs_ref_walk *rw) ref_walk_tit(rw)); } -struct dfs_root_ses { - struct list_head list; - struct cifs_ses *ses; -}; - int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref, struct smb3_fs_context *ctx); int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs); @@ -133,20 +130,32 @@ static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *p { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; + struct cifs_ses *rses = ctx->dfs_root_ses ?: mnt_ctx->ses; - return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls, + return dfs_cache_find(mnt_ctx->xid, rses, cifs_sb->local_nls, cifs_remap(cifs_sb), path, ref, tl); } -static inline void dfs_put_root_smb_sessions(struct list_head *head) +/* + * cifs_get_smb_ses() already guarantees an active reference of + * @ses->dfs_root_ses when a new session is created, so we need to put extra + * references of all DFS root sessions that were used across the mount process + * in dfs_mount_share(). 
+ */ +static inline void dfs_put_root_smb_sessions(struct cifs_mount_ctx *mnt_ctx) { - struct dfs_root_ses *root, *tmp; + const struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + struct cifs_ses *ses = ctx->dfs_root_ses; + struct cifs_ses *cur; + + if (!ses) + return; - list_for_each_entry_safe(root, tmp, head, list) { - list_del_init(&root->list); - cifs_put_smb_ses(root->ses); - kfree(root); + for (cur = ses; cur; cur = cur->dfs_root_ses) { + if (cur->dfs_root_ses) + cifs_put_smb_ses(cur->dfs_root_ses); } + cifs_put_smb_ses(ses); } #endif /* _CIFS_DFS_H */ diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c index 508d831fabe3..11c8efecf7aa 100644 --- a/fs/smb/client/dfs_cache.c +++ b/fs/smb/client/dfs_cache.c @@ -1172,8 +1172,8 @@ static bool is_ses_good(struct cifs_ses *ses) return ret; } -/* Refresh dfs referral of tcon and mark it for reconnect if needed */ -static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_refresh) +/* Refresh dfs referral of @ses and mark it for reconnect if needed */ +static void __refresh_ses_referral(struct cifs_ses *ses, bool force_refresh) { struct TCP_Server_Info *server = ses->server; DFS_CACHE_TGT_LIST(old_tl); @@ -1181,10 +1181,21 @@ static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_ref bool needs_refresh = false; struct cache_entry *ce; unsigned int xid; + char *path = NULL; int rc = 0; xid = get_xid(); + mutex_lock(&server->refpath_lock); + if (server->leaf_fullpath) { + path = kstrdup(server->leaf_fullpath + 1, GFP_ATOMIC); + if (!path) + rc = -ENOMEM; + } + mutex_unlock(&server->refpath_lock); + if (!path) + goto out; + down_read(&htable_rw_lock); ce = lookup_cache_entry(path); needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce); @@ -1218,19 +1229,17 @@ out: free_xid(xid); dfs_cache_free_tgts(&old_tl); dfs_cache_free_tgts(&new_tl); - return rc; + kfree(path); } -static int refresh_tcon(struct cifs_tcon *tcon, bool force_refresh) +static inline 
void refresh_ses_referral(struct cifs_ses *ses) { - struct TCP_Server_Info *server = tcon->ses->server; - struct cifs_ses *ses = tcon->ses; + __refresh_ses_referral(ses, false); +} - mutex_lock(&server->refpath_lock); - if (server->leaf_fullpath) - __refresh_tcon(server->leaf_fullpath + 1, ses, force_refresh); - mutex_unlock(&server->refpath_lock); - return 0; +static inline void force_refresh_ses_referral(struct cifs_ses *ses) +{ + __refresh_ses_referral(ses, true); } /** @@ -1271,34 +1280,20 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) */ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; - return refresh_tcon(tcon, true); + force_refresh_ses_referral(tcon->ses); + return 0; } /* Refresh all DFS referrals related to DFS tcon */ void dfs_cache_refresh(struct work_struct *work) { - struct TCP_Server_Info *server; - struct dfs_root_ses *rses; struct cifs_tcon *tcon; struct cifs_ses *ses; tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work); - ses = tcon->ses; - server = ses->server; - mutex_lock(&server->refpath_lock); - if (server->leaf_fullpath) - __refresh_tcon(server->leaf_fullpath + 1, ses, false); - mutex_unlock(&server->refpath_lock); - - list_for_each_entry(rses, &tcon->dfs_ses_list, list) { - ses = rses->ses; - server = ses->server; - mutex_lock(&server->refpath_lock); - if (server->leaf_fullpath) - __refresh_tcon(server->leaf_fullpath + 1, ses, false); - mutex_unlock(&server->refpath_lock); - } + for (ses = tcon->ses; ses; ses = ses->dfs_root_ses) + refresh_ses_referral(ses); queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, atomic_read(&dfs_cache_ttl) * HZ); diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index 89333d9bce36..864b194dbaa0 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -189,6 +189,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int disposition; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + int rdwr_for_fscache 
= 0; *oplock = 0; if (tcon->ses->server->oplocks) @@ -200,6 +201,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned return PTR_ERR(full_path); } + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && (CIFS_UNIX_POSIX_PATH_OPS_CAP & @@ -276,6 +281,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned desired_access |= GENERIC_READ; /* is this too little? */ if (OPEN_FMODE(oflags) & FMODE_WRITE) desired_access |= GENERIC_WRITE; + if (rdwr_for_fscache == 1) + desired_access |= GENERIC_READ; disposition = FILE_OVERWRITE_IF; if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) @@ -304,6 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned if (!tcon->unix_ext && (mode & S_IWUGO) == 0) create_options |= CREATE_OPTION_READONLY; +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -317,8 +325,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned rc = server->ops->open(xid, &oparms, oplock, buf); if (rc) { cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc); + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access &= ~GENERIC_READ; + rdwr_for_fscache = 2; + goto retry_open; + } goto out; } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY /* @@ -612,11 +627,18 @@ int cifs_mknod(struct mnt_idmap *idmap, struct inode *inode, goto mknod_out; } + trace_smb3_mknod_enter(xid, tcon->ses->Suid, tcon->tid, full_path); + rc = tcon->ses->server->ops->make_node(xid, inode, direntry, tcon, full_path, mode, device_number); mknod_out: + if (rc) + trace_smb3_mknod_err(xid, tcon->ses->Suid, tcon->tid, rc); + 
else + trace_smb3_mknod_done(xid, tcon->ses->Suid, tcon->tid); + free_dentry_path(page); free_xid(xid); cifs_put_tlink(tlink); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 16aadce492b2..9be37d0fe724 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) */ } -static inline int cifs_convert_flags(unsigned int flags) +static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache) { if ((flags & O_ACCMODE) == O_RDONLY) return GENERIC_READ; else if ((flags & O_ACCMODE) == O_WRONLY) - return GENERIC_WRITE; + return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE; else if ((flags & O_ACCMODE) == O_RDWR) { /* GENERIC_ALL is too much permission to request can cause unnecessary access denied on create */ @@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ int create_options = CREATE_NOT_DIR; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; if (!server->ops->open) return -ENOSYS; - desired_access = cifs_convert_flags(f_flags); + /* If we're caching, we need to be able to fill in around partial writes. 
*/ + if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + + desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache); /********************************************************************* * open flag mapping table: @@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ if (f_flags & O_DIRECT) create_options |= CREATE_NO_BUFFER; +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ }; rc = server->ops->open(xid, &oparms, oplock, buf); - if (rc) + if (rc) { + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access = cifs_convert_flags(f_flags, 0); + rdwr_for_fscache = 2; + goto retry_open; + } return rc; + } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); /* TODO: Add support for calling posix query info but with passing in fid */ if (tcon->unix_ext) @@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem) } static void cifsFileInfo_put_work(struct work_struct *work); +void serverclose_work(struct work_struct *work); struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock, @@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, cfile->tlink = cifs_get_tlink(tlink); INIT_WORK(&cfile->oplock_break, cifs_oplock_break); INIT_WORK(&cfile->put, cifsFileInfo_put_work); + INIT_WORK(&cfile->serverclose, serverclose_work); INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); mutex_init(&cfile->fh_mutex); spin_lock_init(&cfile->file_info_lock); @@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work) cifsFileInfo_put_final(cifs_file); } +void serverclose_work(struct work_struct *work) +{ + struct cifsFileInfo *cifs_file = container_of(work, + struct cifsFileInfo, 
serverclose); + + struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); + + struct TCP_Server_Info *server = tcon->ses->server; + int rc = 0; + int retries = 0; + int MAX_RETRIES = 4; + + do { + if (server->ops->close_getattr) + rc = server->ops->close_getattr(0, tcon, cifs_file); + else if (server->ops->close) + rc = server->ops->close(0, tcon, &cifs_file->fid); + + if (rc == -EBUSY || rc == -EAGAIN) { + retries++; + msleep(250); + } + } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES) + ); + + if (retries == MAX_RETRIES) + pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES); + + if (cifs_file->offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); +} + /** * cifsFileInfo_put - release a reference of file priv data * @@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, struct cifs_fid fid = {}; struct cifs_pending_open open; bool oplock_break_cancelled; + bool serverclose_offloaded = false; spin_lock(&tcon->open_file_lock); spin_lock(&cifsi->open_file_lock); spin_lock(&cifs_file->file_info_lock); + + cifs_file->offload = offload; if (--cifs_file->count > 0) { spin_unlock(&cifs_file->file_info_lock); spin_unlock(&cifsi->open_file_lock); @@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; unsigned int xid; + int rc = 0; xid = get_xid(); if (server->ops->close_getattr) - server->ops->close_getattr(xid, tcon, cifs_file); + rc = server->ops->close_getattr(xid, tcon, cifs_file); else if (server->ops->close) - server->ops->close(xid, tcon, &cifs_file->fid); + rc = server->ops->close(xid, tcon, &cifs_file->fid); _free_xid(xid); + + if (rc == -EBUSY || rc == -EAGAIN) { + // Server close failed, hence offloading it as an async op + queue_work(serverclose_wq, &cifs_file->serverclose); + serverclose_offloaded = true; + } } if 
(oplock_break_cancelled) @@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, cifs_del_pending_open(&open); - if (offload) - queue_work(fileinfo_put_wq, &cifs_file->put); - else - cifsFileInfo_put_final(cifs_file); + // if serverclose has been offloaded to wq (on failure), it will + // handle offloading put as well. If serverclose not offloaded, + // we need to handle offloading put here. + if (!serverclose_offloaded) { + if (offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); + } } int cifs_open(struct inode *inode, struct file *file) @@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file) use_cache: fscache_use_cookie(cifs_inode_cookie(file_inode(file)), file->f_mode & FMODE_WRITE); - if (file->f_flags & O_DIRECT && - (!((file->f_flags & O_ACCMODE) != O_RDONLY) || - file->f_flags & O_APPEND)) - cifs_invalidate_cache(file_inode(file), - FSCACHE_INVAL_DIO_WRITE); + if (!(file->f_flags & O_DIRECT)) + goto out; + if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY) + goto out; + cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE); out: free_dentry_path(page); @@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) int disposition = FILE_OPEN; int create_options = CREATE_NOT_DIR; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; xid = get_xid(); mutex_lock(&cfile->fh_mutex); @@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ - desired_access = cifs_convert_flags(cfile->f_flags); + /* If we're caching, we need to be able to fill in around partial writes. 
*/ + if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + + desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (cfile->f_flags & O_SYNC) @@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &cfile->fid); +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) /* indicate that we need to relock the file */ oparms.reconnect = true; } + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access = cifs_convert_flags(cfile->f_flags, 0); + rdwr_for_fscache = 2; + goto retry_open; + } if (rc) { mutex_unlock(&cfile->fh_mutex); @@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) goto reopen_error_exit; } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY reopen_success: #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index bdcbe6ff2739..b7bfe705b2c4 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -37,7 +37,7 @@ #include "rfc1002pdu.h" #include "fs_context.h" -static DEFINE_MUTEX(cifs_mount_mutex); +DEFINE_MUTEX(cifs_mount_mutex); static const match_table_t cifs_smb_version_tokens = { { Smb_1, SMB1_VERSION_STRING }, @@ -783,9 +783,9 @@ static int smb3_get_tree(struct fs_context *fc) if (err) return err; - mutex_lock(&cifs_mount_mutex); + cifs_mount_lock(); ret = smb3_get_tree_common(fc); - mutex_unlock(&cifs_mount_mutex); + cifs_mount_unlock(); return ret; } diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 7863f2248c4d..8a35645e0b65 100644 --- a/fs/smb/client/fs_context.h +++ 
b/fs/smb/client/fs_context.h @@ -304,4 +304,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); #define MAX_CACHED_FIDS 16 extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp); +extern struct mutex cifs_mount_mutex; + +static inline void cifs_mount_lock(void) +{ + mutex_lock(&cifs_mount_mutex); +} + +static inline void cifs_mount_unlock(void) +{ + mutex_unlock(&cifs_mount_mutex); +} + #endif diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c index c4a3cb736881..340efce8f052 100644 --- a/fs/smb/client/fscache.c +++ b/fs/smb/client/fscache.c @@ -12,6 +12,16 @@ #include "cifs_fs_sb.h" #include "cifsproto.h" +/* + * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode(). + */ +struct cifs_fscache_inode_key { + + __le64 uniqueid; /* server inode number */ + __le64 createtime; /* creation time on server */ + u8 type; /* S_IFMT file type */ +} __packed; + static void cifs_fscache_fill_volume_coherency( struct cifs_tcon *tcon, struct cifs_fscache_volume_coherency_data *cd) @@ -97,15 +107,19 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) void cifs_fscache_get_inode_cookie(struct inode *inode) { struct cifs_fscache_inode_coherency_data cd; + struct cifs_fscache_inode_key key; struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + key.uniqueid = cpu_to_le64(cifsi->uniqueid); + key.createtime = cpu_to_le64(cifsi->createtime); + key.type = (inode->i_mode & S_IFMT) >> 12; cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd); cifsi->netfs.cache = fscache_acquire_cookie(tcon->fscache, 0, - &cifsi->uniqueid, sizeof(cifsi->uniqueid), + &key, sizeof(key), &cd, sizeof(cd), i_size_read(&cifsi->netfs.inode)); if (cifsi->netfs.cache) diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h index a3d73720914f..1f2ea9f5cc9a 100644 --- a/fs/smb/client/fscache.h +++ b/fs/smb/client/fscache.h @@ -109,6 
+109,11 @@ static inline void cifs_readahead_to_fscache(struct inode *inode, __cifs_readahead_to_fscache(inode, pos, len); } +static inline bool cifs_fscache_enabled(struct inode *inode) +{ + return fscache_cookie_enabled(cifs_inode_cookie(inode)); +} + #else /* CONFIG_CIFS_FSCACHE */ static inline void cifs_fscache_fill_coherency(struct inode *inode, @@ -124,6 +129,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {} static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {} static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; } static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {} +static inline bool cifs_fscache_enabled(struct inode *inode) { return false; } static inline int cifs_fscache_query_occupancy(struct inode *inode, pgoff_t first, unsigned int nr_pages, diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index d28ab0af6049..91b07ef9e25c 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1351,6 +1351,8 @@ cifs_find_inode(struct inode *inode, void *opaque) { struct cifs_fattr *fattr = opaque; + /* [!] The compared values must be the same in struct cifs_fscache_inode_key. 
*/ + /* don't match inode with different uniqueid */ if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) return 0; diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index c012dfdba80d..855ac5a62edf 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -247,7 +247,9 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) { list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) { - if (ses_it->Suid == out.session_id) { + spin_lock(&ses_it->ses_lock); + if (ses_it->ses_status != SES_EXITING && + ses_it->Suid == out.session_id) { ses = ses_it; /* * since we are using the session outside the crit @@ -255,9 +257,11 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug * so increment its refcount */ cifs_smb_ses_inc_refcount(ses); + spin_unlock(&ses_it->ses_lock); found = true; goto search_end; } + spin_unlock(&ses_it->ses_lock); } } search_end: diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index c3771fc81328..33ac4f8f5050 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -138,9 +138,6 @@ tcon_info_alloc(bool dir_leases_enabled) atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); ret_buf->stats_from_time = ktime_get_real_seconds(); -#ifdef CONFIG_CIFS_DFS_UPCALL - INIT_LIST_HEAD(&ret_buf->dfs_ses_list); -#endif return ret_buf; } @@ -156,9 +153,6 @@ tconInfoFree(struct cifs_tcon *tcon) atomic_dec(&tconInfoAllocCount); kfree(tcon->nativeFileSystem); kfree_sensitive(tcon->password); -#ifdef CONFIG_CIFS_DFS_UPCALL - dfs_put_root_smb_sessions(&tcon->dfs_ses_list); -#endif kfree(tcon->origin_fullpath); kfree(tcon); } @@ -487,6 +481,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if 
(cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid != buf->Tid) continue; diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index a9eaba8083b0..212ec6f66ec6 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); } -static void +static int cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - CIFSSMBClose(xid, tcon, fid->netfid); + return CIFSSMBClose(xid, tcon, fid->netfid); } static int diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 82b84a4941dd..cc72be5a93a9 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); cifs_stats_inc( @@ -697,6 +699,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 2ed456948f34..b156eefa75d7 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1412,14 +1412,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) memcpy(cfile->fid.create_guid, fid->create_guid, 16); } -static void +static int smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - 
SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); + return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } -static void +static int smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile) { @@ -1430,7 +1430,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, &file_inf); if (rc) - return; + return rc; inode = d_inode(cfile->dentry); @@ -1459,6 +1459,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, /* End of file and Attributes should not have to be updated on close */ spin_unlock(&inode->i_lock); + return rc; } static int @@ -2480,6 +2481,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) { spin_lock(&tcon->tc_lock); @@ -3913,7 +3916,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, strcat(message, "W"); } if (!new_oplock) - strncpy(message, "None", sizeof(message)); + strscpy(message, "None"); cinode->oplock = new_oplock; cifs_dbg(FYI, "%s Lease granted on inode %p\n", message, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 3ea688558e6c..c0c4933af5fc 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3628,9 +3628,9 @@ replay_again: memcpy(&pbuf->network_open_info, &rsp->network_open_info, sizeof(pbuf->network_open_info)); + atomic_dec(&tcon->num_remote_opens); } - atomic_dec(&tcon->num_remote_opens); close_exit: SMB2_close_free(&rqst); free_rsp_buf(resp_buftype, rsp); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 5a3ca62d2f07..1d6e54f7879e 100644 --- a/fs/smb/client/smb2transport.c +++ 
b/fs/smb/client/smb2transport.c @@ -659,7 +659,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) } spin_unlock(&server->srv_lock); if (!is_binding && !server->session_estab) { - strncpy(shdr->Signature, "BSRSPYL", 8); + strscpy(shdr->Signature, "BSRSPYL"); return 0; } diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index f9c1fd32d0b8..5e83cb9da902 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -375,6 +375,7 @@ DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(get_reparse_compound_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(delete_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mkdir_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(tdis_enter); +DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mknod_enter); DECLARE_EVENT_CLASS(smb3_inf_compound_done_class, TP_PROTO(unsigned int xid, @@ -415,7 +416,7 @@ DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_wsl_ea_compound_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(delete_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mkdir_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(tdis_done); - +DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mknod_done); DECLARE_EVENT_CLASS(smb3_inf_compound_err_class, TP_PROTO(unsigned int xid, @@ -461,6 +462,7 @@ DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_wsl_ea_compound_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mkdir_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(delete_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(tdis_err); +DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mknod_err); /* * For logging SMB3 Status code and Command for responses which return errors diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index 8ca8a45c4c62..686b321c5a8b 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -167,7 +167,8 @@ struct ksmbd_share_config_response { __u16 force_uid; __u16 force_gid; __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; - __u32 reserved[112]; /* Reserved room */ + __u32 reserved[111]; /* Reserved room */ + __u32 payload_sz; __u32 veto_list_sz; __s8 ____payload[]; }; 
diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index 328a412259dc..a2f0a2edceb8 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c @@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, share->name = kstrdup(name, GFP_KERNEL); if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) { - share->path = kstrdup(ksmbd_share_config_path(resp), + int path_len = PATH_MAX; + + if (resp->payload_sz) + path_len = resp->payload_sz - resp->veto_list_sz; + + share->path = kstrndup(ksmbd_share_config_path(resp), path_len, GFP_KERNEL); if (share->path) share->path_sz = strlen(share->path); diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c index a45f7dca482e..606aa3c5189a 100644 --- a/fs/smb/server/smb2ops.c +++ b/fs/smb/server/smb2ops.c @@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn) conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || + (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && + conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; } @@ -278,11 +283,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_DIRECTORY_LEASING; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || - (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && - conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 
d478fa0c57ab..5723bbf372d7 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5857,8 +5857,9 @@ static int smb2_rename(struct ksmbd_work *work, if (!file_info->ReplaceIfExists) flags = RENAME_NOREPLACE; - smb_break_all_levII_oplock(work, fp, 0); rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags); + if (!rc) + smb_break_all_levII_oplock(work, fp, 0); out: kfree(new_name); return rc; diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index f29bb03f0dc4..8752ac82c557 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -65,6 +65,7 @@ struct ipc_msg_table_entry { struct hlist_node ipc_table_hlist; void *response; + unsigned int msg_sz; }; static struct delayed_work ipc_timer_work; @@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz) } memcpy(entry->response, payload, sz); + entry->msg_sz = sz; wake_up_interruptible(&entry->wait); ret = 0; break; @@ -453,6 +455,34 @@ out: return ret; } +static int ipc_validate_msg(struct ipc_msg_table_entry *entry) +{ + unsigned int msg_sz = entry->msg_sz; + + if (entry->type == KSMBD_EVENT_RPC_REQUEST) { + struct ksmbd_rpc_command *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz; + } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) { + struct ksmbd_spnego_authen_response *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_spnego_authen_response) + + resp->session_key_len + resp->spnego_blob_len; + } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) { + struct ksmbd_share_config_response *resp = entry->response; + + if (resp->payload_sz) { + if (resp->payload_sz < resp->veto_list_sz) + return -EINVAL; + + msg_sz = sizeof(struct ksmbd_share_config_response) + + resp->payload_sz; + } + } + + return entry->msg_sz != msg_sz ? 
-EINVAL : 0; +} + static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle) { struct ipc_msg_table_entry entry; @@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle ret = wait_event_interruptible_timeout(entry.wait, entry.response != NULL, IPC_WAIT_TIMEOUT); + if (entry.response) { + ret = ipc_validate_msg(&entry); + if (ret) { + kvfree(entry.response); + entry.response = NULL; + } + } out: down_write(&ipc_msg_table_lock); hash_del(&entry.ipc_table_hlist); diff --git a/fs/super.c b/fs/super.c index 71d9779c42b1..69ce6c600968 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1515,29 +1515,11 @@ static int fs_bdev_thaw(struct block_device *bdev) return error; } -static void fs_bdev_super_get(void *data) -{ - struct super_block *sb = data; - - spin_lock(&sb_lock); - sb->s_count++; - spin_unlock(&sb_lock); -} - -static void fs_bdev_super_put(void *data) -{ - struct super_block *sb = data; - - put_super(sb); -} - const struct blk_holder_ops fs_holder_ops = { .mark_dead = fs_bdev_mark_dead, .sync = fs_bdev_sync, .freeze = fs_bdev_freeze, .thaw = fs_bdev_thaw, - .get_holder = fs_bdev_super_get, - .put_holder = fs_bdev_super_put, }; EXPORT_SYMBOL_GPL(fs_holder_ops); @@ -1562,7 +1544,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, * writable from userspace even for a read-only block device. 
*/ if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { - fput(bdev_file); + bdev_fput(bdev_file); return -EACCES; } @@ -1573,7 +1555,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, if (atomic_read(&bdev->bd_fsfreeze_count) > 0) { if (fc) warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); - fput(bdev_file); + bdev_fput(bdev_file); return -EBUSY; } spin_lock(&sb_lock); @@ -1693,7 +1675,7 @@ void kill_block_super(struct super_block *sb) generic_shutdown_super(sb); if (bdev) { sync_blockdev(bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } } diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c index 2307f8037efc..118dedef8ebe 100644 --- a/fs/vboxsf/file.c +++ b/fs/vboxsf/file.c @@ -218,6 +218,7 @@ const struct file_operations vboxsf_reg_fops = { .release = vboxsf_file_release, .fsync = noop_fsync, .splice_read = filemap_splice_read, + .setlease = simple_nosetlease, }; const struct inode_operations vboxsf_reg_iops = { diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index cabe8ac4fefc..ffb1d565da39 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -151,11 +151,11 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) if (!sbi->nls) { vbg_err("vboxsf: Count not load '%s' nls\n", nls_name); err = -EINVAL; - goto fail_free; + goto fail_destroy_idr; } } - sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL); + sbi->bdi_id = ida_alloc(&vboxsf_bdi_ida, GFP_KERNEL); if (sbi->bdi_id < 0) { err = sbi->bdi_id; goto fail_free; @@ -221,9 +221,10 @@ fail_unmap: vboxsf_unmap_folder(sbi->root); fail_free: if (sbi->bdi_id >= 0) - ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + ida_free(&vboxsf_bdi_ida, sbi->bdi_id); if (sbi->nls) unload_nls(sbi->nls); +fail_destroy_idr: idr_destroy(&sbi->ino_idr); kfree(sbi); return err; @@ -268,7 +269,7 @@ static void vboxsf_put_super(struct super_block *sb) vboxsf_unmap_folder(sbi->root); if (sbi->bdi_id >= 0) - ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + 
ida_free(&vboxsf_bdi_ida, sbi->bdi_id); if (sbi->nls) unload_nls(sbi->nls); diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c index 72ac9320e6a3..9515bbf0b54c 100644 --- a/fs/vboxsf/utils.c +++ b/fs/vboxsf/utils.c @@ -440,7 +440,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, { const char *in; char *out; - size_t out_len; size_t out_bound_len; size_t in_bound_len; @@ -448,7 +447,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, in_bound_len = utf8_len; out = name; - out_len = 0; /* Reserve space for terminating 0 */ out_bound_len = name_bound_len - 1; @@ -469,7 +467,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, out += nb; out_bound_len -= nb; - out_len += nb; } *out = 0; diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index d991eec05436..73a4b895de67 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -530,7 +530,8 @@ xfs_validate_sb_common( } if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), - XFS_FSB_TO_B(mp, sbp->sb_width), 0, false)) + XFS_FSB_TO_B(mp, sbp->sb_width), 0, + xfs_buf_daddr(bp) == XFS_SB_DADDR, false)) return -EFSCORRUPTED; /* @@ -1323,8 +1324,10 @@ xfs_sb_get_secondary( } /* - * sunit, swidth, sectorsize(optional with 0) should be all in bytes, - * so users won't be confused by values in error messages. + * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users + * won't be confused by values in error messages. This function returns false + * if the stripe geometry is invalid and the caller is unable to repair the + * stripe configuration later in the mount process. 
*/ bool xfs_validate_stripe_geometry( @@ -1332,20 +1335,21 @@ xfs_validate_stripe_geometry( __s64 sunit, __s64 swidth, int sectorsize, + bool may_repair, bool silent) { if (swidth > INT_MAX) { if (!silent) xfs_notice(mp, "stripe width (%lld) is too large", swidth); - return false; + goto check_override; } if (sunit > swidth) { if (!silent) xfs_notice(mp, "stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); - return false; + goto check_override; } if (sectorsize && (int)sunit % sectorsize) { @@ -1353,21 +1357,21 @@ xfs_validate_stripe_geometry( xfs_notice(mp, "stripe unit (%lld) must be a multiple of the sector size (%d)", sunit, sectorsize); - return false; + goto check_override; } if (sunit && !swidth) { if (!silent) xfs_notice(mp, "invalid stripe unit (%lld) and stripe width of 0", sunit); - return false; + goto check_override; } if (!sunit && swidth) { if (!silent) xfs_notice(mp, "invalid stripe width (%lld) and stripe unit of 0", swidth); - return false; + goto check_override; } if (sunit && (int)swidth % (int)sunit) { @@ -1375,9 +1379,27 @@ xfs_validate_stripe_geometry( xfs_notice(mp, "stripe width (%lld) must be a multiple of the stripe unit (%lld)", swidth, sunit); - return false; + goto check_override; } return true; + +check_override: + if (!may_repair) + return false; + /* + * During mount, mp->m_dalign will not be set unless the sunit mount + * option was set. If it was set, ignore the bad stripe alignment values + * and allow the validation and overwrite later in the mount process to + * attempt to overwrite the bad stripe alignment values with the values + * supplied by mount options. 
+ */ + if (!mp->m_dalign) + return false; + if (!silent) + xfs_notice(mp, +"Will try to correct with specified mount options sunit (%d) and swidth (%d)", + BBTOB(mp->m_dalign), BBTOB(mp->m_swidth)); + return true; } /* diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 2e8e8d63d4eb..37b1ed1bc209 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -35,8 +35,9 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); -extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, - __s64 sunit, __s64 swidth, int sectorsize, bool silent); +bool xfs_validate_stripe_geometry(struct xfs_mount *mp, + __s64 sunit, __s64 swidth, int sectorsize, bool may_repair, + bool silent); uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index abff79a77c72..47a20cf5205f 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -1044,9 +1044,7 @@ xchk_irele( struct xfs_scrub *sc, struct xfs_inode *ip) { - if (current->journal_info != NULL) { - ASSERT(current->journal_info == sc->tp); - + if (sc->tp) { /* * If we are in a transaction, we /cannot/ drop the inode * ourselves, because the VFS will trigger writeback, which diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 1698507d1ac7..3f428620ebf2 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -503,13 +503,6 @@ xfs_vm_writepages( { struct xfs_writepage_ctx wpc = { }; - /* - * Writing back data in a transaction context can result in recursive - * transactions. This is bad, so issue a warning and get out of here. 
- */ - if (WARN_ON_ONCE(current->journal_info)) - return 0; - xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1a18c381127e..f0fa02264eda 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2030,7 +2030,7 @@ xfs_free_buftarg( fs_put_dax(btp->bt_daxdev, btp->bt_mount); /* the main block device is closed by kill_block_super */ if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev) - fput(btp->bt_bdev_file); + bdev_fput(btp->bt_bdev_file); kfree(btp); } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index e64265bc0b33..74f1812b03cb 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -2039,8 +2039,10 @@ xfs_inodegc_want_queue_work( * - Memory shrinkers queued the inactivation worker and it hasn't finished. * - The queue depth exceeds the maximum allowable percpu backlog. * - * Note: If the current thread is running a transaction, we don't ever want to - * wait for other transactions because that could introduce a deadlock. + * Note: If we are in a NOFS context here (e.g. current thread is running a + * transaction) the we don't want to block here as inodegc progress may require + * filesystem resources we hold to make progress and that could result in a + * deadlock. Hence we skip out of here if we are in a scoped NOFS context. 
*/ static inline bool xfs_inodegc_want_flush_work( @@ -2048,7 +2050,7 @@ xfs_inodegc_want_flush_work( unsigned int items, unsigned int shrinker_hits) { - if (current->journal_info) + if (current->flags & PF_MEMALLOC_NOFS) return false; if (shrinker_hits > 0) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ea48774f6b76..d55b42b2480d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1301,8 +1301,19 @@ xfs_link( */ if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) && tdp->i_projid != sip->i_projid)) { - error = -EXDEV; - goto error_return; + /* + * Project quota setup skips special files which can + * leave inodes in a PROJINHERIT directory without a + * project ID set. We need to allow links to be made + * to these "project-less" inodes because userspace + * expects them to succeed after project ID setup, + * but everything else should be rejected. + */ + if (!special_file(VFS_I(sip)->i_mode) || + sip->i_projid != 0) { + error = -EXDEV; + goto error_return; + } } if (!resblks) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c21f10ab0f5d..bce020374c5e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -485,7 +485,7 @@ xfs_open_devices( mp->m_logdev_targp = mp->m_ddev_targp; /* Handle won't be used, drop it */ if (logdev_file) - fput(logdev_file); + bdev_fput(logdev_file); } return 0; @@ -497,10 +497,10 @@ xfs_open_devices( xfs_free_buftarg(mp->m_ddev_targp); out_close_rtdev: if (rtdev_file) - fput(rtdev_file); + bdev_fput(rtdev_file); out_close_logdev: if (logdev_file) - fput(logdev_file); + bdev_fput(logdev_file); return error; } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 3f7e3a09a49f..1636663707dc 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -268,19 +268,14 @@ static inline void xfs_trans_set_context( struct xfs_trans *tp) { - ASSERT(current->journal_info == NULL); tp->t_pflags = memalloc_nofs_save(); - current->journal_info = tp; } static inline void xfs_trans_clear_context( struct 
xfs_trans *tp) { - if (current->journal_info == tp) { - memalloc_nofs_restore(tp->t_pflags); - current->journal_info = NULL; - } + memalloc_nofs_restore(tp->t_pflags); } static inline void @@ -288,10 +283,8 @@ xfs_trans_switch_context( struct xfs_trans *old_tp, struct xfs_trans *new_tp) { - ASSERT(current->journal_info == old_tp); new_tp->t_pflags = old_tp->t_pflags; old_tp->t_pflags = 0; - current->journal_info = new_tp; } #endif /* __XFS_TRANS_H__ */ diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h deleted file mode 100644 index 570cd4da7210..000000000000 --- a/include/asm-generic/export.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef __ASM_GENERIC_EXPORT_H -#define __ASM_GENERIC_EXPORT_H - -/* - * <asm/export.h> and <asm-generic/export.h> are deprecated. - * Please include <linux/export.h> directly. - */ -#include <linux/export.h> - -#endif diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index eb4c369a79eb..35d4ca4f6122 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void); */ #define kvm_pmu_update_vcpu_events(vcpu) \ do { \ - if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \ + if (!has_vhe() && kvm_arm_support_pmu_v3()) \ vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ } while (0) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c3e8f7cf96be..172c91879999 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1505,16 +1505,6 @@ struct blk_holder_ops { * Thaw the file system mounted on the block device. */ int (*thaw)(struct block_device *bdev); - - /* - * If needed, get a reference to the holder. - */ - void (*get_holder)(void *holder); - - /* - * Release the holder. 
- */ - void (*put_holder)(void *holder); }; /* @@ -1585,6 +1575,7 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev) int bdev_freeze(struct block_device *bdev); int bdev_thaw(struct block_device *bdev); +void bdev_fput(struct file *bdev_file); struct io_comp_batch { struct request *req_list; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4f20f62f9d63..890e152d553e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1574,12 +1574,26 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; - struct work_struct work; + /* rcu is used before freeing, work can be used to schedule that + * RCU-based freeing before that, so they never overlap + */ + union { + struct rcu_head rcu; + struct work_struct work; + }; }; struct bpf_link_ops { void (*release)(struct bpf_link *link); + /* deallocate link resources callback, called without RCU grace period + * waiting + */ void (*dealloc)(struct bpf_link *link); + /* deallocate link resources callback, called after RCU grace period; + * if underlying BPF program is sleepable we go through tasks trace + * RCU GP and then "classic" RCU GP + */ + void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index cb0d6cd1c12f..60693a145894 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -90,6 +90,14 @@ enum cc_attr { * Examples include TDX Guest. */ CC_ATTR_HOTPLUG_DISABLED, + + /** + * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host. + * + * The host kernel is running with the necessary features + * enabled to run SEV-SNP guests. 
+ */ + CC_ATTR_HOST_SEV_SNP, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM @@ -107,10 +115,14 @@ enum cc_attr { * * FALSE - Specified Confidential Computing attribute is not active */ bool cc_platform_has(enum cc_attr attr); +void cc_platform_set(enum cc_attr attr); +void cc_platform_clear(enum cc_attr attr); #else /* !CONFIG_ARCH_HAS_CC_PLATFORM */ static inline bool cc_platform_has(enum cc_attr attr) { return false; } +static inline void cc_platform_set(enum cc_attr attr) { } +static inline void cc_platform_clear(enum cc_attr attr) { } #endif /* CONFIG_ARCH_HAS_CC_PLATFORM */ diff --git a/include/linux/device.h b/include/linux/device.h index 97c4b046c09d..b9f5464f44ed 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1247,6 +1247,7 @@ void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); +void device_link_wait_removal(void); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 770755df852f..70cd7258cd29 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -245,7 +245,6 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * max utilization to the allowed CPU capacity before calculating * effective performance. 
*/ - max_util = map_util_perf(max_util); max_util = min(max_util, allowed_cpu_cap); /* diff --git a/include/linux/framer/framer.h b/include/linux/framer/framer.h index 9a9b88962c29..2b85fe9e7f9a 100644 --- a/include/linux/framer/framer.h +++ b/include/linux/framer/framer.h @@ -181,12 +181,12 @@ static inline int framer_notifier_unregister(struct framer *framer, return -ENOSYS; } -struct framer *framer_get(struct device *dev, const char *con_id) +static inline struct framer *framer_get(struct device *dev, const char *con_id) { return ERR_PTR(-ENOSYS); } -void framer_put(struct device *dev, struct framer *framer) +static inline void framer_put(struct device *dev, struct framer *framer) { } diff --git a/include/linux/fs.h b/include/linux/fs.h index 00fc429b0af0..8dfd53b52744 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -121,6 +121,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_PWRITE ((__force fmode_t)0x10) /* File is opened for execution with sys_execve / sys_uselib */ #define FMODE_EXEC ((__force fmode_t)0x20) +/* File writes are restricted (block device specific) */ +#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40) /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)0x200) /* 64bit hashes as llseek() offset (for directories) */ diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index dc75f802e284..f8617eaf08ba 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -646,8 +646,6 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, struct gpio_device *gpio_device_find(const void *data, int (*match)(struct gpio_chip *gc, const void *data)); -struct gpio_device *gpio_device_find_by_label(const char *label); -struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); 
@@ -814,6 +812,9 @@ struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); int gpio_device_get_base(struct gpio_device *gdev); const char *gpio_device_get_label(struct gpio_device *gdev); +struct gpio_device *gpio_device_find_by_label(const char *label); +struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); + #else /* CONFIG_GPIOLIB */ #include <asm/bug.h> @@ -843,6 +844,18 @@ static inline const char *gpio_device_get_label(struct gpio_device *gdev) return NULL; } +static inline struct gpio_device *gpio_device_find_by_label(const char *label) +{ + WARN_ON(1); + return NULL; +} + +static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 76121c2bb4f8..5c9bdd3ffccc 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -67,6 +67,8 @@ * later. * IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers, * depends on IRQF_PERCPU. + * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared + * interrupt. 
*/ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -82,6 +84,7 @@ #define IRQF_COND_SUSPEND 0x00040000 #define IRQF_NO_AUTOEN 0x00080000 #define IRQF_NO_DEBUG 0x00100000 +#define IRQF_COND_ONESHOT 0x00200000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index e24893625085..05df0e399d7c 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -294,7 +294,6 @@ struct io_ring_ctx { struct io_submit_state submit_state; - struct io_buffer_list *io_bl; struct xarray io_bl_xa; struct io_hash_table cancel_table_locked; diff --git a/include/linux/libata.h b/include/linux/libata.h index 26d68115afb8..324d792e7c78 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -107,6 +107,7 @@ enum { ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */ + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ diff --git a/include/linux/mman.h b/include/linux/mman.h index dc7048824be8..bcb201ab7a41 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -162,6 +162,14 @@ calc_vm_flag_bits(unsigned long flags) unsigned long vm_commit_limit(void); +#ifndef arch_memory_deny_write_exec_supported +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return true; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported +#endif + /* * Denies creating a writable executable mapping or gaining executable permissions. 
* diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 3921fbed0b28..51421fdbb0ba 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -17,10 +17,12 @@ * build_OID_registry.pl to generate the data for look_up_OID(). */ enum OID { + OID_id_dsa_with_sha1, /* 1.2.840.10030.4.3 */ OID_id_dsa, /* 1.2.840.10040.4.1 */ OID_id_ecPublicKey, /* 1.2.840.10045.2.1 */ OID_id_prime192v1, /* 1.2.840.10045.3.1.1 */ OID_id_prime256v1, /* 1.2.840.10045.3.1.7 */ + OID_id_ecdsa_with_sha1, /* 1.2.840.10045.4.1 */ OID_id_ecdsa_with_sha224, /* 1.2.840.10045.4.3.1 */ OID_id_ecdsa_with_sha256, /* 1.2.840.10045.4.3.2 */ OID_id_ecdsa_with_sha384, /* 1.2.840.10045.4.3.3 */ @@ -28,6 +30,7 @@ enum OID { /* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */ OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */ + OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */ OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */ OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */ OID_sha512WithRSAEncryption, /* 1.2.840.113549.1.1.13 */ @@ -64,6 +67,7 @@ enum OID { OID_PKU2U, /* 1.3.5.1.5.2.7 */ OID_Scram, /* 1.3.6.1.5.5.14 */ OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ + OID_sha1, /* 1.3.14.3.2.26 */ OID_id_ansip384r1, /* 1.3.132.0.34 */ OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ OID_sha384, /* 2.16.840.1.101.3.4.2.2 */ diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index fcc06c300a72..5d3a0cccc6bf 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -11,8 +11,8 @@ #include <linux/types.h> -/* 15 pointers + header align the folio_batch structure to a power of two */ -#define PAGEVEC_SIZE 15 +/* 31 pointers + header align the folio_batch structure to a power of two */ +#define PAGEVEC_SIZE 31 struct folio; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index b743241cfb7c..d470303b1bbb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1230,6 +1230,7 @@ int 
regmap_multi_reg_write_bypassed(struct regmap *map, int regmap_raw_write_async(struct regmap *map, unsigned int reg, const void *val, size_t val_len); int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val); +int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val); int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len); int regmap_noinc_read(struct regmap *map, unsigned int reg, @@ -1739,6 +1740,13 @@ static inline int regmap_read(struct regmap *map, unsigned int reg, return -EINVAL; } +static inline int regmap_read_bypassed(struct regmap *map, unsigned int reg, + unsigned int *val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len) { diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 35f3a4a8ceb1..acf7e1a3f3de 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(struct folio *folio) /* * Using folio_mapping() is quite slow because of the actual call * instruction. - * We know that secretmem pages are not compound and LRU so we can + * We know that secretmem pages are not compound, so we can * save a couple of cycles here. 
*/ - if (folio_test_large(folio) || !folio_test_lru(folio)) + if (folio_test_large(folio)) return false; mapping = (struct address_space *) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0c7c67b3a87b..9d24aec064e8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -753,8 +753,6 @@ typedef unsigned char *sk_buff_data_t; * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by - * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in - * fragmentation management * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -875,10 +873,7 @@ struct sk_buff { struct llist_node ll_node; }; - union { - struct sock *sk; - int ip_defrag_offset; - }; + struct sock *sk; union { ktime_t tstamp; diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 3c6caa5abc7c..e9ec32fb97d4 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -44,10 +44,9 @@ typedef u32 depot_stack_handle_t; union handle_parts { depot_stack_handle_t handle; struct { - /* pool_index is offset by 1 */ - u32 pool_index : DEPOT_POOL_INDEX_BITS; - u32 offset : DEPOT_OFFSET_BITS; - u32 extra : STACK_DEPOT_EXTRA_BITS; + u32 pool_index_plus_1 : DEPOT_POOL_INDEX_BITS; + u32 offset : DEPOT_OFFSET_BITS; + u32 extra : STACK_DEPOT_EXTRA_BITS; }; }; diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index c6540ceea143..0982d1d52b24 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -22,7 +22,7 @@ * * @read: returns the current cycle value * @mask: bitmask for two's complement - * subtraction of non 64 bit counters, + * subtraction of non-64-bit counters, * see CYCLECOUNTER_MASK() helper macro * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) @@ -35,7 +35,7 @@ struct 
cyclecounter { }; /** - * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds + * struct timecounter - layer above a &struct cyclecounter which counts nanoseconds * Contains the state needed by timecounter_read() to detect * cycle counter wrap around. Initialize with * timecounter_init(). Also used to convert cycle counts into the @@ -66,6 +66,8 @@ struct timecounter { * @cycles: Cycles * @mask: bit mask for maintaining the 'frac' field * @frac: pointer to storage for the fractional nanoseconds. + * + * Returns: cycle counter cycles converted to nanoseconds */ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, u64 cycles, u64 mask, u64 *frac) @@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, /** * timecounter_adjtime - Shifts the time of the clock. + * @tc: The &struct timecounter to adjust * @delta: Desired change in nanoseconds. */ static inline void timecounter_adjtime(struct timecounter *tc, s64 delta) @@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc, * * In other words, keeps track of time since the same epoch as * the function which generated the initial time stamp. + * + * Returns: nanoseconds since the initial time stamp */ extern u64 timecounter_read(struct timecounter *tc); @@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc); * * This allows conversion of cycle counter values which were generated * in the past. 
+ * + * Returns: cycle counter converted to nanoseconds since the initial time stamp */ extern u64 timecounter_cyc2time(const struct timecounter *tc, u64 cycle_tstamp); diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7e50cbd97f86..0ea7823b7f31 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -22,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); /* - * ktime_get() family: read the current time in a multitude of ways, + * ktime_get() family - read the current time in a multitude of ways. * * The default time reference is CLOCK_MONOTONIC, starting at * boot time but not counting the time spent in suspend. * For other references, use the functions with "real", "clocktai", * "boottime" and "raw" suffixes. * - * To get the time in a different format, use the ones wit + * To get the time in a different format, use the ones with * "ns", "ts64" and "seconds" suffix. * * See Documentation/core-api/timekeeping.rst for more details. @@ -74,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format + * + * Returns: real (wall) time in ktime_t format */ static inline ktime_t ktime_get_real(void) { @@ -86,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void) } /** - * ktime_get_boottime - Returns monotonic time since boot in ktime_t format + * ktime_get_boottime - Get monotonic time since boot in ktime_t format * * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the * time spent in suspend. 
+ * + * Returns: monotonic time since boot in ktime_t format */ static inline ktime_t ktime_get_boottime(void) { @@ -102,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void) } /** - * ktime_get_clocktai - Returns the TAI time of day in ktime_t format + * ktime_get_clocktai - Get the TAI time of day in ktime_t format + * + * Returns: the TAI time of day in ktime_t format */ static inline ktime_t ktime_get_clocktai(void) { @@ -144,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void) /** * ktime_mono_to_real - Convert monotonic time to clock realtime + * @mono: monotonic time to convert + * + * Returns: time converted to realtime clock */ static inline ktime_t ktime_mono_to_real(ktime_t mono) { return ktime_mono_to_any(mono, TK_OFFS_REAL); } +/** + * ktime_get_ns - Get the current time in nanoseconds + * + * Returns: current time converted to nanoseconds + */ static inline u64 ktime_get_ns(void) { return ktime_to_ns(ktime_get()); } +/** + * ktime_get_real_ns - Get the current real/wall time in nanoseconds + * + * Returns: current real time converted to nanoseconds + */ static inline u64 ktime_get_real_ns(void) { return ktime_to_ns(ktime_get_real()); } +/** + * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds + * + * Returns: current boottime converted to nanoseconds + */ static inline u64 ktime_get_boottime_ns(void) { return ktime_to_ns(ktime_get_boottime()); } +/** + * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds + * + * Returns: current TAI time converted to nanoseconds + */ static inline u64 ktime_get_clocktai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); } +/** + * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds + * + * Returns: current raw monotonic time converted to nanoseconds + */ static inline u64 ktime_get_raw_ns(void) { return ktime_to_ns(ktime_get_raw()); @@ -224,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void); extern void 
timekeeping_inject_sleeptime64(const struct timespec64 *delta); -/* - * struct ktime_timestanps - Simultaneous mono/boot/real timestamps +/** + * struct ktime_timestamps - Simultaneous mono/boot/real timestamps * @mono: Monotonic timestamp * @boot: Boottime timestamp * @real: Realtime timestamp @@ -242,7 +276,8 @@ struct ktime_timestamps { * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time - * @clock_was_set_seq: The sequence number of clock was set events + * @cs_id: Clocksource ID + * @clock_was_set_seq: The sequence number of clock-was-set events * @cs_was_changed_seq: The sequence number of clocksource change events */ struct system_time_snapshot { diff --git a/include/linux/timer.h b/include/linux/timer.h index 14a633ba61d6..e67ecd1cbc97 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -22,7 +22,7 @@ #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif -/** +/* * @TIMER_DEFERRABLE: A deferrable timer will work normally when the * system is busy, but will not cause a CPU to come out of idle just * to service it; instead, the timer will be serviced when the CPU @@ -140,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { } * or not. Callers must ensure serialization wrt. other operations done * to this timer, eg. interrupt contexts, or other CPUs on SMP. * - * return value: 1 if the timer is pending, 0 if not. + * Returns: 1 if the timer is pending, 0 if not. */ static inline int timer_pending(const struct timer_list * timer) { @@ -175,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer); * See timer_delete_sync() for detailed explanation. * * Do not use in new code. Use timer_delete_sync() instead. 
+ * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer_sync(struct timer_list *timer) { @@ -188,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer) * See timer_delete() for detailed explanation. * * Do not use in new code. Use timer_delete() instead. + * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer(struct timer_list *timer) { diff --git a/include/linux/udp.h b/include/linux/udp.h index 3748e82b627b..17539d089666 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -150,6 +150,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); +#if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +#endif + +static inline bool udp_encap_needed(void) +{ + if (static_branch_unlikely(&udp_encap_needed_key)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (static_branch_unlikely(&udpv6_encap_needed_key)) + return true; +#endif + + return false; +} + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) @@ -163,6 +181,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; + /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still + * land in a tunnel as the socket check in udp_gro_receive cannot be + * foolproof. 
+ */ + if (udp_encap_needed() && + READ_ONCE(udp_sk(sk)->encap_rcv) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) + return true; + return false; } diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 8701ca5f31ee..5c12761cbc0e 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -176,6 +176,15 @@ enum { */ HCI_QUIRK_USE_BDADDR_PROPERTY, + /* When this quirk is set, the Bluetooth Device Address provided by + * the 'local-bd-address' fwnode property is incorrectly specified in + * big-endian order. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BDADDR_PROPERTY_BROKEN, + /* When this quirk is set, the duplicate filtering during * scanning is based on Bluetooth devices addresses. To allow * RSSI based updates, restart scanning if needed. diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2e2be4fd2bb6..1e09329acc42 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4991,6 +4991,7 @@ struct cfg80211_ops { * set this flag to update channels on beacon hints. * @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link * of an NSTR mobile AP MLD. 
+ * @WIPHY_FLAG_DISABLE_WEXT: disable wireless extensions for this device */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), @@ -5002,6 +5003,7 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), + WIPHY_FLAG_DISABLE_WEXT = BIT(9), WIPHY_FLAG_MESH_AUTH = BIT(10), WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11), WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12), diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 9ab4bf704e86..ccf171f7eb60 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -175,6 +175,7 @@ void inet_csk_init_xmit_timers(struct sock *sk, void (*delack_handler)(struct timer_list *), void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); +void inet_csk_clear_xmit_timers_sync(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) { diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 76147feb0d10..4eeedf14711b 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -39,7 +39,6 @@ enum TRI_STATE { #define COMP_ENTRY_SIZE 64 #define RX_BUFFERS_PER_QUEUE 512 -#define MANA_RX_DATA_ALIGN 64 #define MAX_SEND_BUFFERS_PER_QUEUE 256 diff --git a/include/net/sock.h b/include/net/sock.h index b5e00702acc1..f57bfd8a2ad2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1759,6 +1759,13 @@ static inline void sock_owned_by_me(const struct sock *sk) #endif } +static inline void sock_not_owned_by_me(const struct sock *sk) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); +#endif +} + static inline bool sock_owned_by_user(const struct sock *sk) { sock_owned_by_me(sk); diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 3cb4dc9bd70e..3d54de168a6d 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -188,6 +188,8 @@ static inline void 
xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, { if (!compl) return; + if (!compl->tx_timestamp) + return; *compl->tx_timestamp = ops->tmo_fill_timestamp(priv); } diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 4ce1988b2ba0..f40915d2ecee 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -12,6 +12,7 @@ struct request; struct scsi_driver { struct device_driver gendrv; + int (*resume)(struct device *); void (*rescan)(struct device *); blk_status_t (*init_command)(struct scsi_cmnd *); void (*uninit_command)(struct scsi_cmnd *); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index b259d42a1e1a..129001f600fc 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -767,6 +767,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht); #define scsi_template_proc_dir(sht) NULL #endif extern void scsi_scan_host(struct Scsi_Host *); +extern int scsi_resume_device(struct scsi_device *sdev); extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index e0629699b563..1a3c6f66f620 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -267,6 +267,7 @@ struct cs35l56_base { bool fw_patched; bool secured; bool can_hibernate; + bool fw_owns_asp1; bool cal_data_valid; s8 cal_index; struct cirrus_amp_cal_data cal_data; @@ -283,6 +284,7 @@ extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC]; extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC]; int cs35l56_set_patch(struct cs35l56_base *cs35l56_base); +int cs35l56_init_asp1_regs_for_driver_control(struct cs35l56_base *cs35l56_base); int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base); int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command); int 
cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base); diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index 9c94ba7c183d..575e55aa08ca 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -109,11 +109,9 @@ struct hda_codec_ops { void (*unsol_event)(struct hda_codec *codec, unsigned int res); void (*set_power_state)(struct hda_codec *codec, hda_nid_t fg, unsigned int power_state); -#ifdef CONFIG_PM int (*suspend)(struct hda_codec *codec); int (*resume)(struct hda_codec *codec); int (*check_power_status)(struct hda_codec *codec, hda_nid_t nid); -#endif void (*stream_pm)(struct hda_codec *codec, hda_nid_t nid, bool on); }; @@ -259,11 +257,9 @@ struct hda_codec { unsigned int no_stream_clean_at_suspend:1; /* do not clean streams at suspend */ unsigned int ctl_dev_id:1; /* old control element id build behaviour */ -#ifdef CONFIG_PM unsigned long power_on_acct; unsigned long power_off_acct; unsigned long power_jiffies; -#endif /* filter the requested power state per nid */ unsigned int (*power_filter)(struct hda_codec *codec, hda_nid_t nid, @@ -481,10 +477,8 @@ extern const struct dev_pm_ops hda_codec_driver_pm; static inline int hda_call_check_power_status(struct hda_codec *codec, hda_nid_t nid) { -#ifdef CONFIG_PM if (codec->patch_ops.check_power_status) return codec->patch_ops.check_power_status(codec, nid); -#endif return 0; } @@ -495,14 +489,9 @@ int hda_call_check_power_status(struct hda_codec *codec, hda_nid_t nid) #define snd_hda_power_up_pm(codec) snd_hdac_power_up_pm(&(codec)->core) #define snd_hda_power_down(codec) snd_hdac_power_down(&(codec)->core) #define snd_hda_power_down_pm(codec) snd_hdac_power_down_pm(&(codec)->core) -#ifdef CONFIG_PM void snd_hda_codec_set_power_save(struct hda_codec *codec, int delay); void snd_hda_set_power_save(struct hda_bus *bus, int delay); void snd_hda_update_power_acct(struct hda_codec *codec); -#else -static inline void snd_hda_codec_set_power_save(struct hda_codec *codec, int 
delay) {} -static inline void snd_hda_set_power_save(struct hda_bus *bus, int delay) {} -#endif static inline bool hda_codec_need_resume(struct hda_codec *codec) { diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 9ce46edc62a5..2040a470ddb4 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -913,14 +913,25 @@ enum kfd_dbg_trap_exception_code { KFD_EC_MASK(EC_DEVICE_NEW)) #define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \ KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE)) +#define KFD_EC_MASK_PACKET (KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED)) /* Checks for exception code types for KFD search */ +#define KFD_DBG_EC_IS_VALID(ecode) (ecode > EC_NONE && ecode < EC_MAX) #define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE)) #define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE)) #define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) +#define KFD_DBG_EC_TYPE_IS_PACKET(ecode) \ + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PACKET)) /* Runtime enable states */ diff --git a/include/uapi/scsi/scsi_bsg_mpi3mr.h b/include/uapi/scsi/scsi_bsg_mpi3mr.h index c72ce387286a..30a5c1a59376 100644 --- a/include/uapi/scsi/scsi_bsg_mpi3mr.h +++ 
b/include/uapi/scsi/scsi_bsg_mpi3mr.h @@ -382,7 +382,7 @@ struct mpi3mr_bsg_in_reply_buf { __u8 mpi_reply_type; __u8 rsvd1; __u16 rsvd2; - __u8 reply_buf[1]; + __u8 reply_buf[]; }; /** diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index cb2afcebbdf5..a35e12f8e68b 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -328,6 +328,7 @@ struct ufs_pwr_mode_info { * @op_runtime_config: called to config Operation and runtime regs Pointers * @get_outstanding_cqs: called to get outstanding completion queues * @config_esi: called to config Event Specific Interrupt + * @config_scsi_dev: called to configure SCSI device parameters */ struct ufs_hba_variant_ops { const char *name; diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 5d5c0b8efff2..c71ddb6d4691 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -19,12 +19,6 @@ #include <vdso/time32.h> #include <vdso/time64.h> -#ifdef CONFIG_ARM64 -#include <asm/page-def.h> -#else -#include <asm/page.h> -#endif - #ifdef CONFIG_ARCH_HAS_VDSO_DATA #include <asm/vdso/data.h> #else @@ -132,7 +126,7 @@ extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden */ union vdso_data_store { struct vdso_data data[CS_BASES]; - u8 page[PAGE_SIZE]; + u8 page[1U << CONFIG_PAGE_SHIFT]; }; /* diff --git a/init/initramfs.c b/init/initramfs.c index da79760b8be3..a298a3854a80 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -367,7 +367,7 @@ static int __init do_name(void) if (S_ISREG(mode)) { int ml = maybe_link(); if (ml >= 0) { - int openflags = O_WRONLY|O_CREAT; + int openflags = O_WRONLY|O_CREAT|O_LARGEFILE; if (ml != 1) openflags |= O_TRUNC; wfile = filp_open(collected, openflags, mode); @@ -682,7 +682,7 @@ static void __init populate_initrd_image(char *err) printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", err); - file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); + file = filp_open("/initrd.image", 
O_WRONLY|O_CREAT|O_LARGEFILE, 0700); if (IS_ERR(file)) return; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5d4b448fdc50..4521c2b66b98 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -147,6 +147,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, static void io_queue_sqe(struct io_kiocb *req); struct kmem_cache *req_cachep; +static struct workqueue_struct *iou_wq __ro_after_init; static int __read_mostly sysctl_io_uring_disabled; static int __read_mostly sysctl_io_uring_group = -1; @@ -350,7 +351,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) err: kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table_locked.hbs); - kfree(ctx->io_bl); xa_destroy(&ctx->io_bl_xa); kfree(ctx); return NULL; @@ -1982,10 +1982,15 @@ fail: err = -EBADFD; if (!io_file_can_poll(req)) goto fail; - err = -ECANCELED; - if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK) - goto fail; - return; + if (req->file->f_flags & O_NONBLOCK || + req->file->f_mode & FMODE_NOWAIT) { + err = -ECANCELED; + if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK) + goto fail; + return; + } else { + req->flags &= ~REQ_F_APOLL_MULTISHOT; + } } if (req->flags & REQ_F_FORCE_ASYNC) { @@ -2926,7 +2931,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_napi_free(ctx); kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table_locked.hbs); - kfree(ctx->io_bl); xa_destroy(&ctx->io_bl_xa); kfree(ctx); } @@ -3161,7 +3165,7 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) * noise and overhead, there's no discernable change in runtime * over using system_wq. 
*/ - queue_work(system_unbound_wq, &ctx->exit_work); + queue_work(iou_wq, &ctx->exit_work); } static int io_uring_release(struct inode *inode, struct file *file) @@ -3443,14 +3447,15 @@ static void *io_uring_validate_mmap_request(struct file *file, ptr = ctx->sq_sqes; break; case IORING_OFF_PBUF_RING: { + struct io_buffer_list *bl; unsigned int bgid; bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; - rcu_read_lock(); - ptr = io_pbuf_get_address(ctx, bgid); - rcu_read_unlock(); - if (!ptr) - return ERR_PTR(-EINVAL); + bl = io_pbuf_get_bl(ctx, bgid); + if (IS_ERR(bl)) + return bl; + ptr = bl->buf_ring; + io_put_bl(ctx, bl); break; } default: @@ -4185,6 +4190,8 @@ static int __init io_uring_init(void) io_buf_cachep = KMEM_CACHE(io_buffer, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); + iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64); + #ifdef CONFIG_SYSCTL register_sysctl_init("kernel", kernel_io_uring_disabled_table); #endif diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 693c26da4ee1..3aa16e27f509 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -17,8 +17,6 @@ #define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf)) -#define BGID_ARRAY 64 - /* BIDs are addressed by a 16-bit field in a CQE */ #define MAX_BIDS_PER_BGID (1 << 16) @@ -40,13 +38,9 @@ struct io_buf_free { int inuse; }; -static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, - struct io_buffer_list *bl, - unsigned int bgid) +static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, + unsigned int bgid) { - if (bl && bgid < BGID_ARRAY) - return &bl[bgid]; - return xa_load(&ctx->io_bl_xa, bgid); } @@ -55,7 +49,7 @@ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, { lockdep_assert_held(&ctx->uring_lock); - return __io_buffer_get_list(ctx, ctx->io_bl, bgid); + return __io_buffer_get_list(ctx, bgid); } static int io_buffer_add_list(struct io_ring_ctx *ctx, @@ -67,11 +61,7 @@ 
static int io_buffer_add_list(struct io_ring_ctx *ctx, * always under the ->uring_lock, but the RCU lookup from mmap does. */ bl->bgid = bgid; - smp_store_release(&bl->is_ready, 1); - - if (bgid < BGID_ARRAY) - return 0; - + atomic_set(&bl->refs, 1); return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); } @@ -208,24 +198,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, return ret; } -static __cold int io_init_bl_list(struct io_ring_ctx *ctx) -{ - struct io_buffer_list *bl; - int i; - - bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL); - if (!bl) - return -ENOMEM; - - for (i = 0; i < BGID_ARRAY; i++) { - INIT_LIST_HEAD(&bl[i].buf_list); - bl[i].bgid = i; - } - - smp_store_release(&ctx->io_bl, bl); - return 0; -} - /* * Mark the given mapped range as free for reuse */ @@ -294,24 +266,24 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, return i; } +void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) +{ + if (atomic_dec_and_test(&bl->refs)) { + __io_remove_buffers(ctx, bl, -1U); + kfree_rcu(bl, rcu); + } +} + void io_destroy_buffers(struct io_ring_ctx *ctx) { struct io_buffer_list *bl; struct list_head *item, *tmp; struct io_buffer *buf; unsigned long index; - int i; - - for (i = 0; i < BGID_ARRAY; i++) { - if (!ctx->io_bl) - break; - __io_remove_buffers(ctx, &ctx->io_bl[i], -1U); - } xa_for_each(&ctx->io_bl_xa, index, bl) { xa_erase(&ctx->io_bl_xa, bl->bgid); - __io_remove_buffers(ctx, bl, -1U); - kfree_rcu(bl, rcu); + io_put_bl(ctx, bl); } /* @@ -489,12 +461,6 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) io_ring_submit_lock(ctx, issue_flags); - if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) { - ret = io_init_bl_list(ctx); - if (ret) - goto err; - } - bl = io_buffer_get_list(ctx, p->bgid); if (unlikely(!bl)) { bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT); @@ -507,14 +473,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) if (ret) { /* * 
Doesn't need rcu free as it was never visible, but - * let's keep it consistent throughout. Also can't - * be a lower indexed array group, as adding one - * where lookup failed cannot happen. + * let's keep it consistent throughout. */ - if (p->bgid >= BGID_ARRAY) - kfree_rcu(bl, rcu); - else - WARN_ON_ONCE(1); + kfree_rcu(bl, rcu); goto err; } } @@ -679,12 +640,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (reg.ring_entries >= 65536) return -EINVAL; - if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { - int ret = io_init_bl_list(ctx); - if (ret) - return ret; - } - bl = io_buffer_get_list(ctx, reg.bgid); if (bl) { /* if mapped buffer ring OR classic exists, don't allow */ @@ -733,11 +688,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (!bl->is_buf_ring) return -EINVAL; - __io_remove_buffers(ctx, bl, -1U); - if (bl->bgid >= BGID_ARRAY) { - xa_erase(&ctx->io_bl_xa, bl->bgid); - kfree_rcu(bl, rcu); - } + xa_erase(&ctx->io_bl_xa, bl->bgid); + io_put_bl(ctx, bl); return 0; } @@ -767,23 +719,35 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg) return 0; } -void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) +struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, + unsigned long bgid) { struct io_buffer_list *bl; + bool ret; - bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); - - if (!bl || !bl->is_mmap) - return NULL; /* - * Ensure the list is fully setup. Only strictly needed for RCU lookup - * via mmap, and in that case only for the array indexed groups. For - * the xarray lookups, it's either visible and ready, or not at all. + * We have to be a bit careful here - we're inside mmap and cannot grab + * the uring_lock. This means the buffer_list could be simultaneously + * going away, if someone is trying to be sneaky. Look it up under rcu + * so we know it's not going away, and attempt to grab a reference to + * it. 
If the ref is already zero, then fail the mapping. If successful, + * the caller will call io_put_bl() to drop the reference at the + * end. This may then safely free the buffer_list (and drop the pages) + * at that point, vm_insert_pages() would've already grabbed the + * necessary vma references. */ - if (!smp_load_acquire(&bl->is_ready)) - return NULL; - - return bl->buf_ring; + rcu_read_lock(); + bl = xa_load(&ctx->io_bl_xa, bgid); + /* must be a mmap'able buffer ring and have pages */ + ret = false; + if (bl && bl->is_mmap) + ret = atomic_inc_not_zero(&bl->refs); + rcu_read_unlock(); + + if (ret) + return bl; + + return ERR_PTR(-EINVAL); } /* diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 1c7b654ee726..df365b8860cf 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -25,12 +25,12 @@ struct io_buffer_list { __u16 head; __u16 mask; + atomic_t refs; + /* ring mapped provided buffers */ __u8 is_buf_ring; /* ring mapped provided buffers, but mmap'ed by application */ __u8 is_mmap; - /* bl is visible from an RCU point of view for lookup */ - __u8 is_ready; }; struct io_buffer { @@ -61,7 +61,9 @@ void __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags); bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); -void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid); +void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl); +struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, + unsigned long bgid); static inline bool io_kbuf_recycle_ring(struct io_kiocb *req) { diff --git a/io_uring/rw.c b/io_uring/rw.c index 0585ebcc9773..c8d48287439e 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -937,6 +937,13 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) ret = __io_read(req, issue_flags); /* + * If the file doesn't support proper NOWAIT, then disable multishot + * and stay in single shot mode. 
+ */ + if (!io_file_supports_nowait(req)) + req->flags &= ~REQ_F_APOLL_MULTISHOT; + + /* * If we get -EAGAIN, recycle our buffer and just let normal poll * handling arm it. */ @@ -955,7 +962,7 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) /* * Any successful return value will keep the multishot read armed. */ - if (ret > 0) { + if (ret > 0 && req->flags & REQ_F_APOLL_MULTISHOT) { /* * Put our buffer and post a CQE. If we fail to post a CQE, then * jump to the termination path. This request is then done. diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 368c5d86b5b7..e497011261b8 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -4,7 +4,7 @@ ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y) # ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif -CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) +CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 86571e760dd6..343c3456c8dd 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -38,7 +38,7 @@ /* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */ #define GUARD_SZ (1ull << sizeof(((struct bpf_insn *)0)->off) * 8) -#define KERN_VM_SZ ((1ull << 32) + GUARD_SZ) +#define KERN_VM_SZ (SZ_4G + GUARD_SZ) struct bpf_arena { struct bpf_map map; @@ -110,7 +110,7 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr) return ERR_PTR(-EINVAL); vm_range = (u64)attr->max_entries * PAGE_SIZE; - if (vm_range > (1ull << 32)) + if (vm_range > SZ_4G) return ERR_PTR(-E2BIG); if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32)) @@ -301,7 +301,7 @@ static unsigned long arena_get_unmapped_area(struct file 
*filp, unsigned long ad if (pgoff) return -EINVAL; - if (len > (1ull << 32)) + if (len > SZ_4G) return -E2BIG; /* if user_vm_start was specified at arena creation time */ @@ -322,7 +322,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad if (WARN_ON_ONCE(arena->user_vm_start)) /* checks at map creation time should prevent this */ return -EFAULT; - return round_up(ret, 1ull << 32); + return round_up(ret, SZ_4G); } static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) @@ -346,7 +346,7 @@ static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) return -EBUSY; /* Earlier checks should prevent this */ - if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > (1ull << 32) || vma->vm_pgoff)) + if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > SZ_4G || vma->vm_pgoff)) return -EFAULT; if (remember_vma(arena, vma)) @@ -420,7 +420,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt if (uaddr & ~PAGE_MASK) return 0; pgoff = compute_pgoff(arena, uaddr); - if (pgoff + page_cnt > page_cnt_max) + if (pgoff > page_cnt_max - page_cnt) /* requested address will be outside of user VMA */ return 0; } @@ -447,7 +447,13 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt goto out; uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE); - /* Earlier checks make sure that uaddr32 + page_cnt * PAGE_SIZE will not overflow 32-bit */ + /* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1 + * will not overflow 32-bit. Lower 32-bit need to represent + * contiguous user address range. + * Map these pages at kern_vm_start base. + * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow + * lower 32-bit and it's ok. 
+ */ ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32, kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages); if (ret) { @@ -510,6 +516,11 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt) if (!page) continue; if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */ + /* Optimization for the common case of page_cnt==1: + * If page wasn't mapped into some user vma there + * is no need to call zap_pages which is slow. When + * page_cnt is big it's faster to do the batched zap. + */ zap_pages(arena, full_uaddr, 1); vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE); __free_page(page); diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index addf3dd57b59..35e1ddca74d2 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -80,6 +80,18 @@ static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key return -EOPNOTSUPP; } +/* Called from syscall */ +static int bloom_map_alloc_check(union bpf_attr *attr) +{ + if (attr->value_size > KMALLOC_MAX_SIZE) + /* if value_size is bigger, the user space won't be able to + * access the elements. 
+ */ + return -E2BIG; + + return 0; +} + static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) { u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; @@ -191,6 +203,7 @@ static u64 bloom_map_mem_usage(const struct bpf_map *map) BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter) const struct bpf_map_ops bloom_filter_map_ops = { .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = bloom_map_alloc_check, .map_alloc = bloom_map_alloc, .map_free = bloom_map_free, .map_get_next_key = bloom_map_get_next_key, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a89587859571..449b9a5d3fe3 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2548,7 +2548,7 @@ __bpf_kfunc void bpf_throw(u64 cookie) __bpf_kfunc_end_defs(); BTF_KFUNCS_START(generic_btf_ids) -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ae2ff73bde7e..c287925471f6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link) atomic64_inc(&link->refcnt); } +static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) +{ + struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); + + /* free bpf_link and its containing memory */ + link->ops->dealloc_deferred(link); +} + +static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) +{ + if (rcu_trace_implies_rcu_gp()) + bpf_link_defer_dealloc_rcu_gp(rcu); + else + call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); +} + /* bpf_link_free is guaranteed to be called from process context */ static void bpf_link_free(struct bpf_link *link) { + bool sleepable = false; + bpf_link_free_id(link->id); if (link->prog) { + sleepable = link->prog->sleepable; /* detach BPF program, clean up used resources */ link->ops->release(link); bpf_prog_put(link->prog); } - /* 
free bpf_link and its containing memory */ - link->ops->dealloc(link); + if (link->ops->dealloc_deferred) { + /* schedule BPF link deallocation; if underlying BPF program + * is sleepable, we need to first wait for RCU tasks trace + * sync, then go through "classic" RCU grace period + */ + if (sleepable) + call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); + else + call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); + } + if (link->ops->dealloc) + link->ops->dealloc(link); } static void bpf_link_put_deferred(struct work_struct *work) @@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_raw_tp_link_lops = { .release = bpf_raw_tp_link_release, - .dealloc = bpf_raw_tp_link_dealloc, + .dealloc_deferred = bpf_raw_tp_link_dealloc, .show_fdinfo = bpf_raw_tp_link_show_fdinfo, .fill_link_info = bpf_raw_tp_link_fill_link_info, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 63749ad5ac6b..98188379d5c7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5682,6 +5682,13 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) return reg->type == PTR_TO_FLOW_KEYS; } +static bool is_arena_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = reg_state(env, regno); + + return reg->type == PTR_TO_ARENA; +} + static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { #ifdef CONFIG_NET [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], @@ -6694,6 +6701,11 @@ static int check_stack_access_within_bounds( err = check_stack_slot_within_bounds(env, min_off, state, type); if (!err && max_off > 0) err = -EINVAL; /* out of stack access into non-negative offsets */ + if (!err && access_size < 0) + /* access_size should not be negative (or overflow an int); others checks + * along the way should have prevented such an access. + */ + err = -EFAULT; /* invalid negative access size; integer overflow? 
*/ if (err) { if (tnum_is_const(reg->var_off)) { @@ -7019,7 +7031,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg) || is_flow_key_reg(env, insn->dst_reg) || - is_sk_reg(env, insn->dst_reg)) { + is_sk_reg(env, insn->dst_reg) || + is_arena_reg(env, insn->dst_reg)) { verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, reg_type_str(env, reg_state(env, insn->dst_reg)->type)); @@ -14014,6 +14027,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n"); return -EINVAL; } + if (!env->prog->aux->arena) { + verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n"); + return -EINVAL; + } } else { if ((insn->off != 0 && insn->off != 8 && insn->off != 16 && insn->off != 32) || insn->imm) { @@ -14046,8 +14063,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (insn->imm) { /* off == BPF_ADDR_SPACE_CAST */ mark_reg_unknown(env, regs, insn->dst_reg); - if (insn->imm == 1) /* cast from as(1) to as(0) */ + if (insn->imm == 1) { /* cast from as(1) to as(0) */ dst_reg->type = PTR_TO_ARENA; + /* PTR_TO_ARENA is 32-bit */ + dst_reg->subreg_def = env->insn_idx + 1; + } } else if (insn->off == 0) { /* case: R1 = R2 * copy register state to dest reg @@ -18359,15 +18379,18 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) } if (!env->prog->jit_requested) { verbose(env, "JIT is required to use arena\n"); + fdput(f); return -EOPNOTSUPP; } if (!bpf_jit_supports_arena()) { verbose(env, "JIT doesn't support arena\n"); + fdput(f); return -EOPNOTSUPP; } env->prog->aux->arena = (void *)map; if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) { verbose(env, "arena's user address must be set via map_extra or mmap()\n"); + fdput(f); return -EINVAL; } } @@ -19601,8 
+19624,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env) (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) { /* convert to 32-bit mov that clears upper 32-bit */ insn->code = BPF_ALU | BPF_MOV | BPF_X; - /* clear off, so it's a normal 'wX = wY' from JIT pov */ + /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */ insn->off = 0; + insn->imm = 0; } /* cast from as(0) to as(1) should be handled by JIT */ goto next_insn; } diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index bbb6c3cb00e4..066668799f75 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -366,8 +366,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) crashk_low_res.start = low_base; crashk_low_res.end = low_base + low_size - 1; +#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY insert_resource(&iomem_resource, &crashk_low_res); #endif +#endif return 0; } @@ -448,8 +450,12 @@ retry: crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; +#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY + insert_resource(&iomem_resource, &crashk_res); +#endif } +#ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY static __init int insert_crashkernel_resources(void) { if (crashk_res.start < crashk_res.end) @@ -462,3 +468,4 @@ static __init int insert_crashkernel_resources(void) } early_initcall(insert_crashkernel_resources); #endif +#endif diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ad3eaf2ab959..bf9ae8a8686f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1643,8 +1643,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!((old->flags & new->flags) & IRQF_SHARED) || - (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || - ((old->flags ^ new->flags) & IRQF_ONESHOT)) + (oldtype != (new->flags & IRQF_TRIGGER_MASK))) + goto mismatch; + + if ((old->flags & IRQF_ONESHOT) && + (new->flags & IRQF_COND_ONESHOT)) + new->flags |= IRQF_ONESHOT; + else if 
((old->flags ^ new->flags) & IRQF_ONESHOT) goto mismatch; /* All handlers must agree on per-cpuness */ diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index c3ced519e14b..f3e0329337f6 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -236,6 +236,10 @@ choice possible to load a signed module containing the algorithm to check the signature on that module. +config MODULE_SIG_SHA1 + bool "Sign modules with SHA-1" + select CRYPTO_SHA1 + config MODULE_SIG_SHA256 bool "Sign modules with SHA-256" select CRYPTO_SHA256 @@ -265,6 +269,7 @@ endchoice config MODULE_SIG_HASH string depends on MODULE_SIG || IMA_APPRAISE_MODSIG + default "sha1" if MODULE_SIG_SHA1 default "sha256" if MODULE_SIG_SHA256 default "sha384" if MODULE_SIG_SHA384 default "sha512" if MODULE_SIG_SHA512 diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ca5146006b94..adf99c05adca 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2009,6 +2009,12 @@ static int console_trylock_spinning(void) */ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + /* + * Update @console_may_schedule for trylock because the previous + * owner may have been schedulable. + */ + console_may_schedule = 0; + return 1; } diff --git a/kernel/sys.c b/kernel/sys.c index f8e543f1e38a..8bb106a56b3a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2408,8 +2408,11 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3, if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN)) return -EINVAL; - /* PARISC cannot allow mdwe as it needs writable stacks */ - if (IS_ENABLED(CONFIG_PARISC)) + /* + * EOPNOTSUPP might be more appropriate here in principle, but + * existing userspace depends on EINVAL specifically. 
+ */ + if (!arch_memory_deny_write_exec_supported()) return -EINVAL; current_bits = get_current_mdwe(); diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 9de66bbbb3d1..4782edcbe7b9 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -129,15 +129,17 @@ static int posix_clock_open(struct inode *inode, struct file *fp) goto out; } pccontext->clk = clk; - fp->private_data = pccontext; - if (clk->ops.open) + if (clk->ops.open) { err = clk->ops.open(pccontext, fp->f_mode); - else - err = 0; - - if (!err) { - get_device(clk->dev); + if (err) { + kfree(pccontext); + goto out; + } } + + fp->private_data = pccontext; + get_device(clk->dev); + err = 0; out: up_read(&clk->rwsem); return err; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 269e21590df5..1331216a9cae 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -697,6 +697,7 @@ bool tick_nohz_tick_stopped_cpu(int cpu) /** * tick_nohz_update_jiffies - update jiffies when idle was interrupted + * @now: current ktime_t * * Called from interrupt entry when the CPU was idle * @@ -794,7 +795,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime, * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. * - * This function returns -1 if NOHZ is not enabled. + * Return: -1 if NOHZ is not enabled, else total idle time of the @cpu */ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { @@ -820,7 +821,7 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. * - * This function returns -1 if NOHZ is not enabled. 
+ * Return: -1 if NOHZ is not enabled, else total iowait time of @cpu */ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) { @@ -1287,6 +1288,8 @@ void tick_nohz_irq_exit(void) /** * tick_nohz_idle_got_tick - Check whether or not the tick handler has run + * + * Return: %true if the tick handler has run, otherwise %false */ bool tick_nohz_idle_got_tick(void) { @@ -1305,6 +1308,8 @@ bool tick_nohz_idle_got_tick(void) * stopped, it returns the next hrtimer. * * Called from power state control code with interrupts disabled + * + * Return: the next expiration time */ ktime_t tick_nohz_get_next_hrtimer(void) { @@ -1320,6 +1325,8 @@ ktime_t tick_nohz_get_next_hrtimer(void) * The return value of this function and/or the value returned by it through the * @delta_next pointer can be negative which must be taken into account by its * callers. + * + * Return: the expected length of the current sleep */ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { @@ -1357,8 +1364,11 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) /** * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value * for a particular CPU. + * @cpu: target CPU number * * Called from the schedutil frequency scaling governor in scheduler context. + * + * Return: the current idle calls counter value for @cpu */ unsigned long tick_nohz_get_idle_calls_cpu(int cpu) { @@ -1371,6 +1381,8 @@ unsigned long tick_nohz_get_idle_calls_cpu(int cpu) * tick_nohz_get_idle_calls - return the current idle calls counter value * * Called from the schedutil frequency scaling governor in scheduler context. 
+ * + * Return: the current idle calls counter value for the current CPU */ unsigned long tick_nohz_get_idle_calls(void) { @@ -1559,7 +1571,7 @@ early_param("skew_tick", skew_tick); /** * tick_setup_sched_timer - setup the tick emulation timer - * @mode: tick_nohz_mode to setup for + * @hrtimer: whether to use the hrtimer or not */ void tick_setup_sched_timer(bool hrtimer) { diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index e11c4dc65bcb..b4a7822f495d 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -46,8 +46,8 @@ struct tick_device { * @next_tick: Next tick to be fired when in dynticks mode. * @idle_jiffies: jiffies at the entry to idle for idle time accounting * @idle_waketime: Time when the idle was interrupted + * @idle_sleeptime_seq: sequence counter for data consistency * @idle_entrytime: Time when the idle call was entered - * @nohz_mode: Mode - one state of tick_nohz_mode * @last_jiffies: Base jiffies snapshot when next event was last computed * @timer_expires_base: Base time clock monotonic for @timer_expires * @timer_expires: Anticipated timer expiration time (in case sched tick is stopped) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index dee29f1f5b75..3baf2fbe6848 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -64,15 +64,15 @@ EXPORT_SYMBOL(jiffies_64); /* * The timer wheel has LVL_DEPTH array levels. Each level provides an array of - * LVL_SIZE buckets. Each level is driven by its own clock and therefor each + * LVL_SIZE buckets. Each level is driven by its own clock and therefore each * level has a different granularity. * - * The level granularity is: LVL_CLK_DIV ^ lvl + * The level granularity is: LVL_CLK_DIV ^ level * The level clock frequency is: HZ / (LVL_CLK_DIV ^ level) * * The array level of a newly armed timer depends on the relative expiry * time. The farther the expiry time is away the higher the array level and - * therefor the granularity becomes. 
+ * therefore the granularity becomes. * * Contrary to the original timer wheel implementation, which aims for 'exact' * expiry of the timers, this implementation removes the need for recascading @@ -207,7 +207,7 @@ EXPORT_SYMBOL(jiffies_64); * struct timer_base - Per CPU timer base (number of base depends on config) * @lock: Lock protecting the timer_base * @running_timer: When expiring timers, the lock is dropped. To make - * sure not to race agains deleting/modifying a + * sure not to race against deleting/modifying a * currently running timer, the pointer is set to the * timer, which expires at the moment. If no timer is * running, the pointer is NULL. @@ -737,7 +737,7 @@ static bool timer_is_static_object(void *addr) } /* - * fixup_init is called when: + * timer_fixup_init is called when: * - an active object is initialized */ static bool timer_fixup_init(void *addr, enum debug_obj_state state) @@ -761,7 +761,7 @@ static void stub_timer(struct timer_list *unused) } /* - * fixup_activate is called when: + * timer_fixup_activate is called when: * - an active object is activated * - an unknown non-static object is activated */ @@ -783,7 +783,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state) } /* - * fixup_free is called when: + * timer_fixup_free is called when: * - an active object is freed */ static bool timer_fixup_free(void *addr, enum debug_obj_state state) @@ -801,7 +801,7 @@ static bool timer_fixup_free(void *addr, enum debug_obj_state state) } /* - * fixup_assert_init is called when: + * timer_fixup_assert_init is called when: * - an untracked/uninit-ed object is found */ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state) @@ -914,7 +914,7 @@ static void do_init_timer(struct timer_list *timer, * @key: lockdep class key of the fake lock used for tracking timer * sync lock dependencies * - * init_timer_key() must be done to a timer prior calling *any* of the + * init_timer_key() must be done to a timer 
prior to calling *any* of the * other timer functions. */ void init_timer_key(struct timer_list *timer, @@ -1417,7 +1417,7 @@ static int __timer_delete(struct timer_list *timer, bool shutdown) * If @shutdown is set then the lock has to be taken whether the * timer is pending or not to protect against a concurrent rearm * which might hit between the lockless pending check and the lock - * aquisition. By taking the lock it is ensured that such a newly + * acquisition. By taking the lock it is ensured that such a newly * enqueued timer is dequeued and cannot end up with * timer->function == NULL in the expiry code. * @@ -2306,7 +2306,7 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem, /* * When timer base is not set idle, undo the effect of - * tmigr_cpu_deactivate() to prevent inconsitent states - active + * tmigr_cpu_deactivate() to prevent inconsistent states - active * timer base but inactive timer migration hierarchy. * * When timer base was already marked idle, nothing will be diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index c63a0afdcebe..ccba875d2234 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -751,6 +751,33 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child, first_childevt = evt = data->evt; + /* + * Walking the hierarchy is required in any case when a + * remote expiry was done before. This ensures to not lose + * already queued events in non active groups (see section + * "Required event and timerqueue update after a remote + * expiry" in the documentation at the top). + * + * The two call sites which are executed without a remote expiry + * before, are not prevented from propagating changes through + * the hierarchy by the return: + * - When entering this path by tmigr_new_timer(), @evt->ignore + * is never set. + * - tmigr_inactive_up() takes care of the propagation by + * itself and ignores the return value. 
But an immediate + * return is possible if there is a parent, sparing group + * locking at this level, because the upper walking call to + * the parent will take care about removing this event from + * within the group and update next_expiry accordingly. + * + * However if there is no parent, ie: the hierarchy has only a + * single level so @group is the top level group, make sure the + * first event information of the group is updated properly and + * also handled properly, so skip this fast return path. + */ + if (evt->ignore && !remote && group->parent) + return true; + raw_spin_lock(&group->lock); childstate.state = 0; @@ -762,8 +789,11 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child, * queue when the expiry time changed only or when it could be ignored. */ if (timerqueue_node_queued(&evt->nextevt)) { - if ((evt->nextevt.expires == nextexp) && !evt->ignore) + if ((evt->nextevt.expires == nextexp) && !evt->ignore) { + /* Make sure not to miss a new CPU event with the same expiry */ + evt->cpu = first_childevt->cpu; goto check_toplvl; + } if (!timerqueue_del(&group->events, &evt->nextevt)) WRITE_ONCE(group->next_expiry, KTIME_MAX); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0a5c4efc73c3..9dc605f08a23 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2728,7 +2728,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { .release = bpf_kprobe_multi_link_release, - .dealloc = bpf_kprobe_multi_link_dealloc, + .dealloc_deferred = bpf_kprobe_multi_link_dealloc, .fill_link_info = bpf_kprobe_multi_link_fill_link_info, }; @@ -3157,6 +3157,9 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link) umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt); + if (umulti_link->task) + 
put_task_struct(umulti_link->task); + path_put(&umulti_link->path); } static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) @@ -3164,9 +3167,6 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) struct bpf_uprobe_multi_link *umulti_link; umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); - if (umulti_link->task) - put_task_struct(umulti_link->task); - path_put(&umulti_link->path); kvfree(umulti_link->uprobes); kfree(umulti_link); } @@ -3242,7 +3242,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { .release = bpf_uprobe_multi_link_release, - .dealloc = bpf_uprobe_multi_link_dealloc, + .dealloc_deferred = bpf_uprobe_multi_link_dealloc, .fill_link_info = bpf_uprobe_multi_link_fill_link_info, }; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 217169de0920..dfe3ee6035ec 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -839,7 +839,7 @@ out: void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs) { struct probe_entry_arg *earg = tp->entry_arg; - unsigned long val; + unsigned long val = 0; int i; if (!earg) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index af6cc19a2003..68c97387aa54 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -330,7 +330,7 @@ static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size) stack = current_pool + pool_offset; /* Pre-initialize handle once. 
*/ - stack->handle.pool_index = pool_index + 1; + stack->handle.pool_index_plus_1 = pool_index + 1; stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; stack->handle.extra = 0; INIT_LIST_HEAD(&stack->hash_list); @@ -441,7 +441,7 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) const int pools_num_cached = READ_ONCE(pools_num); union handle_parts parts = { .handle = handle }; void *pool; - u32 pool_index = parts.pool_index - 1; + u32 pool_index = parts.pool_index_plus_1 - 1; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; diff --git a/mm/Makefile b/mm/Makefile index e4b5b75aaec9..4abb40b911ec 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -29,8 +29,7 @@ KCOV_INSTRUMENT_mmzone.o := n KCOV_INSTRUMENT_vmstat.o := n KCOV_INSTRUMENT_failslab.o := n -CFLAGS_init-mm.o += $(call cc-disable-warning, override-init) -CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides) +CFLAGS_init-mm.o += -Wno-override-init mmu-y := nommu.o mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \ diff --git a/mm/filemap.c b/mm/filemap.c index 7437b2bd75c1..30de18c4fd28 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -4197,7 +4197,23 @@ static void filemap_cachestat(struct address_space *mapping, /* shmem file - in swap cache */ swp_entry_t swp = radix_to_swp_entry(folio); + /* swapin error results in poisoned entry */ + if (non_swap_entry(swp)) + goto resched; + + /* + * Getting a swap entry from the shmem + * inode means we beat + * shmem_unuse(). rcu_read_lock() + * ensures swapoff waits for us before + * freeing the swapper space. However, + * we can race with swapping and + * invalidation, so there might not be + * a shadow in the swapcache (yet). 
+ */ shadow = get_shadow_from_swap_cache(swp); + if (!shadow) + goto resched; } #endif if (workingset_test_recent(shadow, true, &workingset)) @@ -1653,20 +1653,22 @@ long populate_vma_page_range(struct vm_area_struct *vma, if (vma->vm_flags & VM_LOCKONFAULT) return nr_pages; + /* ... similarly, we've never faulted in PROT_NONE pages */ + if (!vma_is_accessible(vma)) + return -EFAULT; + gup_flags = FOLL_TOUCH; /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW * and we would not want to dirty them for nothing. + * + * Otherwise, do a read fault, and use FOLL_FORCE in case it's not + * readable (ie write-only or executable). */ if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) gup_flags |= FOLL_WRITE; - - /* - * We want mlock to succeed for regions that have any permissions - * other than PROT_NONE. - */ - if (vma_is_accessible(vma)) + else gup_flags |= FOLL_FORCE; if (locked) diff --git a/mm/memory.c b/mm/memory.c index f2bc6dd15eb8..d2155ced45f8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1536,7 +1536,9 @@ static inline int zap_present_ptes(struct mmu_gather *tlb, ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); arch_check_zapped_pte(vma, ptent); tlb_remove_tlb_entry(tlb, pte, addr); - VM_WARN_ON_ONCE(userfaultfd_wp(vma)); + if (userfaultfd_pte_wp(vma, ptent)) + zap_install_uffd_wp_if_needed(vma, addr, pte, 1, + details, ptent); ksm_might_unmap_zero_page(mm, ptent); return 1; } @@ -5971,6 +5973,10 @@ int follow_phys(struct vm_area_struct *vma, goto out; pte = ptep_get(ptep); + /* Never return PFNs of anon folios in COW mappings. 
*/ + if (vm_normal_folio(vma, address, pte)) + goto unlock; + if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; diff --git a/mm/page_owner.c b/mm/page_owner.c index e7139952ffd9..d17d1351ec84 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -54,6 +54,22 @@ static depot_stack_handle_t early_handle; static void init_early_allocated_pages(void); +static inline void set_current_in_page_owner(void) +{ + /* + * Avoid recursion. + * + * We might need to allocate more memory from page_owner code, so make + * sure to signal it in order to avoid recursion. + */ + current->in_page_owner = 1; +} + +static inline void unset_current_in_page_owner(void) +{ + current->in_page_owner = 0; +} + static int __init early_page_owner_param(char *buf) { int ret = kstrtobool(buf, &page_owner_enabled); @@ -133,23 +149,16 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags) depot_stack_handle_t handle; unsigned int nr_entries; - /* - * Avoid recursion. - * - * Sometimes page metadata allocation tracking requires more - * memory to be allocated: - * - when new stack trace is saved to stack depot - */ if (current->in_page_owner) return dummy_handle; - current->in_page_owner = 1; + set_current_in_page_owner(); nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2); handle = stack_depot_save(entries, nr_entries, flags); if (!handle) handle = failure_handle; + unset_current_in_page_owner(); - current->in_page_owner = 0; return handle; } @@ -164,9 +173,13 @@ static void add_stack_record_to_list(struct stack_record *stack_record, gfp_mask &= (GFP_ATOMIC | GFP_KERNEL); gfp_mask |= __GFP_NOWARN; + set_current_in_page_owner(); stack = kmalloc(sizeof(*stack), gfp_mask); - if (!stack) + if (!stack) { + unset_current_in_page_owner(); return; + } + unset_current_in_page_owner(); stack->stack_record = stack_record; stack->next = NULL; diff --git a/mm/shmem_quota.c b/mm/shmem_quota.c index 062d1c1097ae..ce514e700d2f 100644 --- a/mm/shmem_quota.c +++ b/mm/shmem_quota.c @@ 
-116,7 +116,7 @@ static int shmem_free_file_info(struct super_block *sb, int type) static int shmem_get_next_id(struct super_block *sb, struct kqid *qid) { struct mem_dqinfo *info = sb_dqinfo(sb, qid->type); - struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node; + struct rb_node *node; qid_t id = from_kqid(&init_user_ns, *qid); struct quota_info *dqopt = sb_dqopt(sb); struct quota_id *entry = NULL; @@ -126,6 +126,7 @@ static int shmem_get_next_id(struct super_block *sb, struct kqid *qid) return -ESRCH; down_read(&dqopt->dqio_sem); + node = ((struct rb_root *)info->dqi_priv)->rb_node; while (node) { entry = rb_entry(node, struct quota_id, node); @@ -165,7 +166,7 @@ out_unlock: static int shmem_acquire_dquot(struct dquot *dquot) { struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type); - struct rb_node **n = &((struct rb_root *)info->dqi_priv)->rb_node; + struct rb_node **n; struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info; struct rb_node *parent = NULL, *new_node = NULL; struct quota_id *new_entry, *entry; @@ -176,6 +177,8 @@ static int shmem_acquire_dquot(struct dquot *dquot) mutex_lock(&dquot->dq_lock); down_write(&dqopt->dqio_sem); + n = &((struct rb_root *)info->dqi_priv)->rb_node; + while (*n) { parent = *n; entry = rb_entry(parent, struct quota_id, node); @@ -264,7 +267,7 @@ static bool shmem_is_empty_dquot(struct dquot *dquot) static int shmem_release_dquot(struct dquot *dquot) { struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type); - struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node; + struct rb_node *node; qid_t id = from_kqid(&init_user_ns, dquot->dq_id); struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); struct quota_id *entry = NULL; @@ -275,6 +278,7 @@ static int shmem_release_dquot(struct dquot *dquot) goto out_dqlock; down_write(&dqopt->dqio_sem); + node = ((struct rb_root *)info->dqi_priv)->rb_node; while (node) { entry = rb_entry(node, struct quota_id, node); diff --git 
a/mm/userfaultfd.c b/mm/userfaultfd.c index 712160cd41ec..3c3539c573e7 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1444,7 +1444,8 @@ static int uffd_move_lock(struct mm_struct *mm, */ down_read(&(*dst_vmap)->vm_lock->lock); if (*dst_vmap != *src_vmap) - down_read(&(*src_vmap)->vm_lock->lock); + down_read_nested(&(*src_vmap)->vm_lock->lock, + SINGLE_DEPTH_NESTING); } mmap_read_unlock(mm); return err; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 22aa63f4ef63..68fa001648cc 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -989,6 +989,27 @@ unsigned long vmalloc_nr_pages(void) return atomic_long_read(&nr_vmalloc_pages); } +static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root) +{ + struct rb_node *n = root->rb_node; + + addr = (unsigned long)kasan_reset_tag((void *)addr); + + while (n) { + struct vmap_area *va; + + va = rb_entry(n, struct vmap_area, rb_node); + if (addr < va->va_start) + n = n->rb_left; + else if (addr >= va->va_end) + n = n->rb_right; + else + return va; + } + + return NULL; +} + /* Look up the first VA which satisfies addr < va_end, NULL if none. 
*/ static struct vmap_area * __find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root) @@ -1025,47 +1046,39 @@ __find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root) static struct vmap_node * find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va) { - struct vmap_node *vn, *va_node = NULL; - struct vmap_area *va_lowest; + unsigned long va_start_lowest; + struct vmap_node *vn; int i; - for (i = 0; i < nr_vmap_nodes; i++) { +repeat: + for (i = 0, va_start_lowest = 0; i < nr_vmap_nodes; i++) { vn = &vmap_nodes[i]; spin_lock(&vn->busy.lock); - va_lowest = __find_vmap_area_exceed_addr(addr, &vn->busy.root); - if (va_lowest) { - if (!va_node || va_lowest->va_start < (*va)->va_start) { - if (va_node) - spin_unlock(&va_node->busy.lock); - - *va = va_lowest; - va_node = vn; - continue; - } - } + *va = __find_vmap_area_exceed_addr(addr, &vn->busy.root); + + if (*va) + if (!va_start_lowest || (*va)->va_start < va_start_lowest) + va_start_lowest = (*va)->va_start; spin_unlock(&vn->busy.lock); } - return va_node; -} - -static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root) -{ - struct rb_node *n = root->rb_node; + /* + * Check if found VA exists, it might have gone away. In this case we + * repeat the search because a VA has been removed concurrently and we + * need to proceed to the next one, which is a rare case. 
+ */ + if (va_start_lowest) { + vn = addr_to_node(va_start_lowest); - addr = (unsigned long)kasan_reset_tag((void *)addr); + spin_lock(&vn->busy.lock); + *va = __find_vmap_area(va_start_lowest, &vn->busy.root); - while (n) { - struct vmap_area *va; + if (*va) + return vn; - va = rb_entry(n, struct vmap_area, rb_node); - if (addr < va->va_start) - n = n->rb_left; - else if (addr >= va->va_end) - n = n->rb_right; - else - return va; + spin_unlock(&vn->busy.lock); + goto repeat; } return NULL; @@ -2343,6 +2356,9 @@ struct vmap_area *find_vmap_area(unsigned long addr) struct vmap_area *va; int i, j; + if (unlikely(!vmap_initialized)) + return NULL; + /* * An addr_to_node_id(addr) converts an address to a node index * where a VA is located. If VA spans several zones and passed diff --git a/mm/zswap.c b/mm/zswap.c index 9dec853647c8..caed028945b0 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1080,7 +1080,17 @@ static void zswap_decompress(struct zswap_entry *entry, struct page *page) mutex_lock(&acomp_ctx->mutex); src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO); - if (acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) { + /* + * If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer + * to do crypto_acomp_decompress() which might sleep. In such cases, we must + * resort to copying the buffer to a temporary one. + * Meanwhile, zpool_map_handle() might return a non-linearly mapped buffer, + * such as a kmap address of high memory or even ever a vmap address. + * However, sg_init_one is only equipped to handle linearly mapped low memory. + * In such cases, we also must copy the buffer to a temporary and lowmem one. 
+ */ + if ((acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) || + !virt_addr_valid(src)) { memcpy(acomp_ctx->buffer, src, entry->length); src = acomp_ctx->buffer; zpool_unmap_handle(zpool, entry->handle); @@ -1094,7 +1104,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct page *page) BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE); mutex_unlock(&acomp_ctx->mutex); - if (!acomp_ctx->is_sleepable || zpool_can_sleep_mapped(zpool)) + if (src != acomp_ctx->buffer) zpool_unmap_handle(zpool, entry->handle); } @@ -1313,6 +1323,14 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker, if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg)) return 0; + /* + * The shrinker resumes swap writeback, which will enter block + * and may enter fs. XXX: Harmonize with vmscan.c __GFP_FS + * rules (may_enter_fs()), which apply on a per-folio basis. + */ + if (!gfp_has_io_fs(sc->gfp_mask)) + return 0; + #ifdef CONFIG_MEMCG_KMEM mem_cgroup_flush_stats(memcg); nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; @@ -1618,6 +1636,7 @@ bool zswap_load(struct folio *folio) swp_entry_t swp = folio->swap; pgoff_t offset = swp_offset(swp); struct page *page = &folio->page; + bool swapcache = folio_test_swapcache(folio); struct zswap_tree *tree = swap_zswap_tree(swp); struct zswap_entry *entry; u8 *dst; @@ -1630,7 +1649,20 @@ bool zswap_load(struct folio *folio) spin_unlock(&tree->lock); return false; } - zswap_rb_erase(&tree->rbroot, entry); + /* + * When reading into the swapcache, invalidate our entry. The + * swapcache can be the authoritative owner of the page and + * its mappings, and the pressure that results from having two + * in-memory copies outweighs any benefits of caching the + * compression work. + * + * (Most swapins go through the swapcache. The notable + * exception is the singleton fault on SWP_SYNCHRONOUS_IO + * files, which reads into a private page and may free it if + * the fault fails. 
We remain the primary owner of the entry.) + */ + if (swapcache) + zswap_rb_erase(&tree->rbroot, entry); spin_unlock(&tree->lock); if (entry->length) @@ -1645,9 +1677,10 @@ bool zswap_load(struct folio *folio) if (entry->objcg) count_objcg_event(entry->objcg, ZSWPIN); - zswap_entry_free(entry); - - folio_mark_dirty(folio); + if (swapcache) { + zswap_entry_free(entry); + folio_mark_dirty(folio); + } return true; } diff --git a/net/9p/client.c b/net/9p/client.c index e265a0ca6bdd..f7e90b4769bb 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1583,7 +1583,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, received = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received); if (non_zc) { int n = copy_to_iter(dataptr, received, to); @@ -1609,9 +1609,6 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) int total = 0; *err = 0; - p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n", - fid->fid, offset, iov_iter_count(from)); - while (iov_iter_count(from)) { int count = iov_iter_count(from); int rsize = fid->iounit; @@ -1623,6 +1620,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) if (count < rsize) rsize = count; + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n", + fid->fid, offset, rsize, count); + /* Don't bother zerocopy for small IO (< 1024) */ if (clnt->trans_mod->zc_request && rsize > 1024) { req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0, @@ -1650,7 +1650,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) written = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written); p9_req_put(clnt, req); iov_iter_revert(from, count - written - iov_iter_count(from)); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 1a3948b8c493..196060dc6138 100644 --- 
a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -95,7 +95,6 @@ struct p9_poll_wait { * @unsent_req_list: accounting for requests that haven't been sent * @rreq: read request * @wreq: write request - * @req: current request being processed (if any) * @tmp_buf: temporary buffer to read in header * @rc: temporary fcall for reading current frame * @wpos: write position for current frame diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index c5462486dbca..282ec581c072 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev) spin_lock_bh(&ax25_dev_lock); #ifdef CONFIG_AX25_DAMA_SLAVE - ax25_ds_del_timer(ax25_dev); + timer_shutdown_sync(&ax25_dev->dama.slave_timer); #endif /* diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 1690ae57a09d..a7028d38c1f5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2874,7 +2874,7 @@ static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err) cancel_delayed_work_sync(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); - hci_cmd_sync_cancel_sync(hdev, -err); + hci_cmd_sync_cancel_sync(hdev, err); } /* Suspend HCI device */ @@ -2894,7 +2894,7 @@ int hci_suspend_dev(struct hci_dev *hdev) return 0; /* Cancel potentially blocking sync operation before suspend */ - hci_cancel_cmd_sync(hdev, -EHOSTDOWN); + hci_cancel_cmd_sync(hdev, EHOSTDOWN); hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); @@ -4210,7 +4210,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) err = hci_send_frame(hdev, skb); if (err < 0) { - hci_cmd_sync_cancel_sync(hdev, err); + hci_cmd_sync_cancel_sync(hdev, -err); return; } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 233453807b50..ce3ff2fa72e5 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -218,10 +218,12 @@ static int conn_info_min_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || 
val > hdev->conn_info_max_age) + hci_dev_lock(hdev); + if (val == 0 || val > hdev->conn_info_max_age) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->conn_info_min_age = val; hci_dev_unlock(hdev); @@ -246,10 +248,12 @@ static int conn_info_max_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val < hdev->conn_info_min_age) + hci_dev_lock(hdev); + if (val == 0 || val < hdev->conn_info_min_age) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->conn_info_max_age = val; hci_dev_unlock(hdev); @@ -567,10 +571,12 @@ static int sniff_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val > hdev->sniff_max_interval) + hci_dev_lock(hdev); + if (val == 0 || val % 2 || val > hdev->sniff_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->sniff_min_interval = val; hci_dev_unlock(hdev); @@ -595,10 +601,12 @@ static int sniff_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val < hdev->sniff_min_interval) + hci_dev_lock(hdev); + if (val == 0 || val % 2 || val < hdev->sniff_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->sniff_max_interval = val; hci_dev_unlock(hdev); @@ -850,10 +858,12 @@ static int conn_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) + hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_conn_min_interval = val; hci_dev_unlock(hdev); @@ -878,10 +888,12 @@ static int conn_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) + hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) { + 
hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_conn_max_interval = val; hci_dev_unlock(hdev); @@ -990,10 +1002,12 @@ static int adv_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) + hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_adv_min_interval = val; hci_dev_unlock(hdev); @@ -1018,10 +1032,12 @@ static int adv_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) + hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_adv_max_interval = val; hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4ae224824012..a8b8cfebe018 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3208,6 +3208,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, if (test_bit(HCI_ENCRYPT, &hdev->flags)) set_bit(HCI_CONN_ENCRYPT, &conn->flags); + /* "Link key request" completed ahead of "connect request" completes */ + if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) && + ev->link_type == ACL_LINK) { + struct link_key *key; + struct hci_cp_read_enc_key_size cp; + + key = hci_find_link_key(hdev, &ev->bdaddr); + if (key) { + set_bit(HCI_CONN_ENCRYPT, &conn->flags); + + if (!(hdev->commands[20] & 0x10)) { + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } else { + cp.handle = cpu_to_le16(conn->handle); + if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, + sizeof(cp), &cp)) { + bt_dev_err(hdev, "sending read key size failed"); + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } + } + + hci_encrypt_cfm(conn, ev->status); + } + } + /* Get remote features */ if (conn->type == 
ACL_LINK) { struct hci_cp_read_remote_features cp; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index f6b662369322..8fe02921adf1 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -617,7 +617,10 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) bt_dev_dbg(hdev, "err 0x%2.2x", err); if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = err; + /* req_result is __u32 so error must be positive to be properly + * propagated. + */ + hdev->req_result = err < 0 ? -err : err; hdev->req_status = HCI_REQ_CANCELED; wake_up_interruptible(&hdev->req_wait_q); @@ -3416,7 +3419,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) return; - bacpy(&hdev->public_addr, &ba); + if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) + baswap(&hdev->public_addr, &ba); + else + bacpy(&hdev->public_addr, &ba); } struct hci_init_stage { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 99d82676f780..cbd0e3586c3f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) struct ebt_table_info *newinfo; struct ebt_replace tmp; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1423,6 +1425,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len) { struct ebt_replace hlp; + if (len < sizeof(hlp)) + return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; @@ -2352,6 +2356,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg, { struct compat_ebt_replace hlp; + if (len < sizeof(hlp)) + return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; diff --git a/net/core/dev.c b/net/core/dev.c index 9a67003e49db..984ff8b9d0e1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ 
-429,7 +429,7 @@ EXPORT_PER_CPU_SYMBOL(softnet_data); * PP consumers must pay attention to run APIs in the appropriate context * (e.g. NAPI context). */ -static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool); +static DEFINE_PER_CPU(struct page_pool *, system_page_pool); #ifdef CONFIG_LOCKDEP /* diff --git a/net/core/gro.c b/net/core/gro.c index ee30d4f0c038..83f35d99a682 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -192,8 +192,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) } merge: - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; + skb->sk = NULL; delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; diff --git a/net/core/sock.c b/net/core/sock.c index 43bf3818c19e..0963689a5950 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -482,7 +482,7 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { + if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { atomic_inc(&sk->sk_drops); trace_sock_rcvqueue_full(sk, skb); return -ENOMEM; @@ -552,7 +552,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, skb->dev = NULL; - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { + if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) { atomic_inc(&sk->sk_drops); goto discard_and_relse; } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 27d733c0f65e..8598466a3805 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -411,6 +411,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, struct sock *sk; int err = 0; + if (irqs_disabled()) + return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ + spin_lock_bh(&stab->lock); sk = *psk; if (!sk_test || sk_test == sk) @@ -933,6 
+936,9 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key) struct bpf_shtab_elem *elem; int ret = -ENOENT; + if (irqs_disabled()) + return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ + hash = sock_hash_bucket_hash(key, key_size); bucket = sock_hash_select_bucket(htab, hash); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c98b5b71ad7c..e9d45133d641 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -132,30 +132,29 @@ static int hsr_dev_open(struct net_device *dev) { struct hsr_priv *hsr; struct hsr_port *port; - char designation; + const char *designation = NULL; hsr = netdev_priv(dev); - designation = '\0'; hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { case HSR_PT_SLAVE_A: - designation = 'A'; + designation = "Slave A"; break; case HSR_PT_SLAVE_B: - designation = 'B'; + designation = "Slave B"; break; default: - designation = '?'; + designation = "Unknown"; } if (!is_slave_up(port->dev)) - netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n", + netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n", designation, port->dev->name); } - if (designation == '\0') + if (!designation) netdev_warn(dev, "No slave devices configured\n"); return 0; diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index e5742f2a2d52..1b6457f357bd 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -220,7 +220,8 @@ void hsr_del_port(struct hsr_port *port) netdev_update_features(master->dev); dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); netdev_rx_handler_unregister(port->dev); - dev_set_promiscuity(port->dev, -1); + if (!port->hsr->fwd_offloaded) + dev_set_promiscuity(port->dev, -1); netdev_upper_dev_unlink(port->dev, master->dev); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7d8090f109ef..3b38610958ee 100644 --- a/net/ipv4/inet_connection_sock.c +++ 
b/net/ipv4/inet_connection_sock.c @@ -203,8 +203,15 @@ static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, kuid_t sk_uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) { - if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) - return false; + if (ipv6_only_sock(sk2)) { + if (sk->sk_family == AF_INET) + return false; + +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + return false; +#endif + } return inet_bind_conflict(sk, sk2, sk_uid, relax, reuseport_cb_ok, reuseport_ok); @@ -287,6 +294,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l struct sock_reuseport *reuseport_cb; struct inet_bind_hashbucket *head2; struct inet_bind2_bucket *tb2; + bool conflict = false; bool reuseport_cb_ok; rcu_read_lock(); @@ -299,18 +307,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l spin_lock(&head2->lock); - inet_bind_bucket_for_each(tb2, &head2->chain) - if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) - break; + inet_bind_bucket_for_each(tb2, &head2->chain) { + if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) + continue; - if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, - reuseport_ok)) { - spin_unlock(&head2->lock); - return true; + if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok)) + continue; + + conflict = true; + break; } spin_unlock(&head2->lock); - return false; + + return conflict; } /* @@ -771,6 +781,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk) } EXPORT_SYMBOL(inet_csk_clear_xmit_timers); +void inet_csk_clear_xmit_timers_sync(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + /* ongoing timer handlers need to acquire socket lock. 
*/ + sock_not_owned_by_me(sk); + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; + + sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); + sk_stop_timer_sync(sk, &sk->sk_timer); +} + void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 7072fc0783ef..c88c9034d630 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,6 +24,8 @@ #include <net/ip.h> #include <net/ipv6.h> +#include "../core/sock_destructor.h" + /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will * contain "runs" of one or more adjacent fragments. @@ -39,6 +41,7 @@ struct ipfrag_skb_cb { }; struct sk_buff *next_frag; int frag_run_len; + int ip_defrag_offset; }; #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) @@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, */ if (!last) fragrun_create(q, skb); /* First fragment. */ - else if (last->ip_defrag_offset + last->len < end) { + else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. 
*/ - if (offset < last->ip_defrag_offset + last->len) + if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; - if (offset == last->ip_defrag_offset + last->len) + if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) fragrun_append_to_last(q, skb); else fragrun_create(q, skb); @@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, parent = *rbn; curr = rb_to_skb(parent); - curr_run_end = curr->ip_defrag_offset + + curr_run_end = FRAG_CB(curr)->ip_defrag_offset + FRAG_CB(curr)->frag_run_len; - if (end <= curr->ip_defrag_offset) + if (end <= FRAG_CB(curr)->ip_defrag_offset) rbn = &parent->rb_left; else if (offset >= curr_run_end) rbn = &parent->rb_right; - else if (offset >= curr->ip_defrag_offset && + else if (offset >= FRAG_CB(curr)->ip_defrag_offset && end <= curr_run_end) return IPFRAG_DUP; else @@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, rb_insert_color(&skb->rbnode, &q->rb_fragments); } - skb->ip_defrag_offset = offset; + FRAG_CB(skb)->ip_defrag_offset = offset; return IPFRAG_OK; } @@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent) { struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); - struct sk_buff **nextp; + void (*destructor)(struct sk_buff *); + unsigned int orig_truesize = 0; + struct sk_buff **nextp = NULL; + struct sock *sk = skb->sk; int delta; + if (sk && is_skb_wmem(skb)) { + /* TX: skb->sk might have been passed as argument to + * dst->output and must remain valid until tx completes. + * + * Move sk to reassembled skb and fix up wmem accounting. 
+ */ + orig_truesize = skb->truesize; + destructor = skb->destructor; + } + if (head != skb) { fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) - return NULL; + if (!fp) { + head = skb; + goto out_restore_sk; + } FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode)) FRAG_CB(parent)->next_frag = fp; @@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, &q->rb_fragments); if (q->fragments_tail == skb) q->fragments_tail = fp; + + if (orig_truesize) { + /* prevent skb_morph from releasing sk */ + skb->sk = NULL; + skb->destructor = NULL; + } skb_morph(skb, head); FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, @@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, consume_skb(head); head = skb; } - WARN_ON(head->ip_defrag_offset != 0); + WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); delta = -head->truesize; /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) - return NULL; + goto out_restore_sk; delta += head->truesize; if (delta) @@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) - return NULL; + goto out_restore_sk; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) @@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, nextp = &skb_shinfo(head)->frag_list; } +out_restore_sk: + if (orig_truesize) { + int ts_delta = head->truesize - orig_truesize; + + /* if this reassembled skb is fragmented later, + * fraglist skbs will get skb->sk assigned from head->sk, + * and each frag skb will be released via sock_wfree. + * + * Update sk_wmem_alloc. 
+ */ + head->sk = sk; + head->destructor = destructor; + refcount_add(ts_delta, &sk->sk_wmem_alloc); + } + return nextp; } EXPORT_SYMBOL(inet_frag_reasm_prepare); @@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { + struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; + const unsigned int head_truesize = head->truesize; struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; @@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, head->prev = NULL; head->tstamp = q->stamp; head->mono_delivery_time = q->mono_delivery_time; + + if (sk) + refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(inet_frag_reasm_finish); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a4941f53b523..fb947d1613fe 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -384,6 +384,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -487,7 +488,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); - skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7b16c211b904..57ddcd8c62f6 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, tpi->flags | TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); } else { + if (unlikely(!pskb_may_pull(skb, + gre_hdr_len + sizeof(*ershdr)))) + return PACKET_REJECT; + ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); ver = ershdr->ver; + iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags | TUNNEL_KEY, iph->saddr, iph->daddr, tpi->key); 
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 8f6e950163a7..1b991b889506 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -329,6 +329,7 @@ config NFT_COMPAT_ARP config IP_NF_ARPFILTER tristate "arptables-legacy packet filtering support" select IP_NF_ARPTABLES + select NETFILTER_FAMILY_ARP depends on NETFILTER_XTABLES help ARP packet filtering defines a table `filter', which has a series of diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 2407066b0fec..b150c9929b12 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct arpt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1254,6 +1256,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct arpt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 7da1df4997d0..487670759578 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1108,6 +1108,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1492,6 +1494,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 74928a9d1aa4..535856b0f0ed 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -768,8 +768,10 @@ static 
int nh_grp_hw_stats_update(struct nexthop *nh, bool *hw_stats_used) struct net *net = nh->net; int err; - if (nexthop_notifiers_is_empty(net)) + if (nexthop_notifiers_is_empty(net)) { + *hw_stats_used = false; return 0; + } err = nh_notifier_grp_hw_stats_init(&info, nh); if (err) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d20b62d52171..e767721b3a58 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout) lock_sock(sk); __tcp_close(sk, timeout); release_sock(sk); + if (!sk->sk_net_refcnt) + inet_csk_clear_xmit_timers_sync(sk); sock_put(sk); } EXPORT_SYMBOL(tcp_close); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 661d0e0d273f..c02bf011d4a6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -582,6 +582,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); +EXPORT_SYMBOL(udp_encap_needed_key); + +#if IS_ENABLED(CONFIG_IPV6) +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +EXPORT_SYMBOL(udpv6_encap_needed_key); +#endif + void udp_encap_enable(void) { static_branch_inc(&udp_encap_needed_key); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b9880743765c..3498dd1d0694 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -449,8 +449,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) NAPI_GRO_CB(p)->count++; p->data_len += skb->len; - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; + skb->sk = NULL; p->truesize += skb->truesize; p->len += skb->len; @@ -551,11 +552,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - /* we can do L4 aggregation only if the packet can't land in a tunnel - * otherwise we could corrupt the inner stream + /* We can 
do L4 aggregation only if the packet can't land in a tunnel + * otherwise we could corrupt the inner stream. Detecting such packets + * cannot be foolproof and the aggregation might still happen in some + * cases. Such packets should be caught in udp_unexpected_gso later. */ NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { + /* If the packet was locally encapsulated in a UDP tunnel that + * wasn't detected above, do not GRO. + */ + if (skb->encapsulation) + goto out; + if (skb->dev->features & NETIF_F_GRO_FRAGLIST) NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1; @@ -719,13 +728,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 247bd4d8ee45..92db9b474f2b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5416,10 +5416,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, err = 0; if (fillargs.ifindex) { - err = -ENODEV; dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex); - if (!dev) + if (!dev) { + err = -ENODEV; goto done; + } idev = __in6_dev_get(dev); if (idev) err = in6_dump_addrs(idev, skb, cb, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5c558dc1c683..7209419cfb0e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -651,19 +651,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (!w) { /* New dump: * - * 1. hook callback destructor. - */ - cb->args[3] = (long)cb->done; - cb->done = fib6_dump_done; - - /* - * 2. allocate and initialize walker. + * 1. 
allocate and initialize walker. */ w = kzalloc(sizeof(*w), GFP_ATOMIC); if (!w) return -ENOMEM; w->func = fib6_dump_node; cb->args[2] = (long)w; + + /* 2. hook callback destructor. + */ + cb->args[3] = (long)cb->done; + cb->done = fib6_dump_done; + } arg.skb = skb; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ca7e77e84283..c89aef524df9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb, struct ip6_tnl *tunnel; u8 ver; + if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) + return PACKET_REJECT; + ipv6h = ipv6_hdr(skb); ershdr = (struct erspan_base_hdr *)skb->data; ver = ershdr->ver; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index fd9f049d6d41..636b360311c5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1125,6 +1125,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1501,6 +1503,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1a51a44571c3..d0dcbaca1994 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -294,6 +294,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -469,7 +470,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? 
skb->dev->ifindex : 0); if (fq == NULL) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7c1e6469d091..8b1dd7f51249 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -447,7 +447,7 @@ csum_copy_err: goto try_again; } -DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void) { static_branch_inc(&udpv6_encap_needed_key); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 312bcaeea96f..bbd347de00b4 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -174,13 +174,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f03452dc716d..f67c1d021812 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2199,15 +2199,14 @@ static int ieee80211_change_station(struct wiphy *wiphy, } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - sta->sdata->u.vlan.sta) { - ieee80211_clear_fast_rx(sta); + sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); - } if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; + ieee80211_check_fast_rx(sta); ieee80211_check_fast_xmit(sta); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index 49da401c5340..35a8ba25fa57 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -158,7 +158,7 @@ do { \ _sdata_dbg(print, sdata, "[link %d] " fmt, \ link_id, ##__VA_ARGS__); \ else \ - _sdata_dbg(1, sdata, fmt, ##__VA_ARGS__); \ + _sdata_dbg(print, sdata, fmt, 
##__VA_ARGS__); \ } while (0) #define link_dbg(link, fmt, ...) \ _link_id_dbg(1, (link)->sdata, (link)->link_id, \ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b6fead612b66..bd507d6b65e3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -131,7 +131,7 @@ struct ieee80211_bss { }; /** - * enum ieee80211_corrupt_data_flags - BSS data corruption flags + * enum ieee80211_bss_corrupt_data_flags - BSS data corruption flags * @IEEE80211_BSS_CORRUPT_BEACON: last beacon frame received was corrupted * @IEEE80211_BSS_CORRUPT_PROBE_RESP: last probe response received was corrupted * @@ -144,7 +144,7 @@ enum ieee80211_bss_corrupt_data_flags { }; /** - * enum ieee80211_valid_data_flags - BSS valid data flags + * enum ieee80211_bss_valid_data_flags - BSS valid data flags * @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE * @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE * @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 47a2cba8313f..96b70006b7fc 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5874,6 +5874,15 @@ static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata, } if (sdata->vif.active_links != active_links) { + /* usable links are affected when active_links are changed, + * so notify the driver about the status change + */ + changed |= BSS_CHANGED_MLD_VALID_LINKS; + active_links &= sdata->vif.active_links; + if (!active_links) + active_links = + BIT(__ffs(sdata->vif.valid_links & + ~dormant_links)); ret = ieee80211_set_active_links(&sdata->vif, active_links); if (ret) { sdata_info(sdata, "Failed to set TTLM active links\n"); @@ -5888,7 +5897,6 @@ static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata, goto out; } - changed |= BSS_CHANGED_MLD_VALID_LINKS; sdata->vif.suspended_links = suspended_links; if (sdata->vif.suspended_links) changed 
|= BSS_CHANGED_MLD_TTLM; @@ -7652,7 +7660,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "failed to insert STA entry for the AP (error %d)\n", err); - goto out_err; + goto out_release_chan; } } else WARN_ON_ONCE(!ether_addr_equal(link->u.mgd.bssid, cbss->bssid)); @@ -7663,8 +7671,9 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, return 0; +out_release_chan: + ieee80211_link_release_channel(link); out_err: - ieee80211_link_release_channel(&sdata->deflink); ieee80211_vif_set_links(sdata, 0, 0); return err; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3a1967bc7bad..7e74b812e366 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3937,8 +3937,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, mptcp_set_state(newsk, TCP_CLOSE); } } else { - MPTCP_INC_STATS(sock_net(ssk), - MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); tcpfallback: newsk->sk_kern_sock = kern; lock_sock(newsk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index dcd1c76d2a3b..73fdf423de44 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1493,6 +1493,10 @@ int mptcp_set_rcvlowat(struct sock *sk, int val) struct mptcp_subflow_context *subflow; int space, cap; + /* bpf can land here with a wrong sk type */ + if (sk->sk_protocol == IPPROTO_TCP) + return -EINVAL; + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; else diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 1626dd20c68f..6042a47da61b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -905,6 +905,8 @@ dispose_child: return child; fallback: + if (fallback) + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); mptcp_subflow_drop_ctx(child); return child; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5fa3d3540c93..d89d77946719 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ 
-1200,6 +1200,26 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) __NFT_TABLE_F_WAS_AWAKEN | \ __NFT_TABLE_F_WAS_ORPHAN) +static bool nft_table_pending_update(const struct nft_ctx *ctx) +{ + struct nftables_pernet *nft_net = nft_pernet(ctx->net); + struct nft_trans *trans; + + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return true; + + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->ctx.table == ctx->table && + ((trans->msg_type == NFT_MSG_NEWCHAIN && + nft_trans_chain_update(trans)) || + (trans->msg_type == NFT_MSG_DELCHAIN && + nft_is_base_chain(trans->ctx.chain)))) + return true; + } + + return false; +} + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@ -1226,7 +1246,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return -EOPNOTSUPP; /* No dormant off/on/off/on games in single transaction */ - if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + if (nft_table_pending_update(ctx)) return -EINVAL; trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, @@ -2430,6 +2450,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_stats __percpu *stats = NULL; struct nft_chain_hook hook = {}; + if (table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + if (flags & NFT_CHAIN_BINDING) return -EOPNOTSUPP; @@ -2631,6 +2654,13 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, } } + if (table->flags & __NFT_TABLE_F_UPDATE && + !list_empty(&hook.list)) { + NL_SET_BAD_ATTR(extack, attr); + err = -EOPNOTSUPP; + goto err_hooks; + } + if (!(table->flags & NFT_TABLE_F_DORMANT) && nft_is_base_chain(chain) && !list_empty(&hook.list)) { @@ -2860,6 +2890,9 @@ static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_trans *trans; int err; + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return -EOPNOTSUPP; + err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook, ctx->family, chain->flags, extack); if (err < 0) @@ -2944,7 
+2977,8 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); if (nla[NFTA_CHAIN_HOOK]) { - if (chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN || + chain->flags & NFT_CHAIN_HW_OFFLOAD) return -EOPNOTSUPP; if (nft_is_base_chain(chain)) { @@ -8263,11 +8297,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, return err; } +/* call under rcu_read_lock */ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) { const struct nf_flowtable_type *type; - list_for_each_entry(type, &nf_tables_flowtables, list) { + list_for_each_entry_rcu(type, &nf_tables_flowtables, list) { if (family == type->family) return type; } @@ -8279,9 +8314,13 @@ nft_flowtable_type_get(struct net *net, u8 family) { const struct nf_flowtable_type *type; + rcu_read_lock(); type = __nft_flowtable_type_get(family); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES @@ -10182,9 +10221,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_chain_update(trans)) { nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, &nft_trans_chain_hooks(trans)); - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + nft_netdev_unregister_hooks(net, + &nft_trans_chain_hooks(trans), + true); + } } else { nft_chain_del(trans->ctx.chain); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, @@ -10423,10 +10464,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + int err = 0; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) - 
return -EAGAIN; + err = -EAGAIN; list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { @@ -10460,9 +10502,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + nft_netdev_unregister_hooks(net, + &nft_trans_chain_hooks(trans), + true); + } free_percpu(nft_trans_chain_stats(trans)); kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); @@ -10616,12 +10660,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nf_tables_abort_release(trans); } - if (action == NFNL_ABORT_AUTOLOAD) - nf_tables_module_autoload(net); - else - nf_tables_module_autoload_cleanup(net); - - return 0; + return err; } static int nf_tables_abort(struct net *net, struct sk_buff *skb, @@ -10634,6 +10673,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, gc_seq = nft_gc_seq_begin(nft_net); ret = __nf_tables_abort(net, action); nft_gc_seq_end(nft_net, gc_seq); + + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + /* module autoload needs to happen after GC sequence update because it + * temporarily releases and grabs mutex again. 
+ */ + if (action == NFNL_ABORT_AUTOLOAD) + nf_tables_module_autoload(net); + else + nf_tables_module_autoload_cleanup(net); + mutex_unlock(&nft_net->commit_mutex); return ret; @@ -11439,9 +11489,10 @@ static void __net_exit nf_tables_exit_net(struct net *net) gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nft_net->commit_list) || - !list_empty(&nft_net->module_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + if (!list_empty(&nft_net->module_list)) + nf_tables_module_autoload_cleanup(net); __nft_release_tables(net); @@ -11533,6 +11584,7 @@ static void __exit nf_tables_module_exit(void) unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); + nf_tables_trans_destroy_flush_work(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); cancel_work_sync(&trans_destroy_work); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index cdad47b140fa..0d26c8ec9993 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1516,6 +1516,11 @@ static void nci_rx_work(struct work_struct *work) nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); + if (!nci_plen(skb->data)) { + kfree_skb(skb); + break; + } + /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: diff --git a/net/rds/rdma.c b/net/rds/rdma.c index a4e3c5de998b..00dbcd4d28e6 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, } ret = PTR_ERR(trans_private); /* Trigger connection so that its ready for the next retry */ - if (ret == -ENODEV) + if (ret == -ENODEV && cp) rds_conn_connect_if_down(cp->cp_conn); goto out; } diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 39945b139c48..cd0accaf844a 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -241,13 +241,13 @@ static int tcf_skbmod_dump(struct sk_buff 
*skb, struct tc_action *a, struct tcf_skbmod *d = to_skbmod(a); unsigned char *b = skb_tail_pointer(skb); struct tcf_skbmod_params *p; - struct tc_skbmod opt = { - .index = d->tcf_index, - .refcnt = refcount_read(&d->tcf_refcnt) - ref, - .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, - }; + struct tc_skbmod opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + opt.index = d->tcf_index; + opt.refcnt = refcount_read(&d->tcf_refcnt) - ref, + opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind; spin_lock_bh(&d->tcf_lock); opt.action = d->tcf_action; p = rcu_dereference_protected(d->skbmod_p, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 65e05b0c98e4..60239378d43f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -809,7 +809,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) notify = !sch->q.qlen && !WARN_ON_ONCE(!n && !qdisc_is_offloaded); /* TODO: perform the search on a per txq basis */ - sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); + sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { WARN_ON_ONCE(parentid != TC_H_ROOT); break; diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index b2c1b683a88e..d2b02710ab07 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -921,8 +921,6 @@ out_err: * Caller provides the truncation length of the output token (h) in * cksumout.len. * - * Note that for RPCSEC, the "initial cipher state" is always all zeroes. 
- * * Return values: * %GSS_S_COMPLETE: Digest computed, @cksumout filled in * %GSS_S_FAILURE: Call failed @@ -933,19 +931,22 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher, int body_offset, struct xdr_netobj *cksumout) { unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher); - static const u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; struct ahash_request *req; struct scatterlist sg[1]; + u8 *iv, *checksumdata; int err = -ENOMEM; - u8 *checksumdata; checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL); if (!checksumdata) return GSS_S_FAILURE; + /* For RPCSEC, the "initial cipher state" is always all zeroes. */ + iv = kzalloc(ivsize, GFP_KERNEL); + if (!iv) + goto out_free_mem; req = ahash_request_alloc(tfm, GFP_KERNEL); if (!req) - goto out_free_cksumdata; + goto out_free_mem; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); err = crypto_ahash_init(req); if (err) @@ -969,7 +970,8 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher, out_free_ahash: ahash_request_free(req); -out_free_cksumdata: +out_free_mem: + kfree(iv); kfree_sensitive(checksumdata); return err ? GSS_S_FAILURE : GSS_S_COMPLETE; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 545017a3daa4..6b3f01beb294 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1206,15 +1206,6 @@ err_noclose: * MSG_SPLICE_PAGES is used exclusively to reduce the number of * copy operations in this path. Therefore the caller must ensure * that the pages backing @xdr are unchanging. - * - * Note that the send is non-blocking. The caller has incremented - * the reference count on each page backing the RPC message, and - * the network layer will "put" these pages when transmission is - * complete. - * - * This is safe for our RPC services because the memory backing - * the head and tail components is never kmalloc'd. These always - * come from pages in the svc_rqst::rq_pages array. 
*/ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, rpc_fraghdr marker, unsigned int *sentp) @@ -1244,6 +1235,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, 1 + count, sizeof(marker) + rqstp->rq_res.len); ret = sock_sendmsg(svsk->sk_sock, &msg); + page_frag_free(buf); if (ret < 0) return ret; *sentp += ret; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 211f57164cb6..b783231668c6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1976,10 +1976,10 @@ int tls_sw_recvmsg(struct sock *sk, if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); - psock = sk_psock_get(sk); err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); if (err < 0) return err; + psock = sk_psock_get(sk); bpf_strp_enabled = sk_psock_strp_enabled(psock); /* If crypto failed the connection is broken */ @@ -2152,12 +2152,15 @@ recv_end: } /* Drain records from the rx_list & copy if required */ - if (is_peek || is_kvec) + if (is_peek) err = process_rx_list(ctx, msg, &control, copied + peeked, decrypted - peeked, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek, NULL); + + /* we could have copied less than we wanted, and possibly nothing */ + decrypted += max(err, 0) - async_copy_bytes; } copied += decrypted; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 1748268e0694..ee5d306a96d0 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -120,7 +120,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) if (!skb) break; - virtio_transport_deliver_tap_pkt(skb); reply = virtio_vsock_skb_reply(skb); sgs = vsock->out_sgs; sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb), @@ -170,6 +169,8 @@ virtio_transport_send_pkt_work(struct work_struct *work) break; } + virtio_transport_deliver_tap_pkt(skb); + if (reply) { struct 
virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; int val; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index e039e66ab377..cbbf347c6b2e 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1024,7 +1024,7 @@ TRACE_EVENT(rdev_get_mpp, TRACE_EVENT(rdev_dump_mpp, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *dst, u8 *mpp), - TP_ARGS(wiphy, netdev, _idx, mpp, dst), + TP_ARGS(wiphy, netdev, _idx, dst, mpp), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index a161c64d1765..838ad6541a17 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -4,6 +4,7 @@ * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> + * Copyright (C) 2024 Intel Corporation * * (As all part of the Linux kernel, this file is GPL) */ @@ -662,7 +663,8 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev) dev->ieee80211_ptr->wiphy->wext && dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) { wireless_warn_cfg80211_wext(); - if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO) + if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO | + WIPHY_FLAG_DISABLE_WEXT)) return NULL; return dev->ieee80211_ptr->wiphy->wext->get_wireless_stats(dev); } @@ -704,7 +706,8 @@ static iw_handler get_handler(struct net_device *dev, unsigned int cmd) #ifdef CONFIG_CFG80211_WEXT if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy) { wireless_warn_cfg80211_wext(); - if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO) + if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO | + WIPHY_FLAG_DISABLE_WEXT)) return NULL; handlers = dev->ieee80211_ptr->wiphy->wext; } diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 3ce5d503a6da..c5af566e911a 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn 
@@ -114,6 +114,8 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) +KBUILD_CFLAGS += -Wno-override-init # alias for -Wno-initializer-overrides in clang + ifdef CONFIG_CC_IS_CLANG # Clang before clang-16 would warn on default argument promotions. ifneq ($(call clang-min-version, 160000),y) @@ -151,10 +153,6 @@ KBUILD_CFLAGS += -Wtype-limits KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) -ifdef CONFIG_CC_IS_CLANG -KBUILD_CFLAGS += -Winitializer-overrides -endif - KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 else @@ -164,9 +162,7 @@ KBUILD_CFLAGS += -Wno-missing-field-initializers KBUILD_CFLAGS += -Wno-type-limits KBUILD_CFLAGS += -Wno-shift-negative-value -ifdef CONFIG_CC_IS_CLANG -KBUILD_CFLAGS += -Wno-initializer-overrides -else +ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -Wno-maybe-uninitialized endif diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 8568d256d6fb..79fcf2731686 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -23,7 +23,7 @@ modname = $(notdir $(@:.mod.o=)) part-of-module = y quiet_cmd_cc_o_c = CC [M] $@ - cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV), $(c_flags)) -c -o $@ $< + cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV) $(CFLAGS_KCSAN), $(c_flags)) -c -o $@ $< %.mod.o: %.mod.c FORCE $(call if_changed_dep,cc_o_c) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 4606944984ee..c55878bddfdd 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -414,8 +414,8 @@ class PrinterRST(Printer): version = version.stdout.decode().rstrip() except: try: - version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot, - capture_output=True, check=True) + version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'], + cwd=linuxRoot, 
capture_output=True, check=True) version = version.stdout.decode().rstrip() except: return 'Linux' diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index b5730061872b..965bb40c50e5 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -552,11 +552,6 @@ static int conf_choice(struct menu *menu) continue; } sym_set_tristate_value(child->sym, yes); - for (child = child->list; child; child = child->next) { - indent += 2; - conf(child); - indent -= 2; - } return 1; } } diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index e69d7c59d930..e7cc9e985c4f 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -89,7 +89,7 @@ void menu_add_visibility(struct expr *dep); struct property *menu_add_prompt(enum prop_type type, char *prompt, struct expr *dep); void menu_add_expr(enum prop_type type, struct expr *expr, struct expr *dep); void menu_add_symbol(enum prop_type type, struct symbol *sym, struct expr *dep); -void menu_finalize(struct menu *parent); +void menu_finalize(void); void menu_set_type(int type); extern struct menu rootmenu; diff --git a/scripts/kconfig/lxdialog/checklist.c b/scripts/kconfig/lxdialog/checklist.c index 31d0a89fbeb7..75493302fb85 100644 --- a/scripts/kconfig/lxdialog/checklist.c +++ b/scripts/kconfig/lxdialog/checklist.c @@ -119,7 +119,7 @@ int dialog_checklist(const char *title, const char *prompt, int height, } do_resize: - if (getmaxy(stdscr) < (height + CHECKLIST_HEIGTH_MIN)) + if (getmaxy(stdscr) < (height + CHECKLIST_HEIGHT_MIN)) return -ERRDISPLAYTOOSMALL; if (getmaxx(stdscr) < (width + CHECKLIST_WIDTH_MIN)) return -ERRDISPLAYTOOSMALL; diff --git a/scripts/kconfig/lxdialog/dialog.h b/scripts/kconfig/lxdialog/dialog.h index 2d15ba893fbf..f6c2ebe6d1f9 100644 --- a/scripts/kconfig/lxdialog/dialog.h +++ b/scripts/kconfig/lxdialog/dialog.h @@ -162,17 +162,17 @@ int on_key_esc(WINDOW *win); int on_key_resize(void); /* minimum (re)size values */ -#define CHECKLIST_HEIGTH_MIN 6 /* For dialog_checklist() */ 
+#define CHECKLIST_HEIGHT_MIN 6 /* For dialog_checklist() */ #define CHECKLIST_WIDTH_MIN 6 -#define INPUTBOX_HEIGTH_MIN 2 /* For dialog_inputbox() */ +#define INPUTBOX_HEIGHT_MIN 2 /* For dialog_inputbox() */ #define INPUTBOX_WIDTH_MIN 2 -#define MENUBOX_HEIGTH_MIN 15 /* For dialog_menu() */ +#define MENUBOX_HEIGHT_MIN 15 /* For dialog_menu() */ #define MENUBOX_WIDTH_MIN 65 -#define TEXTBOX_HEIGTH_MIN 8 /* For dialog_textbox() */ +#define TEXTBOX_HEIGHT_MIN 8 /* For dialog_textbox() */ #define TEXTBOX_WIDTH_MIN 8 -#define YESNO_HEIGTH_MIN 4 /* For dialog_yesno() */ +#define YESNO_HEIGHT_MIN 4 /* For dialog_yesno() */ #define YESNO_WIDTH_MIN 4 -#define WINDOW_HEIGTH_MIN 19 /* For init_dialog() */ +#define WINDOW_HEIGHT_MIN 19 /* For init_dialog() */ #define WINDOW_WIDTH_MIN 80 int init_dialog(const char *backtitle); diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c index 1dcfb288ee63..3c6e24b20f5b 100644 --- a/scripts/kconfig/lxdialog/inputbox.c +++ b/scripts/kconfig/lxdialog/inputbox.c @@ -43,7 +43,7 @@ int dialog_inputbox(const char *title, const char *prompt, int height, int width strcpy(instr, init); do_resize: - if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGTH_MIN)) + if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGHT_MIN)) return -ERRDISPLAYTOOSMALL; if (getmaxx(stdscr) <= (width - INPUTBOX_WIDTH_MIN)) return -ERRDISPLAYTOOSMALL; diff --git a/scripts/kconfig/lxdialog/menubox.c b/scripts/kconfig/lxdialog/menubox.c index 0e333284e947..6e6244df0c56 100644 --- a/scripts/kconfig/lxdialog/menubox.c +++ b/scripts/kconfig/lxdialog/menubox.c @@ -172,7 +172,7 @@ int dialog_menu(const char *title, const char *prompt, do_resize: height = getmaxy(stdscr); width = getmaxx(stdscr); - if (height < MENUBOX_HEIGTH_MIN || width < MENUBOX_WIDTH_MIN) + if (height < MENUBOX_HEIGHT_MIN || width < MENUBOX_WIDTH_MIN) return -ERRDISPLAYTOOSMALL; height -= 4; diff --git a/scripts/kconfig/lxdialog/textbox.c b/scripts/kconfig/lxdialog/textbox.c index 
058ed0e5bbd5..0abaf635978f 100644 --- a/scripts/kconfig/lxdialog/textbox.c +++ b/scripts/kconfig/lxdialog/textbox.c @@ -175,7 +175,7 @@ int dialog_textbox(const char *title, const char *tbuf, int initial_height, do_resize: getmaxyx(stdscr, height, width); - if (height < TEXTBOX_HEIGTH_MIN || width < TEXTBOX_WIDTH_MIN) + if (height < TEXTBOX_HEIGHT_MIN || width < TEXTBOX_WIDTH_MIN) return -ERRDISPLAYTOOSMALL; if (initial_height != 0) height = initial_height; diff --git a/scripts/kconfig/lxdialog/util.c b/scripts/kconfig/lxdialog/util.c index 3fb7508b68a2..f18e2a89f613 100644 --- a/scripts/kconfig/lxdialog/util.c +++ b/scripts/kconfig/lxdialog/util.c @@ -291,7 +291,7 @@ int init_dialog(const char *backtitle) getyx(stdscr, saved_y, saved_x); getmaxyx(stdscr, height, width); - if (height < WINDOW_HEIGTH_MIN || width < WINDOW_WIDTH_MIN) { + if (height < WINDOW_HEIGHT_MIN || width < WINDOW_WIDTH_MIN) { endwin(); return -ERRDISPLAYTOOSMALL; } diff --git a/scripts/kconfig/lxdialog/yesno.c b/scripts/kconfig/lxdialog/yesno.c index bcaac9b7bab2..b57d25e1549f 100644 --- a/scripts/kconfig/lxdialog/yesno.c +++ b/scripts/kconfig/lxdialog/yesno.c @@ -32,7 +32,7 @@ int dialog_yesno(const char *title, const char *prompt, int height, int width) WINDOW *dialog; do_resize: - if (getmaxy(stdscr) < (height + YESNO_HEIGTH_MIN)) + if (getmaxy(stdscr) < (height + YESNO_HEIGHT_MIN)) return -ERRDISPLAYTOOSMALL; if (getmaxx(stdscr) < (width + YESNO_WIDTH_MIN)) return -ERRDISPLAYTOOSMALL; diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index f4bb391d50cf..c0969097447d 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -659,9 +659,9 @@ static void conf_choice(struct menu *menu) dialog_clear(); res = dialog_checklist(prompt ? 
prompt : "Main Menu", radiolist_instructions, - MENUBOX_HEIGTH_MIN, + MENUBOX_HEIGHT_MIN, MENUBOX_WIDTH_MIN, - CHECKLIST_HEIGTH_MIN); + CHECKLIST_HEIGHT_MIN); selected = item_activate_selected(); switch (res) { case 0: diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 8498481e6afe..3b822cd110f4 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -282,7 +282,7 @@ static void sym_check_prop(struct symbol *sym) } } -void menu_finalize(struct menu *parent) +static void _menu_finalize(struct menu *parent, bool inside_choice) { struct menu *menu, *last_menu; struct symbol *sym; @@ -296,7 +296,12 @@ void menu_finalize(struct menu *parent) * and propagate parent dependencies before moving on. */ - if (sym && sym_is_choice(sym)) { + bool is_choice = false; + + if (sym && sym_is_choice(sym)) + is_choice = true; + + if (is_choice) { if (sym->type == S_UNKNOWN) { /* find the first choice value to find out choice type */ current_entry = parent; @@ -394,7 +399,7 @@ void menu_finalize(struct menu *parent) } } - if (sym && sym_is_choice(sym)) + if (is_choice) expr_free(parentdep); /* @@ -402,8 +407,8 @@ void menu_finalize(struct menu *parent) * moving on */ for (menu = parent->list; menu; menu = menu->next) - menu_finalize(menu); - } else if (sym) { + _menu_finalize(menu, is_choice); + } else if (!inside_choice && sym) { /* * Automatic submenu creation. If sym is a symbol and A, B, C, * ... are consecutive items (symbols, menus, ifs, etc.) 
that @@ -463,7 +468,7 @@ void menu_finalize(struct menu *parent) /* Superset, put in submenu */ expr_free(dep2); next: - menu_finalize(menu); + _menu_finalize(menu, false); menu->parent = parent; last_menu = menu; } @@ -582,6 +587,11 @@ void menu_finalize(struct menu *parent) } } +void menu_finalize(void) +{ + _menu_finalize(&rootmenu, false); +} + bool menu_has_prompt(struct menu *menu) { if (!menu->prompt) diff --git a/scripts/kconfig/parser.y b/scripts/kconfig/parser.y index b45bfaf0a02b..7fb996612c96 100644 --- a/scripts/kconfig/parser.y +++ b/scripts/kconfig/parser.y @@ -515,7 +515,7 @@ void conf_parse(const char *name) menu_add_prompt(P_MENU, "Main menu", NULL); } - menu_finalize(&rootmenu); + menu_finalize(); menu = &rootmenu; while (menu) { diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 967f1abb0edb..cb1be22afc65 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1541,7 +1541,7 @@ sub create_parameterlist($$$$) { save_struct_actual($2); push_parameter($2, "$type $1", $arg, $file, $declaration_name); - } elsif ($param =~ m/(.*?):(\d+)/) { + } elsif ($param =~ m/(.*?):(\w+)/) { if ($type ne "") { # skip unnamed bit-fields save_struct_actual($1); push_parameter($1, "$type:$2", $arg, $file, $declaration_name) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6b37039c9e92..2f5b91da5afa 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1007,6 +1007,8 @@ static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr, static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) { + Elf_Sym *new_sym; + /* If the supplied symbol has a valid name, return it */ if (is_valid_name(elf, sym)) return sym; @@ -1015,8 +1017,9 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) * Strive to find a better symbol name, but the resulting name may not * match the symbol referenced in the original code. 
*/ - return symsearch_find_nearest(elf, addr, get_secindex(elf, sym), - true, 20); + new_sym = symsearch_find_nearest(elf, addr, get_secindex(elf, sym), + true, 20); + return new_sym ? new_sym : sym; } static bool is_executable_section(struct elf_info *elf, unsigned int secndx) diff --git a/security/security.c b/security/security.c index 7e118858b545..0a9a0ac3f266 100644 --- a/security/security.c +++ b/security/security.c @@ -1793,11 +1793,11 @@ int security_path_mknod(const struct path *dir, struct dentry *dentry, EXPORT_SYMBOL(security_path_mknod); /** - * security_path_post_mknod() - Update inode security field after file creation + * security_path_post_mknod() - Update inode security after reg file creation * @idmap: idmap of the mount * @dentry: new file * - * Update inode security field after a file has been created. + * Update inode security field after a regular file has been created. */ void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry) { diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 0619a1cbbfbe..074d6c2714eb 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -2123,7 +2123,6 @@ static struct file_system_type sel_fs_type = { .kill_sb = sel_kill_sb, }; -static struct vfsmount *selinuxfs_mount __ro_after_init; struct path selinux_null __ro_after_init; static int __init init_sel_fs(void) @@ -2145,18 +2144,21 @@ static int __init init_sel_fs(void) return err; } - selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type); - if (IS_ERR(selinuxfs_mount)) { + selinux_null.mnt = kern_mount(&sel_fs_type); + if (IS_ERR(selinux_null.mnt)) { pr_err("selinuxfs: could not mount!\n"); - err = PTR_ERR(selinuxfs_mount); - selinuxfs_mount = NULL; + err = PTR_ERR(selinux_null.mnt); + selinux_null.mnt = NULL; + return err; } + selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root, &null_name); if (IS_ERR(selinux_null.dentry)) { pr_err("selinuxfs: could not lookup 
null!\n"); err = PTR_ERR(selinux_null.dentry); selinux_null.dentry = NULL; + return err; } return err; diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index 6a384b922e4f..d1f6cdcf1866 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -557,9 +557,32 @@ static const struct config_entry *snd_intel_dsp_find_config if (table->codec_hid) { int i; - for (i = 0; i < table->codec_hid->num_codecs; i++) - if (acpi_dev_present(table->codec_hid->codecs[i], NULL, -1)) + for (i = 0; i < table->codec_hid->num_codecs; i++) { + struct nhlt_acpi_table *nhlt; + bool ssp_found = false; + + if (!acpi_dev_present(table->codec_hid->codecs[i], NULL, -1)) + continue; + + nhlt = intel_nhlt_init(&pci->dev); + if (!nhlt) { + dev_warn(&pci->dev, "%s: NHLT table not found, skipped HID %s\n", + __func__, table->codec_hid->codecs[i]); + continue; + } + + if (intel_nhlt_has_endpoint_type(nhlt, NHLT_LINK_SSP) && + intel_nhlt_ssp_endpoint_mask(nhlt, NHLT_DEVICE_I2S)) + ssp_found = true; + + intel_nhlt_free(nhlt); + + if (ssp_found) break; + + dev_warn(&pci->dev, "%s: no valid SSP found for HID %s, skipped\n", + __func__, table->codec_hid->codecs[i]); + } if (i == table->codec_hid->num_codecs) continue; } diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index 5f60658c6051..d7417a40392b 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -45,6 +45,8 @@ static bool is_link_enabled(struct fwnode_handle *fw_node, u8 idx) "intel-quirk-mask", &quirk_mask); + fwnode_handle_put(link); + if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE) return false; diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 1a3f84599cb5..558c1f38fe97 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -644,6 +644,8 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) ret = cs35l56_wait_for_firmware_boot(&cs35l56->base); if (ret) goto err_powered_up; + + 
regcache_cache_only(cs35l56->base.regmap, false); } /* Disable auto-hibernate so that runtime_pm has control */ @@ -1002,6 +1004,8 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) if (ret) goto err; + regcache_cache_only(cs35l56->base.regmap, false); + ret = cs35l56_set_patch(&cs35l56->base); if (ret) goto err; diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 2cac337f5263..325e8f0b99a8 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -616,7 +616,6 @@ void snd_hda_shutup_pins(struct hda_codec *codec) } EXPORT_SYMBOL_GPL(snd_hda_shutup_pins); -#ifdef CONFIG_PM /* Restore the pin controls cleared previously via snd_hda_shutup_pins() */ static void restore_shutup_pins(struct hda_codec *codec) { @@ -634,7 +633,6 @@ static void restore_shutup_pins(struct hda_codec *codec) } codec->pins_shutup = 0; } -#endif static void hda_jackpoll_work(struct work_struct *work) { @@ -1001,9 +999,7 @@ int snd_hda_codec_device_new(struct hda_bus *bus, struct snd_card *card, codec->card = card; codec->addr = codec_addr; -#ifdef CONFIG_PM codec->power_jiffies = jiffies; -#endif snd_hda_sysfs_init(codec); @@ -1238,7 +1234,6 @@ static void purify_inactive_streams(struct hda_codec *codec) } } -#ifdef CONFIG_PM /* clean up all streams; called from suspend */ static void hda_cleanup_all_streams(struct hda_codec *codec) { @@ -1250,7 +1245,6 @@ static void hda_cleanup_all_streams(struct hda_codec *codec) really_cleanup_stream(codec, p); } } -#endif /* * amp access functions @@ -2858,7 +2852,6 @@ static void hda_exec_init_verbs(struct hda_codec *codec) static inline void hda_exec_init_verbs(struct hda_codec *codec) {} #endif -#ifdef CONFIG_PM /* update the power on/off account with the current jiffies */ static void update_power_acct(struct hda_codec *codec, bool on) { @@ -2966,9 +2959,6 @@ static int hda_codec_runtime_resume(struct device *dev) return 0; } -#endif /* CONFIG_PM */ - -#ifdef CONFIG_PM_SLEEP static int 
hda_codec_pm_prepare(struct device *dev) { struct hda_codec *codec = dev_to_hda_codec(dev); @@ -3023,22 +3013,19 @@ static int hda_codec_pm_restore(struct device *dev) dev->power.power_state = PMSG_RESTORE; return pm_runtime_force_resume(dev); } -#endif /* CONFIG_PM_SLEEP */ /* referred in hda_bind.c */ const struct dev_pm_ops hda_codec_driver_pm = { -#ifdef CONFIG_PM_SLEEP - .prepare = hda_codec_pm_prepare, - .complete = hda_codec_pm_complete, - .suspend = hda_codec_pm_suspend, - .resume = hda_codec_pm_resume, - .freeze = hda_codec_pm_freeze, - .thaw = hda_codec_pm_thaw, - .poweroff = hda_codec_pm_suspend, - .restore = hda_codec_pm_restore, -#endif /* CONFIG_PM_SLEEP */ - SET_RUNTIME_PM_OPS(hda_codec_runtime_suspend, hda_codec_runtime_resume, - NULL) + .prepare = pm_sleep_ptr(hda_codec_pm_prepare), + .complete = pm_sleep_ptr(hda_codec_pm_complete), + .suspend = pm_sleep_ptr(hda_codec_pm_suspend), + .resume = pm_sleep_ptr(hda_codec_pm_resume), + .freeze = pm_sleep_ptr(hda_codec_pm_freeze), + .thaw = pm_sleep_ptr(hda_codec_pm_thaw), + .poweroff = pm_sleep_ptr(hda_codec_pm_suspend), + .restore = pm_sleep_ptr(hda_codec_pm_restore), + .runtime_suspend = pm_ptr(hda_codec_runtime_suspend), + .runtime_resume = pm_ptr(hda_codec_runtime_resume), }; /* suspend the codec at shutdown; called from driver's shutdown callback */ @@ -3425,7 +3412,6 @@ int snd_hda_add_new_ctls(struct hda_codec *codec, } EXPORT_SYMBOL_GPL(snd_hda_add_new_ctls); -#ifdef CONFIG_PM /** * snd_hda_codec_set_power_save - Configure codec's runtime PM * @codec: codec device to configure @@ -3516,7 +3502,6 @@ int snd_hda_check_amp_list_power(struct hda_codec *codec, return 0; } EXPORT_SYMBOL_GPL(snd_hda_check_amp_list_power); -#endif /* * input MUX helper @@ -4060,12 +4045,10 @@ void snd_hda_bus_reset_codecs(struct hda_bus *bus) /* FIXME: maybe a better way needed for forced reset */ if (current_work() != &codec->jackpoll_work.work) cancel_delayed_work_sync(&codec->jackpoll_work); -#ifdef CONFIG_PM if 
(hda_codec_is_power_on(codec)) { hda_call_codec_suspend(codec); hda_call_codec_resume(codec); } -#endif } } diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 8af5ee1b0ea8..1030a8bfb3cd 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -1075,11 +1075,9 @@ irqreturn_t azx_interrupt(int irq, void *dev_id) bool active, handled = false; int repeat = 0; /* count for avoiding endless loop */ -#ifdef CONFIG_PM if (azx_has_pm_runtime(chip)) if (!pm_runtime_active(chip->card->dev)) return IRQ_NONE; -#endif spin_lock(&bus->reg_lock); diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index de2a3d08c73c..f64d9dc197a3 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -6021,7 +6021,6 @@ void snd_hda_gen_free(struct hda_codec *codec) } EXPORT_SYMBOL_GPL(snd_hda_gen_free); -#ifdef CONFIG_PM /** * snd_hda_gen_check_power_status - check the loopback power save state * @codec: the HDA codec @@ -6035,7 +6034,6 @@ int snd_hda_gen_check_power_status(struct hda_codec *codec, hda_nid_t nid) return snd_hda_check_amp_list_power(codec, &spec->loopback, nid); } EXPORT_SYMBOL_GPL(snd_hda_gen_check_power_status); -#endif /* @@ -6048,9 +6046,7 @@ static const struct hda_codec_ops generic_patch_ops = { .init = snd_hda_gen_init, .free = snd_hda_gen_free, .unsol_event = snd_hda_jack_unsol_event, -#ifdef CONFIG_PM .check_power_status = snd_hda_gen_check_power_status, -#endif }; /* diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index a8eea8367629..8f5ecf740c49 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -340,9 +340,7 @@ void snd_hda_gen_mic_autoswitch(struct hda_codec *codec, struct hda_jack_callback *jack); void snd_hda_gen_update_outputs(struct hda_codec *codec); -#ifdef CONFIG_PM int snd_hda_gen_check_power_status(struct hda_codec *codec, hda_nid_t nid); -#endif unsigned int snd_hda_gen_path_power_filter(struct hda_codec 
*codec, hda_nid_t nid, unsigned int power_state); diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index a03f17f1328f..2bd652dcb5b3 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -186,8 +186,10 @@ MODULE_PARM_DESC(pm_blacklist, "Enable power-management denylist"); static bool power_save_controller = 1; module_param(power_save_controller, bool, 0644); MODULE_PARM_DESC(power_save_controller, "Reset controller in power save mode."); -#else +#else /* CONFIG_PM */ #define power_save 0 +#define pm_blacklist false +#define power_save_controller false #endif /* CONFIG_PM */ static int align_buffer_size = -1; @@ -893,7 +895,6 @@ static void __azx_shutdown_chip(struct azx *chip, bool skip_link_reset) display_power(chip, false); } -#ifdef CONFIG_PM static DEFINE_MUTEX(card_list_lock); static LIST_HEAD(card_list); @@ -919,7 +920,7 @@ static void azx_del_card_list(struct azx *chip) } /* trigger power-save check at writing parameter */ -static int param_set_xint(const char *val, const struct kernel_param *kp) +static int __maybe_unused param_set_xint(const char *val, const struct kernel_param *kp) { struct hda_intel *hda; struct azx *chip; @@ -990,7 +991,6 @@ static void __azx_runtime_resume(struct azx *chip) display_power(chip, false); } -#ifdef CONFIG_PM_SLEEP static int azx_prepare(struct device *dev) { struct snd_card *card = dev_get_drvdata(dev); @@ -1049,7 +1049,7 @@ static int azx_suspend(struct device *dev) return 0; } -static int azx_resume(struct device *dev) +static int __maybe_unused azx_resume(struct device *dev) { struct snd_card *card = dev_get_drvdata(dev); struct azx *chip; @@ -1100,9 +1100,8 @@ static int azx_thaw_noirq(struct device *dev) return 0; } -#endif /* CONFIG_PM_SLEEP */ -static int azx_runtime_suspend(struct device *dev) +static int __maybe_unused azx_runtime_suspend(struct device *dev) { struct snd_card *card = dev_get_drvdata(dev); struct azx *chip; @@ -1119,7 +1118,7 @@ static int 
azx_runtime_suspend(struct device *dev) return 0; } -static int azx_runtime_resume(struct device *dev) +static int __maybe_unused azx_runtime_resume(struct device *dev) { struct snd_card *card = dev_get_drvdata(dev); struct azx *chip; @@ -1136,7 +1135,7 @@ static int azx_runtime_resume(struct device *dev) return 0; } -static int azx_runtime_idle(struct device *dev) +static int __maybe_unused azx_runtime_idle(struct device *dev) { struct snd_card *card = dev_get_drvdata(dev); struct azx *chip; @@ -1162,23 +1161,14 @@ static int azx_runtime_idle(struct device *dev) } static const struct dev_pm_ops azx_pm = { - SET_SYSTEM_SLEEP_PM_OPS(azx_suspend, azx_resume) -#ifdef CONFIG_PM_SLEEP - .prepare = azx_prepare, - .complete = azx_complete, - .freeze_noirq = azx_freeze_noirq, - .thaw_noirq = azx_thaw_noirq, -#endif + SYSTEM_SLEEP_PM_OPS(azx_suspend, azx_resume) + .prepare = pm_sleep_ptr(azx_prepare), + .complete = pm_sleep_ptr(azx_complete), + .freeze_noirq = pm_sleep_ptr(azx_freeze_noirq), + .thaw_noirq = pm_sleep_ptr(azx_thaw_noirq), SET_RUNTIME_PM_OPS(azx_runtime_suspend, azx_runtime_resume, azx_runtime_idle) }; -#define AZX_PM_OPS &azx_pm -#else -#define azx_add_card_list(chip) /* NOP */ -#define azx_del_card_list(chip) /* NOP */ -#define AZX_PM_OPS NULL -#endif /* CONFIG_PM */ - static int azx_probe_continue(struct azx *chip); @@ -2209,7 +2199,6 @@ out_free: return err; } -#ifdef CONFIG_PM /* On some boards setting power_save to a non 0 value leads to clicking / * popping sounds when ever we enter/leave powersaving mode. Ideally we would * figure out how to avoid these sounds, but that is not always feasible. 
@@ -2251,13 +2240,11 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x1734, 0x1232, "KONTRON SinglePC", 0), {} }; -#endif /* CONFIG_PM */ static void set_default_power_save(struct azx *chip) { int val = power_save; -#ifdef CONFIG_PM if (pm_blacklist) { const struct snd_pci_quirk *q; @@ -2268,7 +2255,6 @@ static void set_default_power_save(struct azx *chip) val = 0; } } -#endif /* CONFIG_PM */ snd_hda_set_power_save(&chip->bus, val * 1000); } @@ -2324,10 +2310,6 @@ static int azx_probe_continue(struct azx *chip) chip->fw->data); if (err < 0) goto out_free; -#ifndef CONFIG_PM - release_firmware(chip->fw); /* no longer needed */ - chip->fw = NULL; -#endif } #endif @@ -2770,7 +2752,7 @@ static struct pci_driver azx_driver = { .remove = azx_remove, .shutdown = azx_shutdown, .driver = { - .pm = AZX_PM_OPS, + .pm = &azx_pm, }, }; diff --git a/sound/pci/hda/hda_intel_trace.h b/sound/pci/hda/hda_intel_trace.h index 73a7adfa192d..2775fa81a500 100644 --- a/sound/pci/hda/hda_intel_trace.h +++ b/sound/pci/hda/hda_intel_trace.h @@ -34,7 +34,6 @@ DEFINE_EVENT(hda_pm, azx_resume, TP_ARGS(chip) ); -#ifdef CONFIG_PM DEFINE_EVENT(hda_pm, azx_runtime_suspend, TP_PROTO(struct azx *chip), TP_ARGS(chip) @@ -44,7 +43,6 @@ DEFINE_EVENT(hda_pm, azx_runtime_resume, TP_PROTO(struct azx *chip), TP_ARGS(chip) ); -#endif #endif /* _TRACE_HDA_INTEL_H */ diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c index 69ebc37a4d6f..265fd4737893 100644 --- a/sound/pci/hda/hda_sysfs.c +++ b/sound/pci/hda/hda_sysfs.c @@ -26,7 +26,6 @@ struct hda_hint { const char *val; /* contained in the same alloc as key */ }; -#ifdef CONFIG_PM static ssize_t power_on_acct_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -47,7 +46,6 @@ static ssize_t power_off_acct_show(struct device *dev, static DEVICE_ATTR_RO(power_on_acct); static DEVICE_ATTR_RO(power_off_acct); -#endif /* CONFIG_PM */ #define CODEC_INFO_SHOW(type, field) \ static ssize_t 
type##_show(struct device *dev, \ @@ -745,10 +743,8 @@ static struct attribute *hda_dev_attrs[] = { &dev_attr_modelname.attr, &dev_attr_init_pin_configs.attr, &dev_attr_driver_pin_configs.attr, -#ifdef CONFIG_PM &dev_attr_power_on_acct.attr, &dev_attr_power_off_acct.attr, -#endif #ifdef CONFIG_SND_HDA_RECONFIG &dev_attr_init_verbs.attr, &dev_attr_hints.attr, diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 8afe6000f7da..1e9dadcdc51b 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -72,7 +72,6 @@ static int create_beep_ctls(struct hda_codec *codec) #define create_beep_ctls(codec) 0 #endif -#ifdef CONFIG_PM static void ad198x_power_eapd_write(struct hda_codec *codec, hda_nid_t front, hda_nid_t hp) { @@ -118,7 +117,6 @@ static int ad198x_suspend(struct hda_codec *codec) ad198x_power_eapd(codec); return 0; } -#endif /* follow EAPD via vmaster hook */ static void ad_vmaster_eapd_hook(void *private_data, int enabled) @@ -158,10 +156,8 @@ static const struct hda_codec_ops ad198x_auto_patch_ops = { .init = snd_hda_gen_init, .free = snd_hda_gen_free, .unsol_event = snd_hda_jack_unsol_event, -#ifdef CONFIG_PM .check_power_status = snd_hda_gen_check_power_status, .suspend = ad198x_suspend, -#endif }; diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index aa312441604f..e4673a71551a 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -9682,7 +9682,6 @@ static void dbpro_free(struct hda_codec *codec) kfree(codec->spec); } -#ifdef CONFIG_PM static int ca0132_suspend(struct hda_codec *codec) { struct ca0132_spec *spec = codec->spec; @@ -9690,7 +9689,6 @@ static int ca0132_suspend(struct hda_codec *codec) cancel_delayed_work_sync(&spec->unsol_hp_work); return 0; } -#endif static const struct hda_codec_ops ca0132_patch_ops = { .build_controls = ca0132_build_controls, @@ -9698,9 +9696,7 @@ static const struct hda_codec_ops ca0132_patch_ops = { .init = ca0132_init, .free = 
ca0132_free, .unsol_event = snd_hda_jack_unsol_event, -#ifdef CONFIG_PM .suspend = ca0132_suspend, -#endif }; static const struct hda_codec_ops dbpro_patch_ops = { diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 6807b4708a17..654724559355 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -1128,7 +1128,6 @@ static int cs421x_parse_auto_config(struct hda_codec *codec) return 0; } -#ifdef CONFIG_PM /* * Manage PDREF, when transitioning to D3hot * (DAC,ADC) -> D3, PDREF=1, AFG->D3 @@ -1153,7 +1152,6 @@ static int cs421x_suspend(struct hda_codec *codec) return 0; } -#endif static const struct hda_codec_ops cs421x_patch_ops = { .build_controls = snd_hda_gen_build_controls, @@ -1161,9 +1159,7 @@ static const struct hda_codec_ops cs421x_patch_ops = { .init = cs421x_init, .free = cs_free, .unsol_event = snd_hda_jack_unsol_event, -#ifdef CONFIG_PM .suspend = cs421x_suspend, -#endif }; static int patch_cs4210(struct hda_codec *codec) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index e8209178d87b..17389a3801bd 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -294,13 +294,11 @@ static void cx_jack_unsol_event(struct hda_codec *codec, unsigned int res) snd_hda_jack_unsol_event(codec, res); } -#ifdef CONFIG_PM static int cx_auto_suspend(struct hda_codec *codec) { cx_auto_shutdown(codec); return 0; } -#endif static const struct hda_codec_ops cx_auto_patch_ops = { .build_controls = snd_hda_gen_build_controls, @@ -308,10 +306,8 @@ static const struct hda_codec_ops cx_auto_patch_ops = { .init = cx_auto_init, .free = cx_auto_free, .unsol_event = cx_jack_unsol_event, -#ifdef CONFIG_PM .suspend = cx_auto_suspend, .check_power_status = snd_hda_gen_check_power_status, -#endif }; /* diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index e41316e2e983..26f3c31600d7 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c 
@@ -909,7 +909,6 @@ static void cs42l42_resume(struct sub_codec *cs42l42) cs42l42_enable_jack_detect(cs42l42); } -#ifdef CONFIG_PM static void cs42l42_suspend(struct sub_codec *cs42l42) { struct hda_codec *codec = cs42l42->codec; @@ -948,7 +947,6 @@ static void cs42l42_suspend(struct sub_codec *cs42l42) spec->gpio_data &= ~cs42l42->reset_gpio; snd_hda_codec_write(codec, CS8409_PIN_AFG, 0, AC_VERB_SET_GPIO_DATA, spec->gpio_data); } -#endif static void cs8409_free(struct hda_codec *codec) { @@ -1003,7 +1001,6 @@ static void cs8409_cs42l42_jack_unsol_event(struct hda_codec *codec, unsigned in } } -#ifdef CONFIG_PM /* Manage PDREF, when transition to D3hot */ static int cs8409_cs42l42_suspend(struct hda_codec *codec) { @@ -1025,7 +1022,6 @@ static int cs8409_cs42l42_suspend(struct hda_codec *codec) return 0; } -#endif /* Vendor specific HW configuration * PLL, ASP, I2C, SPI, GPIOs, DMIC etc... @@ -1080,9 +1076,7 @@ static const struct hda_codec_ops cs8409_cs42l42_patch_ops = { .init = cs8409_init, .free = cs8409_free, .unsol_event = cs8409_cs42l42_jack_unsol_event, -#ifdef CONFIG_PM .suspend = cs8409_cs42l42_suspend, -#endif }; static int cs8409_cs42l42_exec_verb(struct hdac_device *dev, unsigned int cmd, unsigned int flags, @@ -1310,9 +1304,7 @@ static const struct hda_codec_ops cs8409_dolphin_patch_ops = { .init = cs8409_init, .free = cs8409_free, .unsol_event = dolphin_jack_unsol_event, -#ifdef CONFIG_PM .suspend = cs8409_cs42l42_suspend, -#endif }; static int dolphin_exec_verb(struct hdac_device *dev, unsigned int cmd, unsigned int flags, diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 0a00c5ea42e0..db24c8fc7cd0 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2513,7 +2513,6 @@ static void generic_hdmi_free(struct hda_codec *codec) generic_spec_free(codec); } -#ifdef CONFIG_PM static int generic_hdmi_suspend(struct hda_codec *codec) { struct hdmi_spec *spec = codec->spec; @@ -2540,7 +2539,6 @@ static int 
generic_hdmi_resume(struct hda_codec *codec) } return 0; } -#endif static const struct hda_codec_ops generic_hdmi_patch_ops = { .init = generic_hdmi_init, @@ -2548,10 +2546,8 @@ static const struct hda_codec_ops generic_hdmi_patch_ops = { .build_pcms = generic_hdmi_build_pcms, .build_controls = generic_hdmi_build_controls, .unsol_event = hdmi_unsol_event, -#ifdef CONFIG_PM .suspend = generic_hdmi_suspend, .resume = generic_hdmi_resume, -#endif }; static const struct hdmi_ops generic_standard_hdmi_ops = { @@ -2952,7 +2948,6 @@ static void i915_pin_cvt_fixup(struct hda_codec *codec, } } -#ifdef CONFIG_PM static int i915_adlp_hdmi_suspend(struct hda_codec *codec) { struct hdmi_spec *spec = codec->spec; @@ -3032,7 +3027,6 @@ static int i915_adlp_hdmi_resume(struct hda_codec *codec) return res; } -#endif /* precondition and allocation for Intel codecs */ static int alloc_intel_hdmi(struct hda_codec *codec) @@ -3167,10 +3161,8 @@ static int patch_i915_adlp_hdmi(struct hda_codec *codec) if (spec->silent_stream_type) { spec->silent_stream_type = SILENT_STREAM_KAE; -#ifdef CONFIG_PM codec->patch_ops.resume = i915_adlp_hdmi_resume; codec->patch_ops.suspend = i915_adlp_hdmi_suspend; -#endif } } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 38a9b4872f1e..2eb419c814f6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -109,9 +109,7 @@ struct alc_spec { /* hooks */ void (*init_hook)(struct hda_codec *codec); -#ifdef CONFIG_PM void (*power_hook)(struct hda_codec *codec); -#endif void (*shutup)(struct hda_codec *codec); int init_amp; @@ -920,6 +918,8 @@ static void alc_pre_init(struct hda_codec *codec) ((codec)->core.dev.power.power_state.event == PM_EVENT_RESUME) #define is_s4_resume(codec) \ ((codec)->core.dev.power.power_state.event == PM_EVENT_RESTORE) +#define is_s4_suspend(codec) \ + ((codec)->core.dev.power.power_state.event == PM_EVENT_FREEZE) static int alc_init(struct hda_codec *codec) { @@ -945,7 +945,6 
@@ static int alc_init(struct hda_codec *codec) #define alc_free snd_hda_gen_free -#ifdef CONFIG_PM static inline void alc_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -984,7 +983,6 @@ static int alc_resume(struct hda_codec *codec) hda_call_check_power_status(codec, 0x01); return 0; } -#endif /* */ @@ -994,11 +992,9 @@ static const struct hda_codec_ops alc_patch_ops = { .init = alc_init, .free = alc_free, .unsol_event = snd_hda_jack_unsol_event, -#ifdef CONFIG_PM .resume = alc_resume, .suspend = alc_suspend, .check_power_status = snd_hda_gen_check_power_status, -#endif }; @@ -4039,7 +4035,6 @@ static void alc5505_dsp_init(struct hda_codec *codec) #define alc5505_dsp_resume(codec) alc5505_dsp_back_from_halt(codec) #endif -#ifdef CONFIG_PM static int alc269_suspend(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -4085,7 +4080,6 @@ static int alc269_resume(struct hda_codec *codec) return 0; } -#endif /* CONFIG_PM */ static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -7183,6 +7177,44 @@ static void alc245_fixup_hp_spectre_x360_eu0xxx(struct hda_codec *codec, alc245_fixup_hp_gpio_led(codec, fix, action); } +/* + * ALC287 PCM hooks + */ +static void alc287_alc1318_playback_pcm_hook(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream, + int action) +{ + alc_write_coef_idx(codec, 0x10, 0x8806); /* Change MLK to GPIO3 */ + switch (action) { + case HDA_GEN_PCM_ACT_OPEN: + alc_write_coefex_idx(codec, 0x5a, 0x00, 0x954f); /* write gpio3 to high */ + break; + case HDA_GEN_PCM_ACT_CLOSE: + alc_write_coefex_idx(codec, 0x5a, 0x00, 0x554f); /* write gpio3 as default value */ + break; + } +} + +static void alc287_s4_power_gpio3_default(struct hda_codec *codec) +{ + if (is_s4_suspend(codec)) { + alc_write_coef_idx(codec, 0x10, 0x8806); /* Change MLK to GPIO3 */ + alc_write_coefex_idx(codec, 0x5a, 0x00, 0x554f); /* write gpio3 as 
default value */ + } +} + +static void alc287_fixup_lenovo_thinkpad_with_alc1318(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action != HDA_FIXUP_ACT_PRE_PROBE) + return; + spec->power_hook = alc287_s4_power_gpio3_default; + spec->gen.pcm_playback_hook = alc287_alc1318_playback_pcm_hook; +} + enum { ALC269_FIXUP_GPIO2, @@ -7426,6 +7458,7 @@ enum { ALC287_FIXUP_YOGA7_14ITL_SPEAKERS, ALC298_FIXUP_LENOVO_C940_DUET7, ALC287_FIXUP_LENOVO_14IRP8_DUETITL, + ALC287_FIXUP_LENOVO_LEGION_7, ALC287_FIXUP_13S_GEN2_SPEAKERS, ALC256_FIXUP_SET_COEF_DEFAULTS, ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, @@ -7470,7 +7503,8 @@ enum { ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC, ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1, ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC, - ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1 + ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1, + ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -7510,6 +7544,23 @@ static void alc287_fixup_lenovo_14irp8_duetitl(struct hda_codec *codec, __snd_hda_apply_fixup(codec, id, action, 0); } +/* Another hilarious PCI SSID conflict with Lenovo Legion Pro 7 16ARX8H (with + * TAS2781 codec) and Legion 7i 16IAX7 (with CS35L41 codec); + * we apply a corresponding fixup depending on the codec SSID instead + */ +static void alc287_fixup_lenovo_legion_7(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + int id; + + if (codec->core.subsystem_id == 0x17aa38a8) + id = ALC287_FIXUP_TAS2781_I2C; /* Legion Pro 7 16ARX8H */ + else + id = ALC287_FIXUP_CS35L41_I2C_2; /* Legion 7i 16IAX7 */ + __snd_hda_apply_fixup(codec, id, action, 0); +} + static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_GPIO2] = { .type = HDA_FIXUP_FUNC, @@ -9404,6 +9455,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_lenovo_14irp8_duetitl, }, + [ALC287_FIXUP_LENOVO_LEGION_7] = { 
+ .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_lenovo_legion_7, + }, [ALC287_FIXUP_13S_GEN2_SPEAKERS] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -9726,6 +9781,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC285_FIXUP_ASUS_GA403U, }, + [ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_lenovo_thinkpad_with_alc1318, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9937,6 +9998,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x86c1, "HP Laptop 15-da3001TU", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), @@ -10397,6 +10459,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x231e, "Thinkpad", ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318), + SND_PCI_QUIRK(0x17aa, 0x231f, "Thinkpad", ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre 
Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), @@ -10428,7 +10492,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3865, "Lenovo 13X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3866, "Lenovo 13X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), - SND_PCI_QUIRK(0x17aa, 0x386f, "Legion 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x17aa, 0x386f, "Legion Pro 7/7i", ALC287_FIXUP_LENOVO_LEGION_7), SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C), SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3878, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2), @@ -11219,10 +11283,8 @@ static int patch_alc269(struct hda_codec *codec) codec->power_save_node = 0; spec->en_3kpull_low = true; -#ifdef CONFIG_PM codec->patch_ops.suspend = alc269_suspend; codec->patch_ops.resume = alc269_resume; -#endif spec->shutup = alc_default_shutup; spec->init_hook = alc_default_init; @@ -11520,9 +11582,7 @@ static int patch_alc861(struct hda_codec *codec) if (has_cdefine_beep(codec)) spec->gen.beep_nid = 0x23; -#ifdef CONFIG_PM spec->power_hook = alc_power_eapd; -#endif alc_pre_init(codec); diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 61258b0aac8d..ae1a34c68c61 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -2154,10 +2154,8 @@ static void stac92hd83xxx_fixup_hp_mic_led(struct hda_codec *codec, if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->mic_mute_led_gpio = 0x08; /* GPIO3 */ -#ifdef CONFIG_PM /* resetting controller clears GPIO, so we need to keep on */ codec->core.power_caps &= ~AC_PWRST_CLKSTOP; -#endif } } @@ -4442,7 +4440,6 @@ static void stac927x_proc_hook(struct snd_info_buffer *buffer, #define stac927x_proc_hook NULL #endif -#ifdef CONFIG_PM static int stac_suspend(struct 
hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; @@ -4456,9 +4453,6 @@ static int stac_suspend(struct hda_codec *codec) return 0; } -#else -#define stac_suspend NULL -#endif /* CONFIG_PM */ static const struct hda_codec_ops stac_patch_ops = { .build_controls = snd_hda_gen_build_controls, @@ -4466,9 +4460,7 @@ static const struct hda_codec_ops stac_patch_ops = { .init = stac_init, .free = stac_free, .unsol_event = snd_hda_jack_unsol_event, -#ifdef CONFIG_PM .suspend = stac_suspend, -#endif }; static int alloc_stac_spec(struct hda_codec *codec) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 2994f85bc1b9..a8ef4bb70dd0 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -379,7 +379,6 @@ static void via_free(struct hda_codec *codec) snd_hda_gen_free(codec); } -#ifdef CONFIG_PM static int via_suspend(struct hda_codec *codec) { struct via_spec *spec = codec->spec; @@ -400,9 +399,7 @@ static int via_resume(struct hda_codec *codec) snd_hda_regmap_sync(codec); return 0; } -#endif -#ifdef CONFIG_PM static int via_check_power_status(struct hda_codec *codec, hda_nid_t nid) { struct via_spec *spec = codec->spec; @@ -410,7 +407,6 @@ static int via_check_power_status(struct hda_codec *codec, hda_nid_t nid) vt1708_update_hp_work(codec); return snd_hda_check_amp_list_power(codec, &spec->gen.loopback, nid); } -#endif /* */ @@ -423,11 +419,9 @@ static const struct hda_codec_ops via_patch_ops = { .init = via_init, .free = via_free, .unsol_event = snd_hda_jack_unsol_event, -#ifdef CONFIG_PM .suspend = via_suspend, .resume = via_resume, .check_power_status = via_check_power_status, -#endif }; diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 69c68d8e7a6b..1760b5d42460 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -433,6 +433,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, 
"MDC"), + DMI_MATCH(DMI_BOARD_NAME, "Herbag_MDU"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "System76"), DMI_MATCH(DMI_PRODUCT_VERSION, "pang12"), } diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c index dfb4ce53491b..f8e57a2fc3e3 100644 --- a/sound/soc/codecs/cs35l41.c +++ b/sound/soc/codecs/cs35l41.c @@ -1094,6 +1094,7 @@ static int cs35l41_handle_pdata(struct device *dev, struct cs35l41_hw_cfg *hw_cf static int cs35l41_dsp_init(struct cs35l41_private *cs35l41) { struct wm_adsp *dsp; + uint32_t dsp1rx5_src; int ret; dsp = &cs35l41->dsp; @@ -1113,16 +1114,29 @@ static int cs35l41_dsp_init(struct cs35l41_private *cs35l41) return ret; } - ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX5_SRC, - CS35L41_INPUT_SRC_VPMON); + switch (cs35l41->hw_cfg.bst_type) { + case CS35L41_INT_BOOST: + case CS35L41_SHD_BOOST_ACTV: + dsp1rx5_src = CS35L41_INPUT_SRC_VPMON; + break; + case CS35L41_EXT_BOOST: + case CS35L41_SHD_BOOST_PASS: + dsp1rx5_src = CS35L41_INPUT_SRC_VBSTMON; + break; + default: + dev_err(cs35l41->dev, "wm_halo_init failed - Invalid Boost Type: %d\n", + cs35l41->hw_cfg.bst_type); + goto err_dsp; + } + + ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX5_SRC, dsp1rx5_src); if (ret < 0) { - dev_err(cs35l41->dev, "Write INPUT_SRC_VPMON failed: %d\n", ret); + dev_err(cs35l41->dev, "Write DSP1RX5_SRC: %d failed: %d\n", dsp1rx5_src, ret); goto err_dsp; } - ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX6_SRC, - CS35L41_INPUT_SRC_CLASSH); + ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX6_SRC, CS35L41_INPUT_SRC_VBSTMON); if (ret < 0) { - dev_err(cs35l41->dev, "Write INPUT_SRC_CLASSH failed: %d\n", ret); + dev_err(cs35l41->dev, "Write CS35L41_INPUT_SRC_VBSTMON failed: %d\n", ret); goto err_dsp; } ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX7_SRC, diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index 14a5f86019aa..70ff55c1517f 100644 --- 
a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -188,8 +188,6 @@ static void cs35l56_sdw_init(struct sdw_slave *peripheral) goto out; } - regcache_cache_only(cs35l56->base.regmap, false); - ret = cs35l56_init(cs35l56); if (ret < 0) { regcache_cache_only(cs35l56->base.regmap, true); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 08cac58e3ab2..fd02b621da52 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -40,16 +40,11 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_set_patch, SND_SOC_CS35L56_SHARED); static const struct reg_default cs35l56_reg_defaults[] = { /* no defaults for OTP_MEM - first read populates cache */ - { CS35L56_ASP1_ENABLES1, 0x00000000 }, - { CS35L56_ASP1_CONTROL1, 0x00000028 }, - { CS35L56_ASP1_CONTROL2, 0x18180200 }, - { CS35L56_ASP1_CONTROL3, 0x00000002 }, - { CS35L56_ASP1_FRAME_CONTROL1, 0x03020100 }, - { CS35L56_ASP1_FRAME_CONTROL5, 0x00020100 }, - { CS35L56_ASP1_DATA_CONTROL1, 0x00000018 }, - { CS35L56_ASP1_DATA_CONTROL5, 0x00000018 }, - - /* no defaults for ASP1TX mixer */ + /* + * No defaults for ASP1 control or ASP1TX mixer. See + * cs35l56_populate_asp1_register_defaults() and + * cs35l56_sync_asp1_mixer_widgets_with_firmware(). 
+ */ { CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 }, { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, @@ -210,6 +205,36 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg) } } +static const struct reg_sequence cs35l56_asp1_defaults[] = { + REG_SEQ0(CS35L56_ASP1_ENABLES1, 0x00000000), + REG_SEQ0(CS35L56_ASP1_CONTROL1, 0x00000028), + REG_SEQ0(CS35L56_ASP1_CONTROL2, 0x18180200), + REG_SEQ0(CS35L56_ASP1_CONTROL3, 0x00000002), + REG_SEQ0(CS35L56_ASP1_FRAME_CONTROL1, 0x03020100), + REG_SEQ0(CS35L56_ASP1_FRAME_CONTROL5, 0x00020100), + REG_SEQ0(CS35L56_ASP1_DATA_CONTROL1, 0x00000018), + REG_SEQ0(CS35L56_ASP1_DATA_CONTROL5, 0x00000018), +}; + +/* + * The firmware can have control of the ASP so we don't provide regmap + * with defaults for these registers, to prevent a regcache_sync() from + * overwriting the firmware settings. But if the machine driver hooks up + * the ASP it means the driver is taking control of the ASP, so then the + * registers are populated with the defaults. + */ +int cs35l56_init_asp1_regs_for_driver_control(struct cs35l56_base *cs35l56_base) +{ + if (!cs35l56_base->fw_owns_asp1) + return 0; + + cs35l56_base->fw_owns_asp1 = false; + + return regmap_multi_reg_write(cs35l56_base->regmap, cs35l56_asp1_defaults, + ARRAY_SIZE(cs35l56_asp1_defaults)); +} +EXPORT_SYMBOL_NS_GPL(cs35l56_init_asp1_regs_for_driver_control, SND_SOC_CS35L56_SHARED); + /* * The firmware boot sequence can overwrite the ASP1 config registers so that * they don't match regmap's view of their values. 
Rewrite the values from the @@ -217,19 +242,15 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg) */ int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base) { - struct reg_sequence asp1_regs[] = { - { .reg = CS35L56_ASP1_ENABLES1 }, - { .reg = CS35L56_ASP1_CONTROL1 }, - { .reg = CS35L56_ASP1_CONTROL2 }, - { .reg = CS35L56_ASP1_CONTROL3 }, - { .reg = CS35L56_ASP1_FRAME_CONTROL1 }, - { .reg = CS35L56_ASP1_FRAME_CONTROL5 }, - { .reg = CS35L56_ASP1_DATA_CONTROL1 }, - { .reg = CS35L56_ASP1_DATA_CONTROL5 }, - }; + struct reg_sequence asp1_regs[ARRAY_SIZE(cs35l56_asp1_defaults)]; int i, ret; - /* Read values from regmap cache into a write sequence */ + if (cs35l56_base->fw_owns_asp1) + return 0; + + memcpy(asp1_regs, cs35l56_asp1_defaults, sizeof(asp1_regs)); + + /* Read current values from regmap cache into the write sequence */ for (i = 0; i < ARRAY_SIZE(asp1_regs); ++i) { ret = regmap_read(cs35l56_base->regmap, asp1_regs[i].reg, &asp1_regs[i].def); if (ret) @@ -307,10 +328,10 @@ int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base) reg = CS35L56_DSP1_HALO_STATE; /* - * This can't be a regmap_read_poll_timeout() because cs35l56 will NAK - * I2C until it has booted which would terminate the poll + * The regmap must remain in cache-only until the chip has + * booted, so use a bypassed read of the status register. */ - poll_ret = read_poll_timeout(regmap_read, read_ret, + poll_ret = read_poll_timeout(regmap_read_bypassed, read_ret, (val < 0xFFFF) && (val >= CS35L56_HALO_STATE_BOOT_DONE), CS35L56_HALO_STATE_POLL_US, CS35L56_HALO_STATE_TIMEOUT_US, @@ -362,7 +383,8 @@ void cs35l56_system_reset(struct cs35l56_base *cs35l56_base, bool is_soundwire) return; cs35l56_wait_control_port_ready(); - regcache_cache_only(cs35l56_base->regmap, false); + + /* Leave in cache-only. This will be revoked when the chip has rebooted. 
*/ } EXPORT_SYMBOL_NS_GPL(cs35l56_system_reset, SND_SOC_CS35L56_SHARED); @@ -577,14 +599,14 @@ int cs35l56_runtime_resume_common(struct cs35l56_base *cs35l56_base, bool is_sou cs35l56_issue_wake_event(cs35l56_base); out_sync: - regcache_cache_only(cs35l56_base->regmap, false); - ret = cs35l56_wait_for_firmware_boot(cs35l56_base); if (ret) { dev_err(cs35l56_base->dev, "Hibernate wake failed: %d\n", ret); goto err; } + regcache_cache_only(cs35l56_base->regmap, false); + ret = cs35l56_mbox_send(cs35l56_base, CS35L56_MBOX_CMD_PREVENT_AUTO_HIBERNATE); if (ret) goto err; @@ -684,7 +706,7 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_calibration_controls, SND_SOC_CS35L56_SHARED); int cs35l56_get_calibration(struct cs35l56_base *cs35l56_base) { - u64 silicon_uid; + u64 silicon_uid = 0; int ret; /* Driver can't apply calibration to a secured part, so skip */ @@ -757,7 +779,7 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) * devices so the REVID needs to be determined before waiting for the * firmware to boot. 
*/ - ret = regmap_read(cs35l56_base->regmap, CS35L56_REVID, &revid); + ret = regmap_read_bypassed(cs35l56_base->regmap, CS35L56_REVID, &revid); if (ret < 0) { dev_err(cs35l56_base->dev, "Get Revision ID failed\n"); return ret; @@ -768,7 +790,7 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) if (ret) return ret; - ret = regmap_read(cs35l56_base->regmap, CS35L56_DEVID, &devid); + ret = regmap_read_bypassed(cs35l56_base->regmap, CS35L56_DEVID, &devid); if (ret < 0) { dev_err(cs35l56_base->dev, "Get Device ID failed\n"); return ret; @@ -787,6 +809,9 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) cs35l56_base->type = devid & 0xFF; + /* Silicon is now identified and booted so exit cache-only */ + regcache_cache_only(cs35l56_base->regmap, false); + ret = regmap_read(cs35l56_base->regmap, CS35L56_DSP_RESTRICT_STS1, &secured); if (ret) { dev_err(cs35l56_base->dev, "Get Secure status failed\n"); diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 8d2f021fb362..4986e78105da 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -454,9 +454,14 @@ static int cs35l56_asp_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int f { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(codec_dai->component); unsigned int val; + int ret; dev_dbg(cs35l56->base.dev, "%s: %#x\n", __func__, fmt); + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; + switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { case SND_SOC_DAIFMT_CBC_CFC: break; @@ -530,6 +535,11 @@ static int cs35l56_asp_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx unsigned int rx_mask, int slots, int slot_width) { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component); + int ret; + + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; if ((slots == 0) || (slot_width == 0)) { dev_dbg(cs35l56->base.dev, "tdm config cleared\n"); @@ -578,6 
+588,11 @@ static int cs35l56_asp_dai_hw_params(struct snd_pcm_substream *substream, struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component); unsigned int rate = params_rate(params); u8 asp_width, asp_wl; + int ret; + + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; asp_wl = params_width(params); if (cs35l56->asp_slot_width) @@ -634,7 +649,11 @@ static int cs35l56_asp_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id, unsigned int freq, int dir) { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component); - int freq_id; + int freq_id, ret; + + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; if (freq == 0) { cs35l56->sysclk_set = false; @@ -1341,6 +1360,7 @@ static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l5 "spk-id-gpios", ACPI_TYPE_PACKAGE, &obj); if (ret) { dev_dbg(cs35l56->base.dev, "Could not get spk-id-gpios package: %d\n", ret); + fwnode_handle_put(af01_fwnode); return -ENOENT; } @@ -1348,6 +1368,7 @@ static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l5 if (obj->package.count != 4) { dev_warn(cs35l56->base.dev, "Unexpected spk-id element count %d\n", obj->package.count); + fwnode_handle_put(af01_fwnode); return -ENOENT; } @@ -1362,6 +1383,7 @@ static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l5 */ ret = acpi_dev_add_driver_gpios(adev, cs35l56_af01_spkid_gpios_mapping); if (ret) { + fwnode_handle_put(af01_fwnode); return dev_err_probe(cs35l56->base.dev, ret, "Failed to add gpio mapping to AF01\n"); } @@ -1369,14 +1391,17 @@ static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l5 ret = devm_add_action_or_reset(cs35l56->base.dev, cs35l56_acpi_dev_release_driver_gpios, adev); - if (ret) + if (ret) { + fwnode_handle_put(af01_fwnode); return ret; + } dev_dbg(cs35l56->base.dev, "Added spk-id-gpios mapping to AF01\n"); } desc = 
fwnode_gpiod_get_index(af01_fwnode, "spk-id", 0, GPIOD_IN, NULL); if (IS_ERR(desc)) { + fwnode_handle_put(af01_fwnode); ret = PTR_ERR(desc); return dev_err_probe(cs35l56->base.dev, ret, "Get GPIO from AF01 failed\n"); } @@ -1385,9 +1410,12 @@ static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l5 gpiod_put(desc); if (ret < 0) { + fwnode_handle_put(af01_fwnode); dev_err_probe(cs35l56->base.dev, ret, "Error reading spk-id GPIO\n"); return ret; - } + } + + fwnode_handle_put(af01_fwnode); dev_info(cs35l56->base.dev, "Got spk-id from AF01\n"); @@ -1403,6 +1431,9 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) cs35l56->base.cal_index = -1; cs35l56->speaker_id = -ENOENT; + /* Assume that the firmware owns ASP1 until we know different */ + cs35l56->base.fw_owns_asp1 = true; + dev_set_drvdata(cs35l56->base.dev, cs35l56); cs35l56_fill_supply_names(cs35l56->supplies); @@ -1531,6 +1562,8 @@ post_soft_reset: return ret; dev_dbg(cs35l56->base.dev, "Firmware rebooted after soft reset\n"); + + regcache_cache_only(cs35l56->base.regmap, false); } /* Disable auto-hibernate so that runtime_pm has control */ diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c index 6bc068cdcbe2..15e5e3eb592b 100644 --- a/sound/soc/codecs/da7219-aad.c +++ b/sound/soc/codecs/da7219-aad.c @@ -671,8 +671,10 @@ static struct da7219_aad_pdata *da7219_aad_fw_to_pdata(struct device *dev) return NULL; aad_pdata = devm_kzalloc(dev, sizeof(*aad_pdata), GFP_KERNEL); - if (!aad_pdata) + if (!aad_pdata) { + fwnode_handle_put(aad_np); return NULL; + } aad_pdata->irq = i2c->irq; @@ -753,6 +755,8 @@ static struct da7219_aad_pdata *da7219_aad_fw_to_pdata(struct device *dev) else aad_pdata->adc_1bit_rpt = DA7219_AAD_ADC_1BIT_RPT_1; + fwnode_handle_put(aad_np); + return aad_pdata; } diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index e3ba04484813..d0d24a53df74 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -444,6 
+444,7 @@ struct rt5645_priv { struct regmap *regmap; struct i2c_client *i2c; struct gpio_desc *gpiod_hp_det; + struct gpio_desc *gpiod_cbj_sleeve; struct snd_soc_jack *hp_jack; struct snd_soc_jack *mic_jack; struct snd_soc_jack *btn_jack; @@ -3186,6 +3187,9 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse regmap_update_bits(rt5645->regmap, RT5645_IN1_CTRL2, RT5645_CBJ_MN_JD, 0); + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 1); + msleep(600); regmap_read(rt5645->regmap, RT5645_IN1_CTRL3, &val); val &= 0x7; @@ -3202,6 +3206,8 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse snd_soc_dapm_disable_pin(dapm, "Mic Det Power"); snd_soc_dapm_sync(dapm); rt5645->jack_type = SND_JACK_HEADPHONE; + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); } if (rt5645->pdata.level_trigger_irq) regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2, @@ -3229,6 +3235,9 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse if (rt5645->pdata.level_trigger_irq) regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2, RT5645_JD_1_1_MASK, RT5645_JD_1_1_INV); + + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); } return rt5645->jack_type; @@ -4012,6 +4021,16 @@ static int rt5645_i2c_probe(struct i2c_client *i2c) return ret; } + rt5645->gpiod_cbj_sleeve = devm_gpiod_get_optional(&i2c->dev, "cbj-sleeve", + GPIOD_OUT_LOW); + + if (IS_ERR(rt5645->gpiod_cbj_sleeve)) { + ret = PTR_ERR(rt5645->gpiod_cbj_sleeve); + dev_info(&i2c->dev, "failed to initialize gpiod, ret=%d\n", ret); + if (ret != -ENOENT) + return ret; + } + for (i = 0; i < ARRAY_SIZE(rt5645->supplies); i++) rt5645->supplies[i].supply = rt5645_supply_names[i]; @@ -4259,6 +4278,9 @@ static void rt5645_i2c_remove(struct i2c_client *i2c) cancel_delayed_work_sync(&rt5645->jack_detect_work); cancel_delayed_work_sync(&rt5645->rcclock_work); + if 
(rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); + regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies); } @@ -4274,6 +4296,9 @@ static void rt5645_i2c_shutdown(struct i2c_client *i2c) 0); msleep(20); regmap_write(rt5645->regmap, RT5645_RESET, 0); + + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); } static int __maybe_unused rt5645_sys_suspend(struct device *dev) diff --git a/sound/soc/codecs/rt715-sdca.c b/sound/soc/codecs/rt715-sdca.c index 3fb7b9adb61d..bc3579203c7a 100644 --- a/sound/soc/codecs/rt715-sdca.c +++ b/sound/soc/codecs/rt715-sdca.c @@ -316,7 +316,7 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol, return 0; } -static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -17625, 375, 0); +static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -1725, 75, 0); static const DECLARE_TLV_DB_SCALE(mic_vol_tlv, 0, 1000, 0); static int rt715_sdca_get_volsw(struct snd_kcontrol *kcontrol, @@ -477,7 +477,7 @@ static const struct snd_kcontrol_new rt715_sdca_snd_controls[] = { RT715_SDCA_FU_VOL_CTRL, CH_01), SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_ADC7_27_VOL, RT715_SDCA_FU_VOL_CTRL, CH_02), - 0x2f, 0x7f, 0, + 0x2f, 0x3f, 0, rt715_sdca_set_amp_gain_get, rt715_sdca_set_amp_gain_put, in_vol_tlv), RT715_SDCA_EXT_TLV("FU02 Capture Volume", @@ -485,13 +485,13 @@ static const struct snd_kcontrol_new rt715_sdca_snd_controls[] = { RT715_SDCA_FU_VOL_CTRL, CH_01), rt715_sdca_set_amp_gain_4ch_get, rt715_sdca_set_amp_gain_4ch_put, - in_vol_tlv, 4, 0x7f), + in_vol_tlv, 4, 0x3f), RT715_SDCA_EXT_TLV("FU06 Capture Volume", SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_ADC10_11_VOL, RT715_SDCA_FU_VOL_CTRL, CH_01), rt715_sdca_set_amp_gain_4ch_get, rt715_sdca_set_amp_gain_4ch_put, - in_vol_tlv, 4, 0x7f), + in_vol_tlv, 4, 0x3f), /* MIC Boost Control */ RT715_SDCA_BOOST_EXT_TLV("FU0E Boost", SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_DMIC_GAIN_EN, diff --git a/sound/soc/codecs/rt715-sdw.c 
b/sound/soc/codecs/rt715-sdw.c index 7e13868ff99f..f012fe0ded6d 100644 --- a/sound/soc/codecs/rt715-sdw.c +++ b/sound/soc/codecs/rt715-sdw.c @@ -111,6 +111,7 @@ static bool rt715_readable_register(struct device *dev, unsigned int reg) case 0x839d: case 0x83a7: case 0x83a9: + case 0x752001: case 0x752039: return true; default: diff --git a/sound/soc/codecs/rt722-sdca.c b/sound/soc/codecs/rt722-sdca.c index e0ea3a23f7cc..e5bd9ef812de 100644 --- a/sound/soc/codecs/rt722-sdca.c +++ b/sound/soc/codecs/rt722-sdca.c @@ -1330,7 +1330,7 @@ static struct snd_soc_dai_driver rt722_sdca_dai[] = { .capture = { .stream_name = "DP6 DMic Capture", .channels_min = 1, - .channels_max = 2, + .channels_max = 4, .rates = RT722_STEREO_RATES, .formats = RT722_FORMATS, }, @@ -1439,9 +1439,12 @@ static void rt722_sdca_jack_preset(struct rt722_sdca_priv *rt722) int loop_check, chk_cnt = 100, ret; unsigned int calib_status = 0; - /* Read eFuse */ - rt722_sdca_index_write(rt722, RT722_VENDOR_SPK_EFUSE, RT722_DC_CALIB_CTRL, - 0x4808); + /* Config analog bias */ + rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_ANALOG_BIAS_CTL3, + 0xa081); + /* GE related settings */ + rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_GE_RELATED_CTL2, + 0xa009); /* Button A, B, C, D bypass mode */ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_UMP_HID_CTL4, 0xcf00); @@ -1475,9 +1478,6 @@ static void rt722_sdca_jack_preset(struct rt722_sdca_priv *rt722) if ((calib_status & 0x0040) == 0x0) break; } - /* Release HP-JD, EN_CBJ_TIE_GL/R open, en_osw gating auto done bit */ - rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_DIGITAL_MISC_CTRL4, - 0x0010); /* Set ADC09 power entity floating control */ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ADC0A_08_PDE_FLOAT_CTL, 0x2a12); @@ -1490,8 +1490,21 @@ static void rt722_sdca_jack_preset(struct rt722_sdca_priv *rt722) /* Set DAC03 and HP power entity floating control */ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, 
RT722_DAC03_HP_PDE_FLOAT_CTL, 0x4040); + rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ENT_FLOAT_CTRL_1, + 0x4141); + rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_FLOAT_CTRL_1, + 0x0101); /* Fine tune PDE40 latency */ regmap_write(rt722->regmap, 0x2f58, 0x07); + regmap_write(rt722->regmap, 0x2f03, 0x06); + /* MIC VRefo */ + rt722_sdca_index_update_bits(rt722, RT722_VENDOR_REG, + RT722_COMBO_JACK_AUTO_CTL1, 0x0200, 0x0200); + rt722_sdca_index_update_bits(rt722, RT722_VENDOR_REG, + RT722_VREFO_GAT, 0x4000, 0x4000); + /* Release HP-JD, EN_CBJ_TIE_GL/R open, en_osw gating auto done bit */ + rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_DIGITAL_MISC_CTRL4, + 0x0010); } int rt722_sdca_io_init(struct device *dev, struct sdw_slave *slave) diff --git a/sound/soc/codecs/rt722-sdca.h b/sound/soc/codecs/rt722-sdca.h index 44af8901352e..2464361a7958 100644 --- a/sound/soc/codecs/rt722-sdca.h +++ b/sound/soc/codecs/rt722-sdca.h @@ -69,6 +69,7 @@ struct rt722_sdca_dmic_kctrl_priv { #define RT722_COMBO_JACK_AUTO_CTL2 0x46 #define RT722_COMBO_JACK_AUTO_CTL3 0x47 #define RT722_DIGITAL_MISC_CTRL4 0x4a +#define RT722_VREFO_GAT 0x63 #define RT722_FSM_CTL 0x67 #define RT722_SDCA_INTR_REC 0x82 #define RT722_SW_CONFIG1 0x8a @@ -127,6 +128,8 @@ struct rt722_sdca_dmic_kctrl_priv { #define RT722_UMP_HID_CTL6 0x66 #define RT722_UMP_HID_CTL7 0x67 #define RT722_UMP_HID_CTL8 0x68 +#define RT722_FLOAT_CTRL_1 0x70 +#define RT722_ENT_FLOAT_CTRL_1 0x76 /* Parameter & Verb control 01 (0x1a)(NID:20h) */ #define RT722_HIDDEN_REG_SW_RESET (0x1 << 14) diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index 3c025dabaf7a..1253695bebd8 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -1155,6 +1155,7 @@ static int wsa881x_probe(struct sdw_slave *pdev, pdev->prop.sink_ports = GENMASK(WSA881X_MAX_SWR_PORTS, 0); pdev->prop.sink_dpn_prop = wsa_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; + 
pdev->prop.clk_stop_mode1 = true; gpiod_direction_output(wsa881x->sd_n, !wsa881x->sd_n_val); wsa881x->regmap = devm_regmap_init_sdw(pdev, &wsa881x_regmap_config); diff --git a/sound/soc/intel/avs/icl.c b/sound/soc/intel/avs/icl.c index 9d9921e1cd4d..d2554c857732 100644 --- a/sound/soc/intel/avs/icl.c +++ b/sound/soc/intel/avs/icl.c @@ -64,7 +64,7 @@ struct avs_icl_memwnd2_desc { struct avs_icl_memwnd2 { union { struct avs_icl_memwnd2_desc slot_desc[AVS_ICL_MEMWND2_SLOTS_COUNT]; - u8 rsvd[PAGE_SIZE]; + u8 rsvd[SZ_4K]; }; u8 slot_array[AVS_ICL_MEMWND2_SLOTS_COUNT][PAGE_SIZE]; } __packed; diff --git a/sound/soc/intel/avs/topology.c b/sound/soc/intel/avs/topology.c index 13061bd1488b..42b42903ae9d 100644 --- a/sound/soc/intel/avs/topology.c +++ b/sound/soc/intel/avs/topology.c @@ -1582,6 +1582,8 @@ static int avs_widget_load(struct snd_soc_component *comp, int index, if (!le32_to_cpu(dw->priv.size)) return 0; + w->no_wname_in_kcontrol_name = true; + if (w->ignore_suspend && !AVS_S0IX_SUPPORTED) { dev_info_once(comp->dev, "Device does not support S0IX, check BIOS settings\n"); w->ignore_suspend = false; diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 05f38d1f7d82..b41a1147f1c3 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -636,28 +636,30 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_USE_AMCR0F28), }, { + /* Asus T100TAF, unlike other T100TA* models this one has a mono speaker */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TAF"), }, .driver_data = (void *)(BYT_RT5640_IN1_MAP | BYT_RT5640_JD_SRC_JD2_IN4N | BYT_RT5640_OVCD_TH_2000UA | BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_MONO_SPEAKER | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, { + /* Asus T100TA and T100TAM, must come after T100TAF (mono spk) match 
*/ .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TAF"), + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "T100TA"), }, .driver_data = (void *)(BYT_RT5640_IN1_MAP | BYT_RT5640_JD_SRC_JD2_IN4N | BYT_RT5640_OVCD_TH_2000UA | BYT_RT5640_OVCD_SF_0P75 | - BYT_RT5640_MONO_SPEAKER | - BYT_RT5640_DIFF_MIC | - BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, { diff --git a/sound/soc/meson/Kconfig b/sound/soc/meson/Kconfig index b93ea33739f2..6458d5dc4902 100644 --- a/sound/soc/meson/Kconfig +++ b/sound/soc/meson/Kconfig @@ -99,6 +99,7 @@ config SND_MESON_AXG_PDM config SND_MESON_CARD_UTILS tristate + select SND_DYNAMIC_MINORS config SND_MESON_CODEC_GLUE tristate diff --git a/sound/soc/meson/axg-card.c b/sound/soc/meson/axg-card.c index 3180aa4d3a15..8c5605c1e34e 100644 --- a/sound/soc/meson/axg-card.c +++ b/sound/soc/meson/axg-card.c @@ -318,6 +318,7 @@ static int axg_card_add_link(struct snd_soc_card *card, struct device_node *np, dai_link->cpus = cpu; dai_link->num_cpus = 1; + dai_link->nonatomic = true; ret = meson_card_parse_dai(card, np, dai_link->cpus); if (ret) diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index bebee0ca8e38..ecb3eb7a9723 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -204,18 +204,26 @@ static irqreturn_t axg_fifo_pcm_irq_block(int irq, void *dev_id) unsigned int status; regmap_read(fifo->map, FIFO_STATUS1, &status); - status = FIELD_GET(STATUS1_INT_STS, status); + axg_fifo_ack_irq(fifo, status); + + /* Use the thread to call period elapsed on nonatomic links */ if (status & FIFO_INT_COUNT_REPEAT) - snd_pcm_period_elapsed(ss); - else - dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", - status); + return IRQ_WAKE_THREAD; - /* Ack irqs */ - axg_fifo_ack_irq(fifo, status); + dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", + status); + + return IRQ_NONE; +} + +static irqreturn_t 
axg_fifo_pcm_irq_block_thread(int irq, void *dev_id) +{ + struct snd_pcm_substream *ss = dev_id; + + snd_pcm_period_elapsed(ss); - return IRQ_RETVAL(status); + return IRQ_HANDLED; } int axg_fifo_pcm_open(struct snd_soc_component *component, @@ -243,8 +251,9 @@ int axg_fifo_pcm_open(struct snd_soc_component *component, if (ret) return ret; - ret = request_irq(fifo->irq, axg_fifo_pcm_irq_block, 0, - dev_name(dev), ss); + ret = request_threaded_irq(fifo->irq, axg_fifo_pcm_irq_block, + axg_fifo_pcm_irq_block_thread, + IRQF_ONESHOT, dev_name(dev), ss); if (ret) return ret; diff --git a/sound/soc/meson/axg-tdm-formatter.c b/sound/soc/meson/axg-tdm-formatter.c index 63333a2b0a9c..a6579efd3775 100644 --- a/sound/soc/meson/axg-tdm-formatter.c +++ b/sound/soc/meson/axg-tdm-formatter.c @@ -392,6 +392,46 @@ void axg_tdm_stream_free(struct axg_tdm_stream *ts) } EXPORT_SYMBOL_GPL(axg_tdm_stream_free); +int axg_tdm_stream_set_cont_clocks(struct axg_tdm_stream *ts, + unsigned int fmt) +{ + int ret = 0; + + if (fmt & SND_SOC_DAIFMT_CONT) { + /* Clock are already enabled - skipping */ + if (ts->clk_enabled) + return 0; + + ret = clk_prepare_enable(ts->iface->mclk); + if (ret) + return ret; + + ret = clk_prepare_enable(ts->iface->sclk); + if (ret) + goto err_sclk; + + ret = clk_prepare_enable(ts->iface->lrclk); + if (ret) + goto err_lrclk; + + ts->clk_enabled = true; + return 0; + } + + /* Clocks are already disabled - skipping */ + if (!ts->clk_enabled) + return 0; + + clk_disable_unprepare(ts->iface->lrclk); +err_lrclk: + clk_disable_unprepare(ts->iface->sclk); +err_sclk: + clk_disable_unprepare(ts->iface->mclk); + ts->clk_enabled = false; + return ret; +} +EXPORT_SYMBOL_GPL(axg_tdm_stream_set_cont_clocks); + MODULE_DESCRIPTION("Amlogic AXG TDM formatter driver"); MODULE_AUTHOR("Jerome Brunet <jbrunet@baylibre.com>"); MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index bf708717635b..62057c71f742 100644 --- 
a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -309,6 +309,7 @@ static int axg_tdm_iface_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct axg_tdm_iface *iface = snd_soc_dai_get_drvdata(dai); + struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); int ret; switch (iface->fmt & SND_SOC_DAIFMT_FORMAT_MASK) { @@ -346,7 +347,11 @@ static int axg_tdm_iface_hw_params(struct snd_pcm_substream *substream, return ret; } - return 0; + ret = axg_tdm_stream_set_cont_clocks(ts, iface->fmt); + if (ret) + dev_err(dai->dev, "failed to apply continuous clock setting\n"); + + return ret; } static int axg_tdm_iface_hw_free(struct snd_pcm_substream *substream, @@ -354,19 +359,32 @@ static int axg_tdm_iface_hw_free(struct snd_pcm_substream *substream, { struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); - /* Stop all attached formatters */ - axg_tdm_stream_stop(ts); - - return 0; + return axg_tdm_stream_set_cont_clocks(ts, 0); } -static int axg_tdm_iface_prepare(struct snd_pcm_substream *substream, +static int axg_tdm_iface_trigger(struct snd_pcm_substream *substream, + int cmd, struct snd_soc_dai *dai) { - struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); + struct axg_tdm_stream *ts = + snd_soc_dai_get_dma_data(dai, substream); + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + axg_tdm_stream_start(ts); + break; + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + case SNDRV_PCM_TRIGGER_STOP: + axg_tdm_stream_stop(ts); + break; + default: + return -EINVAL; + } - /* Force all attached formatters to update */ - return axg_tdm_stream_reset(ts); + return 0; } static int axg_tdm_iface_remove_dai(struct snd_soc_dai *dai) @@ -412,8 +430,8 @@ static const struct snd_soc_dai_ops axg_tdm_iface_ops = { .set_fmt = axg_tdm_iface_set_fmt, .startup = axg_tdm_iface_startup, .hw_params = 
axg_tdm_iface_hw_params, - .prepare = axg_tdm_iface_prepare, .hw_free = axg_tdm_iface_hw_free, + .trigger = axg_tdm_iface_trigger, }; /* TDM Backend DAIs */ diff --git a/sound/soc/meson/axg-tdm.h b/sound/soc/meson/axg-tdm.h index 42f7470b9a7f..daaca10fec9e 100644 --- a/sound/soc/meson/axg-tdm.h +++ b/sound/soc/meson/axg-tdm.h @@ -58,12 +58,17 @@ struct axg_tdm_stream { unsigned int physical_width; u32 *mask; bool ready; + + /* For continuous clock tracking */ + bool clk_enabled; }; struct axg_tdm_stream *axg_tdm_stream_alloc(struct axg_tdm_iface *iface); void axg_tdm_stream_free(struct axg_tdm_stream *ts); int axg_tdm_stream_start(struct axg_tdm_stream *ts); void axg_tdm_stream_stop(struct axg_tdm_stream *ts); +int axg_tdm_stream_set_cont_clocks(struct axg_tdm_stream *ts, + unsigned int fmt); static inline int axg_tdm_stream_reset(struct axg_tdm_stream *ts) { diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c index cc84d4c81be9..238bda5f6b76 100644 --- a/sound/soc/sof/core.c +++ b/sound/soc/sof/core.c @@ -350,7 +350,9 @@ static int sof_init_environment(struct snd_sof_dev *sdev) } ret = sof_select_ipc_and_paths(sdev); - if (!ret && plat_data->ipc_type != base_profile->ipc_type) { + if (ret) { + goto err_machine_check; + } else if (plat_data->ipc_type != base_profile->ipc_type) { /* IPC type changed, re-initialize the ops */ sof_ops_free(sdev); diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c index 7c8aafca8fde..7275437ea8d8 100644 --- a/sound/soc/sof/debug.c +++ b/sound/soc/sof/debug.c @@ -330,14 +330,32 @@ EXPORT_SYMBOL_GPL(snd_sof_dbg_memory_info_init); int snd_sof_dbg_init(struct snd_sof_dev *sdev) { + struct snd_sof_pdata *plat_data = sdev->pdata; struct snd_sof_dsp_ops *ops = sof_ops(sdev); const struct snd_sof_debugfs_map *map; + struct dentry *fw_profile; int i; int err; /* use "sof" as top level debugFS dir */ sdev->debugfs_root = debugfs_create_dir("sof", NULL); + /* expose firmware/topology prefix/names for test purposes */ + fw_profile = 
debugfs_create_dir("fw_profile", sdev->debugfs_root); + + debugfs_create_str("fw_path", 0444, fw_profile, + (char **)&plat_data->fw_filename_prefix); + debugfs_create_str("fw_lib_path", 0444, fw_profile, + (char **)&plat_data->fw_lib_prefix); + debugfs_create_str("tplg_path", 0444, fw_profile, + (char **)&plat_data->tplg_filename_prefix); + debugfs_create_str("fw_name", 0444, fw_profile, + (char **)&plat_data->fw_filename); + debugfs_create_str("tplg_name", 0444, fw_profile, + (char **)&plat_data->tplg_filename); + debugfs_create_u32("ipc_type", 0444, fw_profile, + (u32 *)&plat_data->ipc_type); + /* init dfsentry list */ INIT_LIST_HEAD(&sdev->dfsentry_list); diff --git a/sound/soc/sof/intel/pci-lnl.c b/sound/soc/sof/intel/pci-lnl.c index b26ffe767fab..b14e508f1f31 100644 --- a/sound/soc/sof/intel/pci-lnl.c +++ b/sound/soc/sof/intel/pci-lnl.c @@ -35,6 +35,9 @@ static const struct sof_dev_desc lnl_desc = { .default_fw_path = { [SOF_IPC_TYPE_4] = "intel/sof-ipc4/lnl", }, + .default_lib_path = { + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/lnl", + }, .default_tplg_path = { [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, diff --git a/sound/soc/sof/ipc3-pcm.c b/sound/soc/sof/ipc3-pcm.c index 35769dd7905e..af0bf354cb20 100644 --- a/sound/soc/sof/ipc3-pcm.c +++ b/sound/soc/sof/ipc3-pcm.c @@ -434,4 +434,5 @@ const struct sof_ipc_pcm_ops ipc3_pcm_ops = { .trigger = sof_ipc3_pcm_trigger, .dai_link_fixup = sof_ipc3_pcm_dai_link_fixup, .reset_hw_params_during_stop = true, + .d0i3_supported_in_s0ix = true, }; diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index e915f9f87a6c..4594470ed08b 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -37,6 +37,25 @@ struct sof_ipc4_timestamp_info { snd_pcm_sframes_t delay; }; +/** + * struct sof_ipc4_pcm_stream_priv - IPC4 specific private data + * @time_info: pointer to time info struct if it is supported, otherwise NULL + * @chain_dma_allocated: indicates the ChainDMA allocation state + */ +struct 
sof_ipc4_pcm_stream_priv { + struct sof_ipc4_timestamp_info *time_info; + + bool chain_dma_allocated; +}; + +static inline struct sof_ipc4_timestamp_info * +sof_ipc4_sps_to_time_info(struct snd_sof_pcm_stream *sps) +{ + struct sof_ipc4_pcm_stream_priv *stream_priv = sps->private; + + return stream_priv->time_info; +} + static int sof_ipc4_set_multi_pipeline_state(struct snd_sof_dev *sdev, u32 state, struct ipc4_pipeline_set_state_data *trigger_list) { @@ -253,14 +272,17 @@ sof_ipc4_update_pipeline_state(struct snd_sof_dev *sdev, int state, int cmd, */ static int sof_ipc4_chain_dma_trigger(struct snd_sof_dev *sdev, - int direction, + struct snd_sof_pcm *spcm, int direction, struct snd_sof_pcm_stream_pipeline_list *pipeline_list, int state, int cmd) { struct sof_ipc4_fw_data *ipc4_data = sdev->private; + struct sof_ipc4_pcm_stream_priv *stream_priv; bool allocate, enable, set_fifo_size; struct sof_ipc4_msg msg = {{ 0 }}; - int i; + int ret, i; + + stream_priv = spcm->stream[direction].private; switch (state) { case SOF_IPC4_PIPE_RUNNING: /* Allocate and start chained dma */ @@ -281,6 +303,11 @@ static int sof_ipc4_chain_dma_trigger(struct snd_sof_dev *sdev, set_fifo_size = false; break; case SOF_IPC4_PIPE_RESET: /* Disable and free chained DMA. 
*/ + + /* ChainDMA can only be reset if it has been allocated */ + if (!stream_priv->chain_dma_allocated) + return 0; + allocate = false; enable = false; set_fifo_size = false; @@ -338,7 +365,12 @@ static int sof_ipc4_chain_dma_trigger(struct snd_sof_dev *sdev, if (enable) msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_ENABLE_MASK; - return sof_ipc_tx_message_no_reply(sdev->ipc, &msg, 0); + ret = sof_ipc_tx_message_no_reply(sdev->ipc, &msg, 0); + /* Update the ChainDMA allocation state */ + if (!ret) + stream_priv->chain_dma_allocated = allocate; + + return ret; } static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component, @@ -378,7 +410,7 @@ static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component, * trigger function that handles the rest for the substream. */ if (pipeline->use_chain_dma) - return sof_ipc4_chain_dma_trigger(sdev, substream->stream, + return sof_ipc4_chain_dma_trigger(sdev, spcm, substream->stream, pipeline_list, state, cmd); /* allocate memory for the pipeline data */ @@ -452,7 +484,7 @@ static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component, * Invalidate the stream_start_offset to make sure that it is * going to be updated if the stream resumes */ - time_info = spcm->stream[substream->stream].private; + time_info = sof_ipc4_sps_to_time_info(&spcm->stream[substream->stream]); if (time_info) time_info->stream_start_offset = SOF_IPC4_INVALID_STREAM_POSITION; @@ -706,12 +738,16 @@ static int sof_ipc4_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd, static void sof_ipc4_pcm_free(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm) { struct snd_sof_pcm_stream_pipeline_list *pipeline_list; + struct sof_ipc4_pcm_stream_priv *stream_priv; int stream; for_each_pcm_streams(stream) { pipeline_list = &spcm->stream[stream].pipeline_list; kfree(pipeline_list->pipelines); pipeline_list->pipelines = NULL; + + stream_priv = spcm->stream[stream].private; + kfree(stream_priv->time_info); 
kfree(spcm->stream[stream].private); spcm->stream[stream].private = NULL; } @@ -721,7 +757,8 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm { struct snd_sof_pcm_stream_pipeline_list *pipeline_list; struct sof_ipc4_fw_data *ipc4_data = sdev->private; - struct sof_ipc4_timestamp_info *stream_info; + struct sof_ipc4_pcm_stream_priv *stream_priv; + struct sof_ipc4_timestamp_info *time_info; bool support_info = true; u32 abi_version; u32 abi_offset; @@ -749,33 +786,41 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm return -ENOMEM; } + stream_priv = kzalloc(sizeof(*stream_priv), GFP_KERNEL); + if (!stream_priv) { + sof_ipc4_pcm_free(sdev, spcm); + return -ENOMEM; + } + + spcm->stream[stream].private = stream_priv; + if (!support_info) continue; - stream_info = kzalloc(sizeof(*stream_info), GFP_KERNEL); - if (!stream_info) { + time_info = kzalloc(sizeof(*time_info), GFP_KERNEL); + if (!time_info) { sof_ipc4_pcm_free(sdev, spcm); return -ENOMEM; } - spcm->stream[stream].private = stream_info; + stream_priv->time_info = time_info; } return 0; } -static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pcm_stream *spcm) +static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pcm_stream *sps) { struct sof_ipc4_copier *host_copier = NULL; struct sof_ipc4_copier *dai_copier = NULL; struct sof_ipc4_llp_reading_slot llp_slot; - struct sof_ipc4_timestamp_info *info; + struct sof_ipc4_timestamp_info *time_info; struct snd_soc_dapm_widget *widget; struct snd_sof_dai *dai; int i; /* find host & dai to locate info in memory window */ - for_each_dapm_widgets(spcm->list, i, widget) { + for_each_dapm_widgets(sps->list, i, widget) { struct snd_sof_widget *swidget = widget->dobj.private; if (!swidget) @@ -795,44 +840,44 @@ static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pc return; } - info = spcm->private; - info->host_copier = host_copier; - 
info->dai_copier = dai_copier; - info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_gpdma_reading_slots) + - sdev->fw_info_box.offset; + time_info = sof_ipc4_sps_to_time_info(sps); + time_info->host_copier = host_copier; + time_info->dai_copier = dai_copier; + time_info->llp_offset = offsetof(struct sof_ipc4_fw_registers, + llp_gpdma_reading_slots) + sdev->fw_info_box.offset; /* find llp slot used by current dai */ for (i = 0; i < SOF_IPC4_MAX_LLP_GPDMA_READING_SLOTS; i++) { - sof_mailbox_read(sdev, info->llp_offset, &llp_slot, sizeof(llp_slot)); + sof_mailbox_read(sdev, time_info->llp_offset, &llp_slot, sizeof(llp_slot)); if (llp_slot.node_id == dai_copier->data.gtw_cfg.node_id) break; - info->llp_offset += sizeof(llp_slot); + time_info->llp_offset += sizeof(llp_slot); } if (i < SOF_IPC4_MAX_LLP_GPDMA_READING_SLOTS) return; /* if no llp gpdma slot is used, check aggregated sdw slot */ - info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_sndw_reading_slots) + - sdev->fw_info_box.offset; + time_info->llp_offset = offsetof(struct sof_ipc4_fw_registers, + llp_sndw_reading_slots) + sdev->fw_info_box.offset; for (i = 0; i < SOF_IPC4_MAX_LLP_SNDW_READING_SLOTS; i++) { - sof_mailbox_read(sdev, info->llp_offset, &llp_slot, sizeof(llp_slot)); + sof_mailbox_read(sdev, time_info->llp_offset, &llp_slot, sizeof(llp_slot)); if (llp_slot.node_id == dai_copier->data.gtw_cfg.node_id) break; - info->llp_offset += sizeof(llp_slot); + time_info->llp_offset += sizeof(llp_slot); } if (i < SOF_IPC4_MAX_LLP_SNDW_READING_SLOTS) return; /* check EVAD slot */ - info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_evad_reading_slot) + - sdev->fw_info_box.offset; - sof_mailbox_read(sdev, info->llp_offset, &llp_slot, sizeof(llp_slot)); + time_info->llp_offset = offsetof(struct sof_ipc4_fw_registers, + llp_evad_reading_slot) + sdev->fw_info_box.offset; + sof_mailbox_read(sdev, time_info->llp_offset, &llp_slot, sizeof(llp_slot)); if (llp_slot.node_id != 
dai_copier->data.gtw_cfg.node_id) - info->llp_offset = 0; + time_info->llp_offset = 0; } static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component, @@ -849,7 +894,7 @@ static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component, if (!spcm) return -EINVAL; - time_info = spcm->stream[substream->stream].private; + time_info = sof_ipc4_sps_to_time_info(&spcm->stream[substream->stream]); /* delay calculation is not supported by current fw_reg ABI */ if (!time_info) return 0; @@ -864,7 +909,7 @@ static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component, static int sof_ipc4_get_stream_start_offset(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream, - struct snd_sof_pcm_stream *stream, + struct snd_sof_pcm_stream *sps, struct sof_ipc4_timestamp_info *time_info) { struct sof_ipc4_copier *host_copier = time_info->host_copier; @@ -918,7 +963,7 @@ static int sof_ipc4_pcm_pointer(struct snd_soc_component *component, struct sof_ipc4_timestamp_info *time_info; struct sof_ipc4_llp_reading_slot llp; snd_pcm_uframes_t head_cnt, tail_cnt; - struct snd_sof_pcm_stream *stream; + struct snd_sof_pcm_stream *sps; u64 dai_cnt, host_cnt, host_ptr; struct snd_sof_pcm *spcm; int ret; @@ -927,8 +972,8 @@ static int sof_ipc4_pcm_pointer(struct snd_soc_component *component, if (!spcm) return -EOPNOTSUPP; - stream = &spcm->stream[substream->stream]; - time_info = stream->private; + sps = &spcm->stream[substream->stream]; + time_info = sof_ipc4_sps_to_time_info(sps); if (!time_info) return -EOPNOTSUPP; @@ -938,7 +983,7 @@ static int sof_ipc4_pcm_pointer(struct snd_soc_component *component, * the statistics is complete. And it will not change after the first initiailization. 
*/ if (time_info->stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION) { - ret = sof_ipc4_get_stream_start_offset(sdev, substream, stream, time_info); + ret = sof_ipc4_get_stream_start_offset(sdev, substream, sps, time_info); if (ret < 0) return -EOPNOTSUPP; } @@ -1030,15 +1075,13 @@ static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component, { struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct sof_ipc4_timestamp_info *time_info; - struct snd_sof_pcm_stream *stream; struct snd_sof_pcm *spcm; spcm = snd_sof_find_spcm_dai(component, rtd); if (!spcm) return 0; - stream = &spcm->stream[substream->stream]; - time_info = stream->private; + time_info = sof_ipc4_sps_to_time_info(&spcm->stream[substream->stream]); /* * Report the stored delay value calculated in the pointer callback. * In the unlikely event that the calculation was skipped/aborted, the diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c index f03cee94bce6..8804e00e7251 100644 --- a/sound/soc/sof/pcm.c +++ b/sound/soc/sof/pcm.c @@ -325,14 +325,13 @@ static int sof_pcm_trigger(struct snd_soc_component *component, ipc_first = true; break; case SNDRV_PCM_TRIGGER_SUSPEND: - if (sdev->system_suspend_target == SOF_SUSPEND_S0IX && + /* + * If DSP D0I3 is allowed during S0iX, set the suspend_ignored flag for + * D0I3-compatible streams to keep the firmware pipeline running + */ + if (pcm_ops && pcm_ops->d0i3_supported_in_s0ix && + sdev->system_suspend_target == SOF_SUSPEND_S0IX && spcm->stream[substream->stream].d0i3_compatible) { - /* - * trap the event, not sending trigger stop to - * prevent the FW pipelines from being stopped, - * and mark the flag to ignore the upcoming DAPM - * PM events. 
- */ spcm->stream[substream->stream].suspend_ignored = true; return 0; } diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h index 86bbb531e142..499b6084b526 100644 --- a/sound/soc/sof/sof-audio.h +++ b/sound/soc/sof/sof-audio.h @@ -116,6 +116,7 @@ struct snd_sof_dai_config_data { * triggers. The FW keeps the host DMA running in this case and * therefore the host must do the same and should stop the DMA during * hw_free. + * @d0i3_supported_in_s0ix: Allow DSP D0I3 during S0iX */ struct sof_ipc_pcm_ops { int (*hw_params)(struct snd_soc_component *component, struct snd_pcm_substream *substream, @@ -135,6 +136,7 @@ struct sof_ipc_pcm_ops { bool reset_hw_params_during_stop; bool ipc_first_on_start; bool platform_stop_during_hw_free; + bool d0i3_supported_in_s0ix; }; /** diff --git a/sound/soc/tegra/tegra186_dspk.c b/sound/soc/tegra/tegra186_dspk.c index aa37c4ab0adb..21cd41fec7a9 100644 --- a/sound/soc/tegra/tegra186_dspk.c +++ b/sound/soc/tegra/tegra186_dspk.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // tegra186_dspk.c - Tegra186 DSPK driver -// -// Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. 
#include <linux/clk.h> #include <linux/device.h> @@ -241,14 +240,14 @@ static int tegra186_dspk_hw_params(struct snd_pcm_substream *substream, return -EINVAL; } - cif_conf.client_bits = TEGRA_ACIF_BITS_24; - switch (params_format(params)) { case SNDRV_PCM_FORMAT_S16_LE: cif_conf.audio_bits = TEGRA_ACIF_BITS_16; + cif_conf.client_bits = TEGRA_ACIF_BITS_16; break; case SNDRV_PCM_FORMAT_S32_LE: cif_conf.audio_bits = TEGRA_ACIF_BITS_32; + cif_conf.client_bits = TEGRA_ACIF_BITS_24; break; default: dev_err(dev, "unsupported format!\n"); diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index b892d66f7847..1e760c315521 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -2417,12 +2417,6 @@ static int davinci_mcasp_probe(struct platform_device *pdev) mcasp_reparent_fck(pdev); - ret = devm_snd_soc_register_component(&pdev->dev, &davinci_mcasp_component, - &davinci_mcasp_dai[mcasp->op_mode], 1); - - if (ret != 0) - goto err; - ret = davinci_mcasp_get_dma_type(mcasp); switch (ret) { case PCM_EDMA: @@ -2449,6 +2443,12 @@ static int davinci_mcasp_probe(struct platform_device *pdev) goto err; } + ret = devm_snd_soc_register_component(&pdev->dev, &davinci_mcasp_component, + &davinci_mcasp_dai[mcasp->op_mode], 1); + + if (ret != 0) + goto err; + no_audio: ret = davinci_mcasp_init_gpiochip(mcasp); if (ret) { diff --git a/tools/Makefile b/tools/Makefile index 37e9f6804832..276f5d0d53a4 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -11,7 +11,6 @@ help: @echo '' @echo ' acpi - ACPI tools' @echo ' bpf - misc BPF tools' - @echo ' cgroup - cgroup tools' @echo ' counter - counter tools' @echo ' cpupower - a tool for all things x86 CPU power' @echo ' debugging - tools for debugging' @@ -69,7 +68,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE +counter firewire hv guest bootconfig spi usb virtio 
mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE $(call descend,$@) bpf/%: FORCE @@ -116,7 +115,7 @@ freefall: FORCE kvm_stat: FORCE $(call descend,kvm/$@) -all: acpi cgroup counter cpupower gpio hv firewire \ +all: acpi counter cpupower gpio hv firewire \ perf selftests bootconfig spi turbostat usb \ virtio mm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ @@ -128,7 +127,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: +counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: $(call descend,$(@:_install=),install) selftests_install: @@ -155,7 +154,7 @@ freefall_install: kvm_stat_install: $(call descend,kvm/$(@:_install=),install) -install: acpi_install cgroup_install counter_install cpupower_install gpio_install \ +install: acpi_install counter_install cpupower_install gpio_install \ hv_install firewire_install iio_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install mm_install bpf_install x86_energy_perf_policy_install \ @@ -169,7 +168,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean: +counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean 
debugging_clean tracing_clean: $(call descend,$(@:_clean=),clean) libapi_clean: @@ -209,7 +208,7 @@ freefall_clean: build_clean: $(call descend,build,clean) -clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_clean \ +clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \ mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean \ diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 4fa4ade1ce74..540c0f2c4fda 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -121,7 +121,7 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz) int i, n; /* recognize hard coded LLVM section name */ - if (strcmp(sec_name, ".arena.1") == 0) { + if (strcmp(sec_name, ".addr_space.1") == 0) { /* this is the name to use in skeleton */ snprintf(buf, buf_sz, "arena"); return true; diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 72535f00572f..72ea363d434d 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -3,6 +3,8 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +#include <linux/types.h> /* for u32 */ + struct btf_id_set { u32 cnt; u32 ids[]; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index efab29b8935b..a2061fcd612d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -498,7 +498,7 @@ struct bpf_struct_ops { #define KSYMS_SEC ".ksyms" #define STRUCT_OPS_SEC ".struct_ops" #define STRUCT_OPS_LINK_SEC ".struct_ops.link" -#define ARENA_SEC ".arena.1" +#define ARENA_SEC ".addr_space.1" enum libbpf_map_type { LIBBPF_MAP_UNSPEC, @@ -1650,6 +1650,10 @@ static int sys_memfd_create(const char *name, unsigned flags) return syscall(__NR_memfd_create, name, flags); } +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif + static int 
create_placeholder_fd(void) { int fd; @@ -5352,8 +5356,8 @@ retry: goto err_out; } if (map->def.type == BPF_MAP_TYPE_ARENA) { - map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map), - PROT_READ | PROT_WRITE, + map->mmaped = mmap((void *)(long)map->map_extra, + bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, map->fd, 0); if (map->mmaped == MAP_FAILED) { diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 6b7eb2d2aaf1..a451cbfbd781 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -228,8 +228,11 @@ class Type(SpecAttr): presence = '' for i in range(0, len(ref)): presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}" - if self.presence_type() == 'bit': - code.append(presence + ' = 1;') + # Every layer below last is a nest, so we know it uses bit presence + # last layer is "self" and may be a complex type + if i == len(ref) - 1 and self.presence_type() != 'bit': + continue + code.append(presence + ' = 1;') code += self._setter_lines(ri, member, presence) func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}" diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0b10ad008668..0a33d9195b7a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -585,7 +585,7 @@ static int add_dead_ends(struct objtool_file *file) struct section *rsec; struct reloc *reloc; struct instruction *insn; - unsigned long offset; + uint64_t offset; /* * Check for manually annotated dead ends. 
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index aa5ec149f96c..b3b00269a52a 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -28,6 +28,8 @@ CONFIG_MCTP_FLOWS=y CONFIG_INET=y CONFIG_MPTCP=y +CONFIG_NETDEVICES=y +CONFIG_WLAN=y CONFIG_CFG80211=y CONFIG_MAC80211=y CONFIG_WLAN_VENDOR_INTEL=y @@ -38,6 +40,7 @@ CONFIG_DAMON_VADDR=y CONFIG_DAMON_PADDR=y CONFIG_DEBUG_FS=y CONFIG_DAMON_DBGFS=y +CONFIG_DAMON_DBGFS_DEPRECATED=y CONFIG_REGMAP_BUILD=y diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h index bcf195c64a45..567491f3e1b5 100644 --- a/tools/testing/selftests/bpf/bpf_arena_common.h +++ b/tools/testing/selftests/bpf/bpf_arena_common.h @@ -32,7 +32,7 @@ */ #endif -#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM) #define __arena __attribute__((address_space(1))) #define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */ #define cast_user(ptr) /* nop for bpf prog. 
emitted by LLVM */ diff --git a/tools/testing/selftests/bpf/prog_tests/arena_htab.c b/tools/testing/selftests/bpf/prog_tests/arena_htab.c index 0766702de846..d69fd2465f53 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_htab.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_htab.c @@ -3,12 +3,14 @@ #include <test_progs.h> #include <sys/mman.h> #include <network_helpers.h> - +#include <sys/user.h> +#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */ +#include <unistd.h> +#define PAGE_SIZE getpagesize() +#endif #include "arena_htab_asm.skel.h" #include "arena_htab.skel.h" -#define PAGE_SIZE 4096 - #include "bpf_arena_htab.h" static void test_arena_htab_common(struct htab *htab) diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c index e61886debab1..d15867cddde0 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_list.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c @@ -3,8 +3,11 @@ #include <test_progs.h> #include <sys/mman.h> #include <network_helpers.h> - -#define PAGE_SIZE 4096 +#include <sys/user.h> +#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */ +#include <unistd.h> +#define PAGE_SIZE getpagesize() +#endif #include "bpf_arena_list.h" #include "arena_list.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index 053f4d6da77a..cc184e4420f6 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <sys/syscall.h> +#include <limits.h> #include <test_progs.h> #include "bloom_filter_map.skel.h" @@ -21,6 +22,11 @@ static void test_fail_cases(void) if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0")) close(fd); + /* Invalid value size: too big */ + fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, 
INT32_MAX, 100, NULL); + if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value too large")) + close(fd); + /* Invalid max entries size */ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL); if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size")) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 985273832f89..c4f9f306646e 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -5,6 +5,7 @@ #include "cap_helpers.h" #include "verifier_and.skel.h" #include "verifier_arena.skel.h" +#include "verifier_arena_large.skel.h" #include "verifier_array_access.skel.h" #include "verifier_basic_stack.skel.h" #include "verifier_bitfield_write.skel.h" @@ -120,6 +121,7 @@ static void run_tests_aux(const char *skel_name, void test_verifier_and(void) { RUN(verifier_and); } void test_verifier_arena(void) { RUN(verifier_arena); } +void test_verifier_arena_large(void) { RUN(verifier_arena_large); } void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); } void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); } void test_verifier_bounds(void) { RUN(verifier_bounds); } diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c index b7bb712cacfd..1e6ac187a6a0 100644 --- a/tools/testing/selftests/bpf/progs/arena_htab.c +++ b/tools/testing/selftests/bpf/progs/arena_htab.c @@ -22,7 +22,7 @@ int zero = 0; SEC("syscall") int arena_htab_llvm(void *ctx) { -#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM) struct htab __arena *htab; __u64 i; diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c index cd35b8448435..c0422c58cee2 100644 --- 
a/tools/testing/selftests/bpf/progs/arena_list.c +++ b/tools/testing/selftests/bpf/progs/arena_list.c @@ -30,13 +30,13 @@ int list_sum; int cnt; bool skip = false; -#ifdef __BPF_FEATURE_ARENA_CAST +#ifdef __BPF_FEATURE_ADDR_SPACE_CAST long __arena arena_sum; int __arena test_val = 1; struct arena_list_head __arena global_head; #else -long arena_sum SEC(".arena.1"); -int test_val SEC(".arena.1"); +long arena_sum SEC(".addr_space.1"); +int test_val SEC(".addr_space.1"); #endif int zero; @@ -44,7 +44,7 @@ int zero; SEC("syscall") int arena_list_add(void *ctx) { -#ifdef __BPF_FEATURE_ARENA_CAST +#ifdef __BPF_FEATURE_ADDR_SPACE_CAST __u64 i; list_head = &global_head; @@ -66,7 +66,7 @@ int arena_list_add(void *ctx) SEC("syscall") int arena_list_del(void *ctx) { -#ifdef __BPF_FEATURE_ARENA_CAST +#ifdef __BPF_FEATURE_ADDR_SPACE_CAST struct elem __arena *n; int sum = 0; diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c index 5540b05ff9ee..93144ae6df74 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -12,14 +12,18 @@ struct { __uint(type, BPF_MAP_TYPE_ARENA); __uint(map_flags, BPF_F_MMAPABLE); __uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/ - __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */ +#ifdef __TARGET_ARCH_arm64 + __ulong(map_extra, (1ull << 32) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */ +#else + __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */ +#endif } arena SEC(".maps"); SEC("syscall") __success __retval(0) int basic_alloc1(void *ctx) { -#if defined(__BPF_FEATURE_ARENA_CAST) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) volatile int __arena *page1, *page2, *no_page, *page3; page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); @@ -58,7 +62,7 @@ SEC("syscall") __success __retval(0) int 
basic_alloc2(void *ctx) { -#if defined(__BPF_FEATURE_ARENA_CAST) +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) volatile char __arena *page1, *page2, *page3, *page4; page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0); diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c new file mode 100644 index 000000000000..ef66ea460264 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" +#include "bpf_arena_common.h" + +#define ARENA_SIZE (1ull << 32) + +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, ARENA_SIZE / PAGE_SIZE); +} arena SEC(".maps"); + +SEC("syscall") +__success __retval(0) +int big_alloc1(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + volatile char __arena *page1, *page2, *no_page, *page3; + void __arena *base; + + page1 = base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page1) + return 1; + *page1 = 1; + page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE, + 1, NUMA_NO_NODE, 0); + if (!page2) + return 2; + *page2 = 2; + no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE, + 1, NUMA_NO_NODE, 0); + if (no_page) + return 3; + if (*page1 != 1) + return 4; + if (*page2 != 2) + return 5; + bpf_arena_free_pages(&arena, (void __arena *)page1, 1); + if (*page2 != 2) + return 6; + if (*page1 != 0) /* use-after-free should return 0 */ + return 7; + page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page3) + return 8; + *page3 = 3; + if (page1 != page3) + return 9; + if (*page2 != 2) + return 10; + if (*(page1 + PAGE_SIZE) != 0) + return 11; + if (*(page1 - PAGE_SIZE) != 0) + return 
12; + if (*(page2 + PAGE_SIZE) != 0) + return 13; + if (*(page2 - PAGE_SIZE) != 0) + return 14; +#endif + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/dmabuf-heaps/config b/tools/testing/selftests/dmabuf-heaps/config new file mode 100644 index 000000000000..be091f1cdfa0 --- /dev/null +++ b/tools/testing/selftests/dmabuf-heaps/config @@ -0,0 +1,3 @@ +CONFIG_DMABUF_HEAPS=y +CONFIG_DMABUF_HEAPS_SYSTEM=y +CONFIG_DRM_VGEM=y diff --git a/tools/testing/selftests/drivers/net/netdevsim/settings b/tools/testing/selftests/drivers/net/netdevsim/settings new file mode 100644 index 000000000000..a62d2fa1275c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/settings @@ -0,0 +1 @@ +timeout=600 diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index a0b8688b0836..fb4472ddffd8 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -19,8 +19,8 @@ include ../lib.mk $(OUTPUT)/subdir: mkdir -p $@ -$(OUTPUT)/script: - echo '#!/bin/sh' > $@ +$(OUTPUT)/script: Makefile + echo '#!/bin/bash' > $@ echo 'exit $$*' >> $@ chmod +x $@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat diff --git a/tools/testing/selftests/exec/binfmt_script.py b/tools/testing/selftests/exec/binfmt_script.py index 05f94a741c7a..2c575a2c0eab 100755 --- a/tools/testing/selftests/exec/binfmt_script.py +++ b/tools/testing/selftests/exec/binfmt_script.py @@ -16,6 +16,8 @@ SIZE=256 NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."])) test_num=0 +pass_num=0 +fail_num=0 code='''#!/usr/bin/perl print "Executed interpreter! Args:\n"; @@ -42,7 +44,7 @@ foreach my $a (@ARGV) { # ... 
def test(name, size, good=True, leading="", root="./", target="/perl", fill="A", arg="", newline="\n", hashbang="#!"): - global test_num, tests, NAME_MAX + global test_num, pass_num, fail_num, tests, NAME_MAX test_num += 1 if test_num > tests: raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)" @@ -80,16 +82,20 @@ def test(name, size, good=True, leading="", root="./", target="/perl", if good: print("ok %d - binfmt_script %s (successful good exec)" % (test_num, name)) + pass_num += 1 else: print("not ok %d - binfmt_script %s succeeded when it should have failed" % (test_num, name)) + fail_num = 1 else: if good: print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)" % (test_num, name, proc.returncode)) + fail_num = 1 else: print("ok %d - binfmt_script %s (correctly failed bad exec)" % (test_num, name)) + pass_num += 1 # Clean up crazy binaries os.unlink(script) @@ -166,6 +172,8 @@ test(name="two-under-trunc-arg", size=int(SIZE/2), arg=" ") test(name="two-under-leading", size=int(SIZE/2), leading=" ") test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ") +print("# Totals: pass:%d fail:%d xfail:0 xpass:0 skip:0 error:0" % (pass_num, fail_num)) + if test_num != tests: raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d" % (test_num, tests)) diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index 0546ca24f2b2..6418ded40bdd 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -98,10 +98,9 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags, if (child == 0) { /* Child: do execveat(). 
*/ rc = execveat_(fd, path, argv, envp, flags); - ksft_print_msg("execveat() failed, rc=%d errno=%d (%s)\n", + ksft_print_msg("child execveat() failed, rc=%d errno=%d (%s)\n", rc, errno, strerror(errno)); - ksft_test_result_fail("%s\n", test_name); - exit(1); /* should not reach here */ + exit(errno); } /* Parent: wait for & check child's exit status. */ rc = waitpid(child, &status, 0); @@ -226,11 +225,14 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script) * "If the command name is found, but it is not an executable utility, * the exit status shall be 126."), so allow either. */ - if (is_script) + if (is_script) { + ksft_print_msg("Invoke script via root_dfd and relative filename\n"); fail += check_execveat_invoked_rc(root_dfd, longpath + 1, 0, 127, 126); - else + } else { + ksft_print_msg("Invoke exec via root_dfd and relative filename\n"); fail += check_execveat(root_dfd, longpath + 1, 0); + } return fail; } diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c index d487c2f6a615..17e3207d34ae 100644 --- a/tools/testing/selftests/exec/load_address.c +++ b/tools/testing/selftests/exec/load_address.c @@ -5,6 +5,7 @@ #include <link.h> #include <stdio.h> #include <stdlib.h> +#include "../kselftest.h" struct Statistics { unsigned long long load_address; @@ -41,28 +42,23 @@ int main(int argc, char **argv) unsigned long long misalign; int ret; + ksft_print_header(); + ksft_set_plan(1); + ret = dl_iterate_phdr(ExtractStatistics, &extracted); - if (ret != 1) { - fprintf(stderr, "FAILED\n"); - return 1; - } + if (ret != 1) + ksft_exit_fail_msg("FAILED: dl_iterate_phdr\n"); - if (extracted.alignment == 0) { - fprintf(stderr, "No alignment found\n"); - return 1; - } else if (extracted.alignment & (extracted.alignment - 1)) { - fprintf(stderr, "Alignment is not a power of 2\n"); - return 1; - } + if (extracted.alignment == 0) + ksft_exit_fail_msg("FAILED: No alignment found\n"); + else if 
(extracted.alignment & (extracted.alignment - 1)) + ksft_exit_fail_msg("FAILED: Alignment is not a power of 2\n"); misalign = extracted.load_address & (extracted.alignment - 1); - if (misalign) { - printf("alignment = %llu, load_address = %llu\n", - extracted.alignment, extracted.load_address); - fprintf(stderr, "FAILED\n"); - return 1; - } + if (misalign) + ksft_exit_fail_msg("FAILED: alignment = %llu, load_address = %llu\n", + extracted.alignment, extracted.load_address); - fprintf(stderr, "PASS\n"); - return 0; + ksft_test_result_pass("Completed\n"); + ksft_finished(); } diff --git a/tools/testing/selftests/exec/recursion-depth.c b/tools/testing/selftests/exec/recursion-depth.c index 2dbd5bc45b3e..b2f37d86a5f6 100644 --- a/tools/testing/selftests/exec/recursion-depth.c +++ b/tools/testing/selftests/exec/recursion-depth.c @@ -23,45 +23,44 @@ #include <fcntl.h> #include <sys/mount.h> #include <unistd.h> +#include "../kselftest.h" int main(void) { + int fd, rv; + + ksft_print_header(); + ksft_set_plan(1); + if (unshare(CLONE_NEWNS) == -1) { if (errno == ENOSYS || errno == EPERM) { - fprintf(stderr, "error: unshare, errno %d\n", errno); - return 4; + ksft_test_result_skip("error: unshare, errno %d\n", errno); + ksft_finished(); } - fprintf(stderr, "error: unshare, errno %d\n", errno); - return 1; - } - if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) { - fprintf(stderr, "error: mount '/', errno %d\n", errno); - return 1; + ksft_exit_fail_msg("error: unshare, errno %d\n", errno); } + + if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) == -1) + ksft_exit_fail_msg("error: mount '/', errno %d\n", errno); + /* Require "exec" filesystem. 
*/ - if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1) { - fprintf(stderr, "error: mount ramfs, errno %d\n", errno); - return 1; - } + if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1) + ksft_exit_fail_msg("error: mount ramfs, errno %d\n", errno); #define FILENAME "/tmp/1" - int fd = creat(FILENAME, 0700); - if (fd == -1) { - fprintf(stderr, "error: creat, errno %d\n", errno); - return 1; - } + fd = creat(FILENAME, 0700); + if (fd == -1) + ksft_exit_fail_msg("error: creat, errno %d\n", errno); + #define S "#!" FILENAME "\n" - if (write(fd, S, strlen(S)) != strlen(S)) { - fprintf(stderr, "error: write, errno %d\n", errno); - return 1; - } + if (write(fd, S, strlen(S)) != strlen(S)) + ksft_exit_fail_msg("error: write, errno %d\n", errno); + close(fd); - int rv = execve(FILENAME, NULL, NULL); - if (rv == -1 && errno == ELOOP) { - return 0; - } - fprintf(stderr, "error: execve, rv %d, errno %d\n", rv, errno); - return 1; + rv = execve(FILENAME, NULL, NULL); + ksft_test_result(rv == -1 && errno == ELOOP, + "execve failed as expected (ret %d, errno %d)\n", rv, errno); + ksft_finished(); } diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index 2de7c61d1ae3..3f74c09c56b6 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -24,7 +24,7 @@ echo 0 > events/enable echo "Get the most frequently calling function" sample_events -target_func=`cut -d: -f3 trace | sed 's/call_site=\([^+]*\)+0x.*/\1/' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'` +target_func=`cat trace | grep -o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'` if [ -z "$target_func" ]; then exit_fail fi diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 
ddba2c2fb5de..4eaba83cdcf3 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -135,8 +135,8 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data, irq_iter = READ_ONCE(shared_data->nr_iter); __GUEST_ASSERT(config_iter + 1 == irq_iter, - "config_iter + 1 = 0x%lx, irq_iter = 0x%lx.\n" - " Guest timer interrupt was not trigged within the specified\n" + "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" + " Guest timer interrupt was not triggered within the specified\n" " interval, try to increase the error margin by [-e] option.\n", config_iter + 1, irq_iter); } diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 3bd03b088dda..81ce37ec407d 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1037,8 +1037,19 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, uint32_t value); +void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr); void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function); + +static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature) +{ + struct kvm_cpuid_entry2 *entry; + + entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index); + return *((&entry->eax) + feature.reg) & BIT(feature.bit); +} + void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_feature feature, bool set); diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index e22848f747c0..0f9cabd99fd4 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -60,7 +60,7 @@ static void guest_run(struct test_vcpu_shared_data *shared_data) irq_iter = 
READ_ONCE(shared_data->nr_iter); __GUEST_ASSERT(config_iter + 1 == irq_iter, "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" - " Guest timer interrupt was not trigged within the specified\n" + " Guest timer interrupt was not triggered within the specified\n" " interval, try to increase the error margin by [-e] option.\n", config_iter + 1, irq_iter); } diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index 9e2879af7c20..40cc59f4e650 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -133,6 +133,43 @@ static void enter_guest(struct kvm_vcpu *vcpu) } } +static void test_pv_unhalt(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_cpuid_entry2 *ent; + u32 kvm_sig_old; + + pr_info("testing KVM_FEATURE_PV_UNHALT\n"); + + TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS); + + /* KVM_PV_UNHALT test */ + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + + TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect"); + + /* Make sure KVM clears vcpu->arch.kvm_cpuid */ + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); + kvm_sig_old = ent->ebx; + ent->ebx = 0xdeadbeef; + vcpu_set_cpuid(vcpu); + + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); + ent->ebx = kvm_sig_old; + vcpu_set_cpuid(vcpu); + + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS"); + + /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */ + + kvm_vm_free(vm); +} + int main(void) { struct kvm_vcpu *vcpu; @@ -151,4 +188,6 @@ int main(void) enter_guest(vcpu); kvm_vm_free(vm); + + test_pv_unhalt(); } diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c index 
cbe99594d319..18a49c70d4c6 100644 --- a/tools/testing/selftests/mm/gup_test.c +++ b/tools/testing/selftests/mm/gup_test.c @@ -203,7 +203,7 @@ int main(int argc, char **argv) ksft_print_header(); ksft_set_plan(nthreads); - filed = open(file, O_RDWR|O_CREAT); + filed = open(file, O_RDWR|O_CREAT, 0664); if (filed < 0) ksft_exit_fail_msg("Unable to open %s: %s\n", file, strerror(errno)); diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index f822ae31af22..374a308174d2 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -1745,9 +1745,12 @@ void pkey_setup_shadow(void) shadow_pkey_reg = __read_pkey_reg(); } +pid_t parent_pid; + void restore_settings_atexit(void) { - cat_into_file(buf, "/proc/sys/vm/nr_hugepages"); + if (parent_pid == getpid()) + cat_into_file(buf, "/proc/sys/vm/nr_hugepages"); } void save_settings(void) @@ -1773,6 +1776,7 @@ void save_settings(void) exit(__LINE__); } + parent_pid = getpid(); atexit(restore_settings_atexit); close(fd); } diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index cc5f144430d4..7dbfa53d93a0 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -137,7 +137,7 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon) if (!map) ksft_exit_fail_msg("anon mmap failed\n"); } else { - test_fd = open(fname, O_RDWR | O_CREAT); + test_fd = open(fname, O_RDWR | O_CREAT, 0664); if (test_fd < 0) { ksft_test_result_skip("Test %s open() file failed\n", __func__); return; diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 856662d2f87a..6c988bd2f335 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -223,7 +223,7 @@ void split_file_backed_thp(void) ksft_exit_fail_msg("Fail to create 
file-backed THP split testing file\n"); } - fd = open(testfile, O_CREAT|O_WRONLY); + fd = open(testfile, O_CREAT|O_WRONLY, 0664); if (fd == -1) { ksft_perror("Cannot open testing file"); goto cleanup; diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index b0ac0ec2356d..7ad6ba660c7d 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -18,6 +18,7 @@ bool test_uffdio_wp = true; unsigned long long *count_verify; uffd_test_ops_t *uffd_test_ops; uffd_test_case_ops_t *uffd_test_case_ops; +atomic_bool ready_for_fork; static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) { @@ -518,6 +519,8 @@ void *uffd_poll_thread(void *arg) pollfd[1].fd = pipefd[cpu*2]; pollfd[1].events = POLLIN; + ready_for_fork = true; + for (;;) { ret = poll(pollfd, 2, -1); if (ret <= 0) { diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index cb055282c89c..cc5629c3d2aa 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -32,6 +32,7 @@ #include <inttypes.h> #include <stdint.h> #include <sys/random.h> +#include <stdatomic.h> #include "../kselftest.h" #include "vm_util.h" @@ -103,6 +104,7 @@ extern bool map_shared; extern bool test_uffdio_wp; extern unsigned long long *count_verify; extern volatile bool test_uffdio_copy_eexist; +extern atomic_bool ready_for_fork; extern uffd_test_ops_t anon_uffd_test_ops; extern uffd_test_ops_t shmem_uffd_test_ops; diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 2b9f8cc52639..21ec23206ab4 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -775,6 +775,8 @@ static void uffd_sigbus_test_common(bool wp) char c; struct uffd_args args = { 0 }; + ready_for_fork = false; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); if (uffd_register(uffd, area_dst, 
nr_pages * page_size, @@ -790,6 +792,9 @@ static void uffd_sigbus_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ + pid = fork(); if (pid < 0) err("fork"); @@ -829,6 +834,8 @@ static void uffd_events_test_common(bool wp) char c; struct uffd_args args = { 0 }; + ready_for_fork = false; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); if (uffd_register(uffd, area_dst, nr_pages * page_size, true, wp, false)) @@ -838,6 +845,9 @@ static void uffd_events_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ + pid = fork(); if (pid < 0) err("fork"); @@ -1427,7 +1437,8 @@ uffd_test_case_t uffd_tests[] = { .uffd_fn = uffd_sigbus_wp_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_SIGBUS | - UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP, + UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP | + UFFD_FEATURE_WP_HUGETLBFS_SHMEM, }, { .name = "events", diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index c02990bbd56f..9007c420d52c 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -3,7 +3,7 @@ #include <stdbool.h> #include <sys/mman.h> #include <err.h> -#include <string.h> /* ffsl() */ +#include <strings.h> /* ffsl() */ #include <unistd.h> /* _SC_PAGESIZE */ #define BIT_ULL(nr) (1ULL << (nr)) diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c index a2662348cdb1..b7b54d646b93 100644 --- a/tools/testing/selftests/net/bind_wildcard.c +++ b/tools/testing/selftests/net/bind_wildcard.c @@ -6,7 +6,9 @@ #include "../kselftest_harness.h" -struct in6_addr in6addr_v4mapped_any = { +static const __u32 
in4addr_any = INADDR_ANY; +static const __u32 in4addr_loopback = INADDR_LOOPBACK; +static const struct in6_addr in6addr_v4mapped_any = { .s6_addr = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -14,8 +16,7 @@ struct in6_addr in6addr_v4mapped_any = { 0, 0, 0, 0 } }; - -struct in6_addr in6addr_v4mapped_loopback = { +static const struct in6_addr in6addr_v4mapped_loopback = { .s6_addr = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -24,137 +25,785 @@ struct in6_addr in6addr_v4mapped_loopback = { } }; +#define NR_SOCKETS 8 + FIXTURE(bind_wildcard) { - struct sockaddr_in addr4; - struct sockaddr_in6 addr6; + int fd[NR_SOCKETS]; + socklen_t addrlen[NR_SOCKETS]; + union { + struct sockaddr addr; + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + } addr[NR_SOCKETS]; }; FIXTURE_VARIANT(bind_wildcard) { - const __u32 addr4_const; - const struct in6_addr *addr6_const; - int expected_errno; + sa_family_t family[2]; + const void *addr[2]; + bool ipv6_only[2]; + + /* 6 bind() calls below follow two bind() for the defined 2 addresses: + * + * 0.0.0.0 + * 127.0.0.1 + * :: + * ::1 + * ::ffff:0.0.0.0 + * ::ffff:127.0.0.1 + */ + int expected_errno[NR_SOCKETS]; + int expected_reuse_errno[NR_SOCKETS]; +}; + +/* (IPv4, IPv4) */ +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v4_local) +{ + .family = {AF_INET, AF_INET}, + .addr = {&in4addr_any, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v4_any) +{ + .family = {AF_INET, AF_INET}, + .addr = {&in4addr_loopback, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; +/* (IPv4, IPv6) */ FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any) { - .addr4_const = INADDR_ANY, - 
.addr6_const = &in6addr_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any_only) +{ + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local) { - .addr4_const = INADDR_ANY, - .addr6_const = &in6addr_loopback, - .expected_errno = 0, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any) { - .addr4_const = INADDR_ANY, - .addr6_const = &in6addr_v4mapped_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local) { - .addr4_const = INADDR_ANY, - .addr6_const = &in6addr_v4mapped_loopback, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_any, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, 
+ .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = &in6addr_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any_only) +{ + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = &in6addr_loopback, - .expected_errno = 0, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = &in6addr_v4mapped_any, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local) { - .addr4_const = INADDR_LOOPBACK, - .addr6_const = 
&in6addr_v4mapped_loopback, - .expected_errno = EADDRINUSE, + .family = {AF_INET, AF_INET6}, + .addr = {&in4addr_loopback, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +/* (IPv6, IPv4) */ +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, }; +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_any}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_any, &in4addr_loopback}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = 
{&in6addr_loopback, &in4addr_any}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_loopback, &in4addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_any, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_any, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_any) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_loopback, &in4addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_local) +{ + .family = {AF_INET6, AF_INET}, + .addr = {&in6addr_v4mapped_loopback, &in4addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + 
.expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +/* (IPv6, IPv6) */ +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .ipv6_only = {true, false}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_any}, + .ipv6_only = {true, true}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + 
EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_loopback}, + .ipv6_only = {true, false}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_any}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_any, &in6addr_v4mapped_loopback}, + .ipv6_only = {true, false}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + 
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, EADDRINUSE, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + 0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_v4mapped_any}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_loopback, &in6addr_v4mapped_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_any, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = 
{&in6addr_v4mapped_any, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_any, &in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_v4mapped_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_any, &in6addr_v4mapped_loopback}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, &in6addr_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any_only) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, &in6addr_any}, + .ipv6_only = {false, true}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_local) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, 
&in6addr_loopback}, + .expected_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, EADDRINUSE}, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_v4mapped_any) +{ + .family = {AF_INET6, AF_INET6}, + .addr = {&in6addr_v4mapped_loopback, &in6addr_v4mapped_any}, + .expected_errno = {0, EADDRINUSE, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, + .expected_reuse_errno = {0, 0, + EADDRINUSE, EADDRINUSE, + EADDRINUSE, 0, + EADDRINUSE, EADDRINUSE}, +}; + +static void setup_addr(FIXTURE_DATA(bind_wildcard) *self, int i, + int family, const void *addr_const) +{ + if (family == AF_INET) { + struct sockaddr_in *addr4 = &self->addr[i].addr4; + const __u32 *addr4_const = addr_const; + + addr4->sin_family = AF_INET; + addr4->sin_port = htons(0); + addr4->sin_addr.s_addr = htonl(*addr4_const); + + self->addrlen[i] = sizeof(struct sockaddr_in); + } else { + struct sockaddr_in6 *addr6 = &self->addr[i].addr6; + const struct in6_addr *addr6_const = addr_const; + + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(0); + addr6->sin6_addr = *addr6_const; + + self->addrlen[i] = sizeof(struct sockaddr_in6); + } +} + FIXTURE_SETUP(bind_wildcard) { - self->addr4.sin_family = AF_INET; - self->addr4.sin_port = htons(0); - self->addr4.sin_addr.s_addr = htonl(variant->addr4_const); + setup_addr(self, 0, variant->family[0], variant->addr[0]); + setup_addr(self, 1, variant->family[1], variant->addr[1]); + + setup_addr(self, 2, AF_INET, &in4addr_any); + setup_addr(self, 3, AF_INET, &in4addr_loopback); - self->addr6.sin6_family = AF_INET6; - self->addr6.sin6_port = htons(0); - self->addr6.sin6_addr = *variant->addr6_const; + setup_addr(self, 4, AF_INET6, &in6addr_any); + setup_addr(self, 5, AF_INET6, &in6addr_loopback); + setup_addr(self, 6, AF_INET6, &in6addr_v4mapped_any); + setup_addr(self, 7, AF_INET6, 
&in6addr_v4mapped_loopback); } FIXTURE_TEARDOWN(bind_wildcard) { + int i; + + for (i = 0; i < NR_SOCKETS; i++) + close(self->fd[i]); } -void bind_sockets(struct __test_metadata *_metadata, - FIXTURE_DATA(bind_wildcard) *self, - int expected_errno, - struct sockaddr *addr1, socklen_t addrlen1, - struct sockaddr *addr2, socklen_t addrlen2) +void bind_socket(struct __test_metadata *_metadata, + FIXTURE_DATA(bind_wildcard) *self, + const FIXTURE_VARIANT(bind_wildcard) *variant, + int i, int reuse) { - int fd[2]; int ret; - fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0); - ASSERT_GT(fd[0], 0); + self->fd[i] = socket(self->addr[i].addr.sa_family, SOCK_STREAM, 0); + ASSERT_GT(self->fd[i], 0); - ret = bind(fd[0], addr1, addrlen1); - ASSERT_EQ(ret, 0); + if (i < 2 && variant->ipv6_only[i]) { + ret = setsockopt(self->fd[i], SOL_IPV6, IPV6_V6ONLY, &(int){1}, sizeof(int)); + ASSERT_EQ(ret, 0); + } - ret = getsockname(fd[0], addr1, &addrlen1); - ASSERT_EQ(ret, 0); + if (i < 2 && reuse) { + ret = setsockopt(self->fd[i], SOL_SOCKET, reuse, &(int){1}, sizeof(int)); + ASSERT_EQ(ret, 0); + } - ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port; + self->addr[i].addr4.sin_port = self->addr[0].addr4.sin_port; - fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0); - ASSERT_GT(fd[1], 0); + ret = bind(self->fd[i], &self->addr[i].addr, self->addrlen[i]); - ret = bind(fd[1], addr2, addrlen2); - if (expected_errno) { - ASSERT_EQ(ret, -1); - ASSERT_EQ(errno, expected_errno); + if (reuse) { + if (variant->expected_reuse_errno[i]) { + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, variant->expected_reuse_errno[i]); + } else { + ASSERT_EQ(ret, 0); + } } else { + if (variant->expected_errno[i]) { + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, variant->expected_errno[i]); + } else { + ASSERT_EQ(ret, 0); + } + } + + if (i == 0) { + ret = getsockname(self->fd[0], &self->addr[0].addr, &self->addrlen[0]); ASSERT_EQ(ret, 0); } +} - close(fd[1]); - close(fd[0]); +TEST_F(bind_wildcard, 
plain) +{ + int i; + + for (i = 0; i < NR_SOCKETS; i++) + bind_socket(_metadata, self, variant, i, 0); } -TEST_F(bind_wildcard, v4_v6) +TEST_F(bind_wildcard, reuseaddr) { - bind_sockets(_metadata, self, variant->expected_errno, - (struct sockaddr *)&self->addr4, sizeof(self->addr4), - (struct sockaddr *)&self->addr6, sizeof(self->addr6)); + int i; + + for (i = 0; i < NR_SOCKETS; i++) + bind_socket(_metadata, self, variant, i, SO_REUSEADDR); } -TEST_F(bind_wildcard, v6_v4) +TEST_F(bind_wildcard, reuseport) { - bind_sockets(_metadata, self, variant->expected_errno, - (struct sockaddr *)&self->addr6, sizeof(self->addr6), - (struct sockaddr *)&self->addr4, sizeof(self->addr4)); + int i; + + for (i = 0; i < NR_SOCKETS; i++) + bind_socket(_metadata, self, variant, i, SO_REUSEPORT); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 4c4248554826..4131f3263a48 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -383,12 +383,14 @@ do_transfer() local stat_cookierx_last local stat_csum_err_s local stat_csum_err_c + local stat_tcpfb_last_l stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -457,11 +459,13 @@ do_transfer() local stat_cookietx_now local stat_cookierx_now local stat_ooo_now + local stat_tcpfb_now_l 
stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") + stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -508,6 +512,11 @@ do_transfer() fi fi + if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + mptcp_lib_pr_fail "unexpected fallback to TCP" + rets=1 + fi + if [ $cookies -eq 2 ];then if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then extra+=" WARN: CookieSent: did not advance" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 5e9211e89825..e4403236f655 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -729,7 +729,7 @@ pm_nl_check_endpoint() [ -n "$_flags" ]; flags="flags $_flags" shift elif [ $1 = "dev" ]; then - [ -n "$2" ]; dev="dev $1" + [ -n "$2" ]; dev="dev $2" shift elif [ $1 = "id" ]; then _id=$2 @@ -3610,6 +3610,8 @@ endpoint_tests() local tests_pid=$! 
wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 chk_subflow_nr "before delete" 2 chk_mptcp_info subflows 1 subflows 1 diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c index 7c5b12664b03..bfb07dc49518 100644 --- a/tools/testing/selftests/net/reuseaddr_conflict.c +++ b/tools/testing/selftests/net/reuseaddr_conflict.c @@ -109,6 +109,6 @@ int main(void) fd1 = open_port(0, 1); if (fd1 >= 0) error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6"); - fprintf(stderr, "Success"); + fprintf(stderr, "Success\n"); return 0; } diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh index 74ff9fb2a6f0..58da5de99ac4 100755 --- a/tools/testing/selftests/net/test_vxlan_mdb.sh +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -1177,6 +1177,7 @@ encap_params_common() local plen=$1; shift local enc_ethtype=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -1195,11 +1196,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Destination IP - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 
0 "Destination IP - no match" @@ -1212,20 +1213,20 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020" run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Default destination port - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Default destination port - no match" run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Non-default destination port - match" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 
0 "Non-default destination port - no match" @@ -1238,11 +1239,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Default destination VNI - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Default destination VNI - no match" @@ -1250,11 +1251,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Non-default destination VNI - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 
0 "Non-default destination VNI - no match" @@ -1272,6 +1273,7 @@ encap_params_ipv4_ipv4() local plen=32 local enc_ethtype="ip" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1279,7 +1281,7 @@ encap_params_ipv4_ipv4() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn" + $grp $grp_dmac $src "mausezahn" } encap_params_ipv6_ipv4() @@ -1291,6 +1293,7 @@ encap_params_ipv6_ipv4() local plen=32 local enc_ethtype="ip" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1298,7 +1301,7 @@ encap_params_ipv6_ipv4() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn -6" + $grp $grp_dmac $src "mausezahn -6" } encap_params_ipv4_ipv6() @@ -1310,6 +1313,7 @@ encap_params_ipv4_ipv6() local plen=128 local enc_ethtype="ipv6" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1317,7 +1321,7 @@ encap_params_ipv4_ipv6() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn" + $grp $grp_dmac $src "mausezahn" } encap_params_ipv6_ipv6() @@ -1329,6 +1333,7 @@ encap_params_ipv6_ipv6() local plen=128 local enc_ethtype="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1336,7 +1341,7 @@ encap_params_ipv6_ipv6() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn -6" + $grp $grp_dmac $src "mausezahn -6" } starg_exclude_ir_common() @@ -1347,6 +1352,7 @@ starg_exclude_ir_common() local vtep2_ip=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; 
shift local invalid_src=$1; shift local mz=$1; shift @@ -1368,14 +1374,14 @@ starg_exclude_ir_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 log_test $? 0 "Block excluded source - second VTEP" # Check that valid source is forwarded to both VTEPs. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1385,14 +1391,14 @@ starg_exclude_ir_common() run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Block excluded source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 log_test $? 0 "Block excluded source after removal - second VTEP" # Check that valid source is forwarded to the remaining VTEP. 
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Forward valid source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1407,6 +1413,7 @@ starg_exclude_ir_ipv4_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1415,7 +1422,7 @@ starg_exclude_ir_ipv4_ipv4() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_exclude_ir_ipv6_ipv4() @@ -1426,6 +1433,7 @@ starg_exclude_ir_ipv6_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1434,7 +1442,7 @@ starg_exclude_ir_ipv6_ipv4() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_exclude_ir_ipv4_ipv6() @@ -1445,6 +1453,7 @@ starg_exclude_ir_ipv4_ipv6() local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1453,7 +1462,7 @@ starg_exclude_ir_ipv4_ipv6() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_exclude_ir_ipv6_ipv6() @@ -1464,6 +1473,7 @@ starg_exclude_ir_ipv6_ipv6() local 
vtep2_ip=2001:db8:2000::1 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1472,7 +1482,7 @@ starg_exclude_ir_ipv6_ipv6() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_include_ir_common() @@ -1483,6 +1493,7 @@ starg_include_ir_common() local vtep2_ip=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1504,14 +1515,14 @@ starg_include_ir_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 log_test $? 0 "Block excluded source - second VTEP" # Check that valid source is forwarded to both VTEPs. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1521,14 +1532,14 @@ starg_include_ir_common() run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. 
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Block excluded source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 log_test $? 0 "Block excluded source after removal - second VTEP" # Check that valid source is forwarded to the remaining VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Forward valid source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1543,6 +1554,7 @@ starg_include_ir_ipv4_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1551,7 +1563,7 @@ starg_include_ir_ipv4_ipv4() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_include_ir_ipv6_ipv4() @@ -1562,6 +1574,7 @@ starg_include_ir_ipv6_ipv4() local vtep2_ip=198.51.100.200 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1570,7 +1583,7 @@ starg_include_ir_ipv6_ipv4() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_include_ir_ipv4_ipv6() @@ -1581,6 +1594,7 @@ starg_include_ir_ipv4_ipv6() 
local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1589,7 +1603,7 @@ starg_include_ir_ipv4_ipv6() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_include_ir_ipv6_ipv6() @@ -1600,6 +1614,7 @@ starg_include_ir_ipv6_ipv6() local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1608,7 +1623,7 @@ starg_include_ir_ipv6_ipv6() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_exclude_p2mp_common() @@ -1618,6 +1633,7 @@ starg_exclude_p2mp_common() local mcast_grp=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1635,12 +1651,12 @@ starg_exclude_p2mp_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0" # Check that invalid source is not forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source" # Check that valid source is forwarded. 
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source" @@ -1648,7 +1664,7 @@ starg_exclude_p2mp_common() run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" # Check that valid source is not received anymore. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Receive of valid source after removal from group" } @@ -1660,6 +1676,7 @@ starg_exclude_p2mp_ipv4_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1667,7 +1684,7 @@ starg_exclude_p2mp_ipv4_ipv4() echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1678,6 +1695,7 @@ starg_exclude_p2mp_ipv6_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1685,7 +1703,7 @@ starg_exclude_p2mp_ipv6_ipv4() echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1696,6 +1714,7 @@ 
starg_exclude_p2mp_ipv4_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1703,7 +1722,7 @@ starg_exclude_p2mp_ipv4_ipv6() echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1714,6 +1733,7 @@ starg_exclude_p2mp_ipv6_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1721,7 +1741,7 @@ starg_exclude_p2mp_ipv6_ipv6() echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1732,6 +1752,7 @@ starg_include_p2mp_common() local mcast_grp=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1749,12 +1770,12 @@ starg_include_p2mp_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0" # Check that invalid source is not forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source" # Check that valid source is forwarded. 
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source" @@ -1762,7 +1783,7 @@ starg_include_p2mp_common() run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" # Check that valid source is not received anymore. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Receive of valid source after removal from group" } @@ -1774,6 +1795,7 @@ starg_include_p2mp_ipv4_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1781,7 +1803,7 @@ starg_include_p2mp_ipv4_ipv4() echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1792,6 +1814,7 @@ starg_include_p2mp_ipv6_ipv4() local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1799,7 +1822,7 @@ starg_include_p2mp_ipv6_ipv4() echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1810,6 +1833,7 @@ 
starg_include_p2mp_ipv4_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1817,7 +1841,7 @@ starg_include_p2mp_ipv4_ipv6() echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } @@ -1828,6 +1852,7 @@ starg_include_p2mp_ipv6_ipv6() local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1835,7 +1860,7 @@ starg_include_p2mp_ipv6_ipv6() echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1847,6 +1872,7 @@ egress_vni_translation_common() local plen=$1; shift local proto=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -1882,20 +1908,20 @@ egress_vni_translation_common() # Make sure that packets sent from the first VTEP over VLAN 10 are # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on # the second VTEP, since it is configured as PVID. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 log_test $? 
0 "Egress VNI translation - PVID configured" # Remove PVID flag from VLAN 4000 on the second VTEP and make sure # packets are no longer received by the SVI interface. run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 log_test $? 0 "Egress VNI translation - no PVID configured" # Reconfigure the PVID and make sure packets are received again. run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2 log_test $? 0 "Egress VNI translation - PVID reconfigured" } @@ -1908,6 +1934,7 @@ egress_vni_translation_ipv4_ipv4() local plen=32 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1915,7 +1942,7 @@ egress_vni_translation_ipv4_ipv4() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn" + $grp_dmac $src "mausezahn" } egress_vni_translation_ipv6_ipv4() @@ -1926,6 +1953,7 @@ egress_vni_translation_ipv6_ipv4() local plen=32 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1933,7 +1961,7 @@ egress_vni_translation_ipv6_ipv4() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn -6" + $grp_dmac $src "mausezahn -6" } egress_vni_translation_ipv4_ipv6() @@ -1944,6 +1972,7 @@ egress_vni_translation_ipv4_ipv6() local plen=128 local 
proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1951,7 +1980,7 @@ egress_vni_translation_ipv4_ipv6() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn" + $grp_dmac $src "mausezahn" } egress_vni_translation_ipv6_ipv6() @@ -1962,6 +1991,7 @@ egress_vni_translation_ipv6_ipv6() local plen=128 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1969,7 +1999,7 @@ egress_vni_translation_ipv6_ipv6() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn -6" + $grp_dmac $src "mausezahn -6" } all_zeros_mdb_common() @@ -1982,12 +2012,18 @@ all_zeros_mdb_common() local vtep4_ip=$1; shift local plen=$1; shift local ipv4_grp=239.1.1.1 + local ipv4_grp_dmac=01:00:5e:01:01:01 local ipv4_unreg_grp=239.2.2.2 + local ipv4_unreg_grp_dmac=01:00:5e:02:02:02 local ipv4_ll_grp=224.0.0.100 + local ipv4_ll_grp_dmac=01:00:5e:00:00:64 local ipv4_src=192.0.2.129 local ipv6_grp=ff0e::1 + local ipv6_grp_dmac=33:33:00:00:00:01 local ipv6_unreg_grp=ff0e::2 + local ipv6_unreg_grp_dmac=33:33:00:00:00:02 local ipv6_ll_grp=ff02::1 + local ipv6_ll_grp_dmac=33:33:00:00:00:01 local ipv6_src=2001:db8:100::1 # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic @@ -2023,7 +2059,7 @@ all_zeros_mdb_common() # Send registered IPv4 multicast and make sure it only arrives to the # first VTEP. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 
0 "Registered IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 @@ -2031,7 +2067,7 @@ all_zeros_mdb_common() # Send unregistered IPv4 multicast that is not link-local and make sure # it arrives to the first and second VTEPs. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Unregistered IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -2039,7 +2075,7 @@ all_zeros_mdb_common() # Send IPv4 link-local multicast traffic and make sure it does not # arrive to any VTEP. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Link-local IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -2074,7 +2110,7 @@ all_zeros_mdb_common() # Send registered IPv6 multicast and make sure it only arrives to the # third VTEP. - run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 1 log_test $? 0 "Registered IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 0 @@ -2082,7 +2118,7 @@ all_zeros_mdb_common() # Send unregistered IPv6 multicast that is not link-local and make sure # it arrives to the third and fourth VTEPs. 
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 2 log_test $? 0 "Unregistered IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 1 @@ -2090,7 +2126,7 @@ all_zeros_mdb_common() # Send IPv6 link-local multicast traffic and make sure it does not # arrive to any VTEP. - run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 2 log_test $? 0 "Link-local IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 1 @@ -2165,6 +2201,7 @@ mdb_fdb_common() local plen=$1; shift local proto=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -2188,7 +2225,7 @@ mdb_fdb_common() # Send IP multicast traffic and make sure it is forwarded by the MDB # and only arrives to the first VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "IP multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 @@ -2205,7 +2242,7 @@ mdb_fdb_common() # Remove the MDB entry and make sure that IP multicast is now forwarded # by the FDB to the second VTEP. 
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "IP multicast after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 2 @@ -2221,14 +2258,15 @@ mdb_fdb_ipv4_ipv4() local plen=32 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn" } mdb_fdb_ipv6_ipv4() @@ -2240,14 +2278,15 @@ mdb_fdb_ipv6_ipv4() local plen=32 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn -6" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn -6" } mdb_fdb_ipv4_ipv6() @@ -2259,14 +2298,15 @@ mdb_fdb_ipv4_ipv6() local plen=128 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn" } mdb_fdb_ipv6_ipv6() @@ -2278,14 +2318,15 @@ mdb_fdb_ipv6_ipv6() local plen=128 local 
proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn -6" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn -6" } mdb_grp1_loop() @@ -2320,7 +2361,9 @@ mdb_torture_common() local vtep1_ip=$1; shift local vtep2_ip=$1; shift local grp1=$1; shift + local grp1_dmac=$1; shift local grp2=$1; shift + local grp2_dmac=$1; shift local src=$1; shift local mz=$1; shift local pid1 @@ -2345,9 +2388,9 @@ mdb_torture_common() pid1=$! mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 & pid2=$! - ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & pid3=$! - ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & pid4=$! 
sleep 30 @@ -2363,15 +2406,17 @@ mdb_torture_ipv4_ipv4() local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=239.1.1.1 + local grp1_dmac=01:00:5e:01:01:01 local grp2=239.2.2.2 + local grp2_dmac=01:00:5e:02:02:02 local src=192.0.2.129 echo echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn" } mdb_torture_ipv6_ipv4() @@ -2380,15 +2425,17 @@ mdb_torture_ipv6_ipv4() local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=ff0e::1 + local grp1_dmac=33:33:00:00:00:01 local grp2=ff0e::2 + local grp2_dmac=33:33:00:00:00:02 local src=2001:db8:100::1 echo echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn -6" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn -6" } mdb_torture_ipv4_ipv6() @@ -2397,15 +2444,17 @@ mdb_torture_ipv4_ipv6() local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=239.1.1.1 + local grp1_dmac=01:00:5e:01:01:01 local grp2=239.2.2.2 + local grp2_dmac=01:00:5e:02:02:02 local src=192.0.2.129 echo echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn" } mdb_torture_ipv6_ipv6() @@ -2414,15 +2463,17 @@ mdb_torture_ipv6_ipv6() local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=ff0e::1 + local grp1_dmac=33:33:00:00:00:01 local grp2=ff0e::2 + local 
grp2_dmac=33:33:00:00:00:02 local src=2001:db8:100::1 echo echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn -6" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn -6" } ################################################################################ diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index c6eda21cefb6..f27a12d2a2c9 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1615,6 +1615,40 @@ TEST_F(tls, getsockopt) EXPECT_EQ(errno, EINVAL); } +TEST_F(tls, recv_efault) +{ + char *rec1 = "1111111111"; + char *rec2 = "2222222222"; + struct msghdr hdr = {}; + struct iovec iov[2]; + char recv_mem[12]; + int ret; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(send(self->fd, rec1, 10, 0), 10); + EXPECT_EQ(send(self->fd, rec2, 10, 0), 10); + + iov[0].iov_base = recv_mem; + iov[0].iov_len = sizeof(recv_mem); + iov[1].iov_base = NULL; /* broken iov to make process_rx_list fail */ + iov[1].iov_len = 1; + + hdr.msg_iovlen = 2; + hdr.msg_iov = iov; + + EXPECT_EQ(recv(self->cfd, recv_mem, 1, 0), 1); + EXPECT_EQ(recv_mem[0], rec1[0]); + + ret = recvmsg(self->cfd, &hdr, 0); + EXPECT_LE(ret, sizeof(recv_mem)); + EXPECT_GE(ret, 9); + EXPECT_EQ(memcmp(rec1, recv_mem, 9), 0); + if (ret > 9) + EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0); +} + FIXTURE(tls_err) { int fd, cfd; diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 380cb15e942e..83ed987cff34 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -244,7 +244,7 @@ for family in 4 6; do create_vxlan_pair ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on ip netns exec $NS_DST ethtool -K veth$DST 
rx-gro-list on - run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 + run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10 cleanup # use NAT to circumvent GRO FWD check @@ -258,13 +258,7 @@ for family in 4 6; do # load arp cache before running the test to reduce the amount of # stray traffic on top of the UDP tunnel ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null - run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST - cleanup - - create_vxlan_pair - run_bench "UDP tunnel fwd perf" $OL_NET$DST - ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on - run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST + run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST cleanup done diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings index 6091b45d226b..a953c96aa16e 100644 --- a/tools/testing/selftests/seccomp/settings +++ b/tools/testing/selftests/seccomp/settings @@ -1 +1 @@ -timeout=120 +timeout=180 |