303 files changed, 10460 insertions(+), 3437 deletions(-)

diff --git a/.mailmap b/.mailmap
@@ -60,6 +60,10 @@ Arnd Bergmann <arnd@arndb.de> Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com> Axel Dyks <xl@xlsigned.net> Axel Lin <axel.lin@gmail.com> +Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@linaro.org> +Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@spreadtrum.com> +Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@unisoc.com> +Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang7@gmail.com> Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com> Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com> Ben Gardner <bgardner@wabtec.com> @@ -135,6 +139,8 @@ Frank Rowand <frowand.list@gmail.com> <frowand@mvista.com> Frank Zago <fzago@systemfabricworks.com> Gao Xiang <xiang@kernel.org> <gaoxiang25@huawei.com> Gao Xiang <xiang@kernel.org> <hsiangkao@aol.com> +Gao Xiang <xiang@kernel.org> <hsiangkao@linux.alibaba.com> +Gao Xiang <xiang@kernel.org> <hsiangkao@redhat.com> Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@de.ibm.com> Gerald Schaefer <gerald.schaefer@linux.ibm.com> <gerald.schaefer@de.ibm.com> Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@linux.vnet.ibm.com> @@ -371,6 +377,7 @@ Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk> Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk> Sebastian Reichel <sre@kernel.org> <sre@debian.org> Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de> +Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com> Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com> Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com> Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f2d26cb7e853..c0fdb04a0435 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5796,6 +5796,24 @@ expediting. Set to zero to disable automatic expediting. + srcutree.srcu_max_nodelay [KNL] + Specifies the number of no-delay instances + per jiffy for which the SRCU grace period + worker thread will be rescheduled with zero + delay. Beyond this limit, worker thread will + be rescheduled with a sleep delay of one jiffy. + + srcutree.srcu_max_nodelay_phase [KNL] + Specifies the per-grace-period phase, number of + non-sleeping polls of readers. Beyond this limit, + grace period worker thread will be rescheduled + with a sleep delay of one jiffy, between each + rescan of the readers, for a grace period phase. + + srcutree.srcu_retry_check_delay [KNL] + Specifies number of microseconds of non-sleeping + delay between each non-sleeping poll of readers. 
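[Editor's note: the interaction of the second and third srcutree knobs above is easiest to see as a loop. The following is a conceptual C sketch only, with a hypothetical readers_done() helper and simplified control flow; it is not the actual kernel/rcu/srcutree.c implementation.]

	#include <linux/delay.h>
	#include <linux/jiffies.h>

	static bool readers_done(void) { return true; } /* hypothetical stand-in */

	/*
	 * Conceptual sketch: poll readers without sleeping for up to
	 * max_nodelay_phase attempts per grace-period phase, spaced by
	 * retry_check_delay_us microseconds, then fall back to sleeping
	 * roughly one jiffy between rescans.
	 */
	static void srcu_poll_readers_sketch(unsigned long max_nodelay_phase,
					     unsigned long retry_check_delay_us)
	{
		unsigned long polls = 0;

		while (!readers_done()) {
			if (polls++ < max_nodelay_phase)
				udelay(retry_check_delay_us);	/* non-sleeping poll */
			else
				msleep(jiffies_to_msecs(1));	/* back off ~one jiffy */
		}
	}

[All three are module parameters, so they can also be set on the kernel command line, e.g. srcutree.srcu_retry_check_delay=5; the value is illustrative.]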
+ srcutree.small_contention_lim [KNL] Specifies the number of update-side contention events per jiffy will be tolerated before diff --git a/Documentation/devicetree/bindings/net/cdns,macb.yaml b/Documentation/devicetree/bindings/net/cdns,macb.yaml index 762deccd3640..dfb2860ca771 100644 --- a/Documentation/devicetree/bindings/net/cdns,macb.yaml +++ b/Documentation/devicetree/bindings/net/cdns,macb.yaml @@ -20,10 +20,17 @@ properties: - items: - enum: - - cdns,versal-gem # Xilinx Versal - cdns,zynq-gem # Xilinx Zynq-7xxx SoC - cdns,zynqmp-gem # Xilinx Zynq Ultrascale+ MPSoC - const: cdns,gem # Generic + deprecated: true + + - items: + - enum: + - xlnx,versal-gem # Xilinx Versal + - xlnx,zynq-gem # Xilinx Zynq-7xxx SoC + - xlnx,zynqmp-gem # Xilinx Zynq Ultrascale+ MPSoC + - const: cdns,gem # Generic - items: - enum: @@ -183,7 +190,7 @@ examples: #address-cells = <2>; #size-cells = <2>; gem1: ethernet@ff0c0000 { - compatible = "cdns,zynqmp-gem", "cdns,gem"; + compatible = "xlnx,zynqmp-gem", "cdns,gem"; interrupt-parent = <&gic>; interrupts = <0 59 4>, <0 59 4>; reg = <0x0 0xff0c0000 0x0 0x1000>; diff --git a/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml b/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml index 5592f58fa6f0..228683773151 100644 --- a/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml +++ b/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml @@ -48,7 +48,7 @@ properties: "^led@[01]$": type: object description: Hellcreek leds - $ref: ../../leds/common.yaml# + $ref: /schemas/leds/common.yaml# properties: reg: diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 56d9aca8c954..c138a1022879 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -155,70 +155,65 @@ properties: - in-band-status fixed-link: - allOf: - - if: - type: array - then: - deprecated: true - items: - - minimum: 0 - maximum: 31 - description: - Emulated PHY ID, choose any but unique to the all - specified fixed-links - - - enum: [0, 1] - description: - Duplex configuration. 0 for half duplex or 1 for - full duplex - - - enum: [10, 100, 1000, 2500, 10000] - description: - Link speed in Mbits/sec. - - - enum: [0, 1] - description: - Pause configuration. 0 for no pause, 1 for pause - - - enum: [0, 1] - description: - Asymmetric pause configuration. 0 for no asymmetric - pause, 1 for asymmetric pause - - - - if: - type: object - then: - properties: - speed: - description: - Link speed. - $ref: /schemas/types.yaml#/definitions/uint32 - enum: [10, 100, 1000, 2500, 10000] - - full-duplex: - $ref: /schemas/types.yaml#/definitions/flag - description: - Indicates that full-duplex is used. When absent, half - duplex is assumed. - - pause: - $ref: /schemas/types.yaml#definitions/flag - description: - Indicates that pause should be enabled. - - asym-pause: - $ref: /schemas/types.yaml#/definitions/flag - description: - Indicates that asym_pause should be enabled. - - link-gpios: - maxItems: 1 - description: - GPIO to determine if the link is up - - required: - - speed + oneOf: + - $ref: /schemas/types.yaml#/definitions/uint32-array + deprecated: true + items: + - minimum: 0 + maximum: 31 + description: + Emulated PHY ID, choose any but unique to the all + specified fixed-links + + - enum: [0, 1] + description: + Duplex configuration. 
0 for half duplex or 1 for + full duplex + + - enum: [10, 100, 1000, 2500, 10000] + description: + Link speed in Mbits/sec. + + - enum: [0, 1] + description: + Pause configuration. 0 for no pause, 1 for pause + + - enum: [0, 1] + description: + Asymmetric pause configuration. 0 for no asymmetric + pause, 1 for asymmetric pause + - type: object + additionalProperties: false + properties: + speed: + description: + Link speed. + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [10, 100, 1000, 2500, 10000] + + full-duplex: + $ref: /schemas/types.yaml#/definitions/flag + description: + Indicates that full-duplex is used. When absent, half + duplex is assumed. + + pause: + $ref: /schemas/types.yaml#definitions/flag + description: + Indicates that pause should be enabled. + + asym-pause: + $ref: /schemas/types.yaml#/definitions/flag + description: + Indicates that asym_pause should be enabled. + + link-gpios: + maxItems: 1 + description: + GPIO to determine if the link is up + + required: + - speed allOf: - if: diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml index 85a8d8fb6b8f..924af2df832c 100644 --- a/Documentation/devicetree/bindings/net/fsl,fec.yaml +++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml @@ -187,6 +187,7 @@ properties: Should specify the gpio for phy reset. phy-reset-duration: + $ref: /schemas/types.yaml#/definitions/uint32 deprecated: true description: Reset duration in milliseconds. Should present only if property @@ -195,12 +196,14 @@ properties: and 1 millisecond will be used instead. phy-reset-active-high: + type: boolean deprecated: true description: If present then the reset sequence using the GPIO specified in the "phy-reset-gpios" property is reversed (H=reset state, L=operation state). phy-reset-post-delay: + $ref: /schemas/types.yaml#/definitions/uint32 deprecated: true description: Post reset delay in milliseconds. 
If present then a delay of phy-reset-post-delay diff --git a/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml b/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml index 1bcaf6ba822c..a191a04e681c 100644 --- a/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml +++ b/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml @@ -58,7 +58,6 @@ properties: spi-cpha: true spi-cpol: true - spi-max-frequency: true required: - compatible @@ -85,6 +84,7 @@ allOf: contains: const: marvell,nfc-spi then: + $ref: /schemas/spi/spi-peripheral-props.yaml# properties: break-control: false flow-control: false @@ -108,7 +108,7 @@ allOf: spi-max-frequency: false reg: false -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml b/Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml index ef1155038a2f..1dcbddbc5a74 100644 --- a/Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml +++ b/Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml @@ -30,8 +30,6 @@ properties: reg: maxItems: 1 - spi-max-frequency: true - uicc-present: type: boolean description: | @@ -55,10 +53,11 @@ then: properties: spi-max-frequency: false else: + $ref: /schemas/spi/spi-peripheral-props.yaml# required: - spi-max-frequency -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml b/Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml index 963d9531a856..647569051ed8 100644 --- a/Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml +++ b/Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml @@ -25,8 +25,6 @@ properties: st95hfvin-supply: description: ST95HF transceiver's Vin regulator supply - spi-max-frequency: true - required: - compatible - enable-gpio @@ -34,7 +32,10 @@ required: - reg - spi-max-frequency -additionalProperties: false +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml b/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml index 404c8df99364..9cc236ec42f2 100644 --- a/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml +++ b/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml @@ -40,8 +40,6 @@ properties: reg: maxItems: 1 - spi-max-frequency: true - ti,enable-gpios: minItems: 1 maxItems: 2 @@ -65,7 +63,10 @@ required: - ti,enable-gpios - vin-supply -additionalProperties: false +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false examples: - | diff --git a/Documentation/networking/devlink/devlink-selftests.rst b/Documentation/networking/devlink/devlink-selftests.rst new file mode 100644 index 000000000000..c0aa1f3aef0d --- /dev/null +++ b/Documentation/networking/devlink/devlink-selftests.rst @@ -0,0 +1,38 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +================= +Devlink Selftests +================= + +The ``devlink-selftests`` API allows executing selftests on the device. + +Tests Mask +========== +The ``devlink-selftests`` command should be run with a mask indicating +the tests to be executed. + +Tests Description +================= +The following is a list of tests that drivers may execute. + +.. list-table:: List of tests + :widths: 5 90 + + * - Name + - Description + * - ``DEVLINK_SELFTEST_FLASH`` + - Devices may have the firmware on non-volatile memory on the board, e.g. + flash. 
This particular test helps to run a flash selftest on the device. + Implementation of the test is left to the driver/firmware. + +example usage +------------- + +.. code:: shell + + # Query selftests supported on the devlink device + $ devlink dev selftests show DEV + # Query selftests supported on all devlink devices + $ devlink dev selftests show + # Executes selftests on the device + $ devlink dev selftests run DEV id flash diff --git a/Documentation/networking/devlink/mlxsw.rst b/Documentation/networking/devlink/mlxsw.rst index cf857cb4ba8f..433962225bd4 100644 --- a/Documentation/networking/devlink/mlxsw.rst +++ b/Documentation/networking/devlink/mlxsw.rst @@ -58,6 +58,30 @@ The ``mlxsw`` driver reports the following versions - running - Three digit firmware version +Line card auxiliary device info versions +======================================== + +The ``mlxsw`` driver reports the following versions for line card auxiliary device + +.. list-table:: devlink info versions implemented + :widths: 5 5 90 + + * - Name + - Type + - Description + * - ``hw.revision`` + - fixed + - The hardware revision for this line card + * - ``ini.version`` + - running + - Version of line card INI loaded + * - ``fw.psid`` + - fixed + - Line card device PSID + * - ``fw.version`` + - running + - Three digit firmware version of line card device + Driver-specific Traps ===================== diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 5879ef3bc2cb..56cd4ea059b2 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -636,6 +636,16 @@ tcp_recovery - INTEGER Default: 0x1 +tcp_reflect_tos - BOOLEAN + For listening sockets, reuse the DSCP value of the initial SYN message + for outgoing packets. This allows to have both directions of a TCP + stream to use the same DSCP value, assuming DSCP remains unchanged for + the lifetime of the connection. + + This options affects both IPv4 and IPv6. + + Default: 0 (disabled) + tcp_reordering - INTEGER Initial reordering level of packets in a TCP stream. TCP stack can then dynamically adjust flow reordering level @@ -2884,7 +2894,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max Default: 4K sctp_wmem - vector of 3 INTEGERs: min, default, max - Currently this tunable has no effect. + Only the first value ("min") is used, "default" and "max" are + ignored. + + min: Minimum size of send buffer that can be used by SCTP sockets. + It is guaranteed to each SCTP socket (but not association) even + under moderate memory pressure. + + Default: 4K addr_scope_policy - INTEGER Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 6e090fb96a0e..98a283930307 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5658,7 +5658,7 @@ by a string of size ``name_size``. 
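[Editor's note: stepping back to the tcp_reflect_tos sysctl documented above: with it enabled on the listening host, only the client needs to mark the flow, and the listener's replies mirror the DSCP. A hedged client-side sketch using the ordinary socket API; the sysctl name is from the document, everything else is plain userspace code.]

	#include <netinet/in.h>
	#include <sys/socket.h>

	/* Client-side sketch: request DSCP EF so that, with
	 * net.ipv4.tcp_reflect_tos=1 on the listening host, both
	 * directions of the TCP stream carry the same DSCP. */
	static int connect_with_dscp_ef(int fd, const struct sockaddr_in *sa)
	{
		int tos = 0x2e << 2;	/* DSCP EF in the upper six TOS bits */

		if (setsockopt(fd, IPPROTO_IP, IP_TOS, &tos, sizeof(tos)) < 0)
			return -1;
		return connect(fd, (const struct sockaddr *)sa, sizeof(*sa));
	}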
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) - #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES + #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN #define KVM_STATS_BASE_SHIFT 8 #define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) diff --git a/MAINTAINERS b/MAINTAINERS index 46b345ddc67c..1920d82db83e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15862,7 +15862,7 @@ PIN CONTROLLER - FREESCALE M: Dong Aisheng <aisheng.dong@nxp.com> M: Fabio Estevam <festevam@gmail.com> M: Shawn Guo <shawnguo@kernel.org> -M: Stefan Agner <stefan@agner.ch> +M: Jacky Bai <ping.bai@nxp.com> R: Pengutronix Kernel Team <kernel@pengutronix.de> L: linux-gpio@vger.kernel.org S: Maintained @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Superb Owl # *DOCUMENTATION* diff --git a/arch/arm/boot/dts/lan966x.dtsi b/arch/arm/boot/dts/lan966x.dtsi index 3cb02fffe716..38e90a31d2dd 100644 --- a/arch/arm/boot/dts/lan966x.dtsi +++ b/arch/arm/boot/dts/lan966x.dtsi @@ -38,7 +38,7 @@ sys_clk: sys_clk { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <162500000>; + clock-frequency = <165625000>; }; cpu_clk: cpu_clk { diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index c546356d0f02..5738496717e2 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -549,7 +549,7 @@ static struct pxa2xx_spi_controller corgi_spi_info = { }; static struct gpiod_lookup_table corgi_spi_gpio_table = { - .dev_id = "pxa2xx-spi.1", + .dev_id = "spi1", .table = { GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW), GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW), diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index 2ae06edf413c..2fd665944103 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -635,7 +635,7 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = { }; static struct gpiod_lookup_table pxa_ssp2_gpio_table = { - .dev_id = "pxa2xx-spi.2", + .dev_id = "spi2", .table = { GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_HX4700_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW), { }, diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c index 753fe166ab68..624088257cfc 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -140,7 +140,7 @@ struct platform_device pxa_spi_ssp4 = { }; static struct gpiod_lookup_table pxa_ssp3_gpio_table = { - .dev_id = "pxa2xx-spi.3", + .dev_id = "spi3", .table = { GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS1, "cs", 0, GPIO_ACTIVE_LOW), GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS2, "cs", 1, GPIO_ACTIVE_LOW), @@ -149,7 +149,7 @@ static struct gpiod_lookup_table pxa_ssp3_gpio_table = { }; static struct gpiod_lookup_table pxa_ssp4_gpio_table = { - .dev_id = "pxa2xx-spi.4", + .dev_id = "spi4", .table = { GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS3, "cs", 0, GPIO_ACTIVE_LOW), GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS4, "cs", 1, GPIO_ACTIVE_LOW), diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c index f98dc61e87af..98423a96f440 100644 --- a/arch/arm/mach-pxa/littleton.c +++ b/arch/arm/mach-pxa/littleton.c @@ -207,7 +207,7 @@ static struct spi_board_info littleton_spi_devices[] __initdata = { }; static struct gpiod_lookup_table littleton_spi_gpio_table = { - .dev_id = "pxa2xx-spi.2", + .dev_id = "spi2", 
.table = { GPIO_LOOKUP_IDX("gpio-pxa", LITTLETON_GPIO_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW), { }, diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index 20456a55c4c5..0827ebca1d38 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -994,7 +994,7 @@ static struct pxa2xx_spi_controller magician_spi_info = { }; static struct gpiod_lookup_table magician_spi_gpio_table = { - .dev_id = "pxa2xx-spi.2", + .dev_id = "spi2", .table = { /* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */ GPIO_LOOKUP_IDX("gpio-pxa", GPIO14_MAGICIAN_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW), diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index dd88953adc9d..9964729cd428 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -578,7 +578,7 @@ static struct pxa2xx_spi_controller spitz_spi_info = { }; static struct gpiod_lookup_table spitz_spi_gpio_table = { - .dev_id = "pxa2xx-spi.2", + .dev_id = "spi2", .table = { GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW), GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW), diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c index d03520555497..c4d4162a7e6e 100644 --- a/arch/arm/mach-pxa/z2.c +++ b/arch/arm/mach-pxa/z2.c @@ -623,7 +623,7 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = { }; static struct gpiod_lookup_table pxa_ssp1_gpio_table = { - .dev_id = "pxa2xx-spi.1", + .dev_id = "spi1", .table = { GPIO_LOOKUP_IDX("gpio-pxa", GPIO24_ZIPITZ2_WIFI_CS, "cs", 0, GPIO_ACTIVE_LOW), { }, @@ -631,7 +631,7 @@ static struct gpiod_lookup_table pxa_ssp1_gpio_table = { }; static struct gpiod_lookup_table pxa_ssp2_gpio_table = { - .dev_id = "pxa2xx-spi.2", + .dev_id = "spi2", .table = { GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_ZIPITZ2_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW), { }, diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 34cf8a598617..a4c46a03d2e2 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -73,6 +73,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) endif KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) +KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) # GCC versions that support the "-mstrict-align" option default to allowing # unaligned accesses. 
While unaligned accesses are explicitly allowed in the diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts index 039b92abf046..f72540bd14a3 100644 --- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts +++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts @@ -35,7 +35,7 @@ gpio-keys { compatible = "gpio-keys"; - key0 { + key { label = "KEY0"; linux,code = <BTN_0>; gpios = <&gpio0 10 GPIO_ACTIVE_LOW>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts index b9e30df127fe..8abdbe26a1d0 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts @@ -47,7 +47,7 @@ gpio-keys { compatible = "gpio-keys"; - boot { + key-boot { label = "BOOT"; linux,code = <BTN_0>; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts index 8d23401b0bbb..3c6df1ecf76f 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts @@ -52,7 +52,7 @@ gpio-keys { compatible = "gpio-keys"; - boot { + key-boot { label = "BOOT"; linux,code = <BTN_0>; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts index 24fd83b43d9d..03c9843d503e 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts @@ -46,19 +46,19 @@ gpio-keys { compatible = "gpio-keys"; - up { + key-up { label = "UP"; linux,code = <BTN_1>; gpios = <&gpio1_0 7 GPIO_ACTIVE_LOW>; }; - press { + key-press { label = "PRESS"; linux,code = <BTN_0>; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; }; - down { + key-down { label = "DOWN"; linux,code = <BTN_2>; gpios = <&gpio0 1 GPIO_ACTIVE_LOW>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts index 25341f38292a..7164ad063178 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts @@ -23,7 +23,7 @@ gpio-keys { compatible = "gpio-keys"; - boot { + key-boot { label = "BOOT"; linux,code = <BTN_0>; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index c71d6591d539..33bb60a354cd 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -78,7 +78,7 @@ obj-$(CONFIG_SMP) += cpu_ops_sbi.o endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_KGDB) += kgdb.o -obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o +obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 9cb85095fd45..0cb94992c15b 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -349,7 +349,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, { const char *strtab, *name, *shstrtab; const Elf_Shdr *sechdrs; - Elf_Rela *relas; + Elf64_Rela *relas; int i, r_type; /* String & section header string table */ diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index 2c6e1c6ecbe7..4120c428dc37 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -2,7 +2,7 @@ /* * Kernel interface for the s390 arch_random_* 
functions * - * Copyright IBM Corp. 2017, 2020 + * Copyright IBM Corp. 2017, 2022 * * Author: Harald Freudenberger <freude@de.ibm.com> * @@ -14,6 +14,7 @@ #ifdef CONFIG_ARCH_RANDOM #include <linux/static_key.h> +#include <linux/preempt.h> #include <linux/atomic.h> #include <asm/cpacf.h> @@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v) static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { - if (static_branch_likely(&s390_arch_random_available)) { + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); atomic64_add(sizeof(*v), &s390_arch_random_counter); return true; @@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v) static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { - if (static_branch_likely(&s390_arch_random_available)) { + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); atomic64_add(sizeof(*v), &s390_arch_random_counter); return true; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7fff10e15969..52a7f91527fe 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2474,7 +2474,7 @@ config RETHUNK bool "Enable return-thunks" depends on RETPOLINE && CC_HAS_RETURN_THUNK select OBJTOOL if HAVE_OBJTOOL - default y + default y if X86_64 help Compile the kernel with the return-thunks compiler option to guard against kernel-to-user data leaks by avoiding return speculation. @@ -2483,21 +2483,21 @@ config RETHUNK config CPU_UNRET_ENTRY bool "Enable UNRET on kernel entry" - depends on CPU_SUP_AMD && RETHUNK + depends on CPU_SUP_AMD && RETHUNK && X86_64 default y help Compile the kernel with support for the retbleed=unret mitigation. config CPU_IBPB_ENTRY bool "Enable IBPB on kernel entry" - depends on CPU_SUP_AMD + depends on CPU_SUP_AMD && X86_64 default y help Compile the kernel with support for the retbleed=ibpb mitigation. config CPU_IBRS_ENTRY bool "Enable IBRS on kernel entry" - depends on CPU_SUP_INTEL + depends on CPU_SUP_INTEL && X86_64 default y help Compile the kernel with support for the spectre_v2=ibrs mitigation. diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1f40dad30d50..7854685c5f25 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -27,6 +27,7 @@ RETHUNK_CFLAGS := -mfunction-return=thunk-extern RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) endif +export RETHUNK_CFLAGS export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 13179f31fe10..4f70fb6c2c1e 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -278,9 +278,9 @@ enum { }; /* - * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in - * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when - * TSX is not supported they have no consistent behavior: + * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x + * are the TSX flags when TSX is supported, but when TSX is not supported + * they have no consistent behavior: * * - For wrmsr(), bits 61:62 are considered part of the sign extension. * - For HW updates (branch captures) bits 61:62 are always OFF and are not @@ -288,7 +288,7 @@ enum { * * Therefore, if: * - * 1) LBR has TSX format + * 1) LBR format LBR_FORMAT_EIP_FLAGS2 * 2) CPU has no TSX support enabled * * ... 
then any value passed to wrmsr() must be sign extended to 63 bits and any @@ -300,7 +300,7 @@ static inline bool lbr_from_signext_quirk_needed(void) bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM); - return !tsx_support && x86_pmu.lbr_has_tsx; + return !tsx_support; } static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); @@ -1609,9 +1609,6 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_map = hsw_lbr_sel_map; x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); - - if (lbr_from_signext_quirk_needed()) - static_branch_enable(&lbr_from_quirk_key); } /* skylake */ @@ -1702,7 +1699,11 @@ void intel_pmu_lbr_init(void) switch (x86_pmu.intel_cap.lbr_format) { case LBR_FORMAT_EIP_FLAGS2: x86_pmu.lbr_has_tsx = 1; - fallthrough; + x86_pmu.lbr_from_flags = 1; + if (lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); + break; + case LBR_FORMAT_EIP_FLAGS: x86_pmu.lbr_from_flags = 1; break; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 00f5227c8459..a77b915d36a8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -302,6 +302,7 @@ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 10a3bfc1eb23..38a3e86e665e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -297,6 +297,8 @@ do { \ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \ + X86_FEATURE_USE_IBPB_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index d6858533e6e5..62f6b8b7c4a5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -555,7 +555,9 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) dest = addr + insn.length + insn.immediate.value; if (__static_call_fixup(addr, op, dest) || - WARN_ON_ONCE(dest != &__x86_return_thunk)) + WARN_ONCE(dest != &__x86_return_thunk, + "missing return thunk: %pS-%pS: %*ph", + addr, dest, 5, addr)) continue; DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index aa34f908c39f..6454bc767f0f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -975,6 +975,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; } #define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" #define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" #define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n" +#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may 
cause unnecessary performance loss\n" #ifdef CONFIG_BPF_SYSCALL void unpriv_ebpf_notify(int new_state) @@ -1415,6 +1416,8 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_IBRS: setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); + if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) + pr_warn(SPECTRE_V2_IBRS_PERF_MSG); break; case SPECTRE_V2_LFENCE: @@ -1516,7 +1519,16 @@ static void __init spectre_v2_select_mitigation(void) * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { + if (boot_cpu_has_bug(X86_BUG_RETBLEED) && + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) { + + if (retbleed_cmd != RETBLEED_CMD_IBPB) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB_FW); + pr_info("Enabling Speculation Barrier for firmware calls\n"); + } + + } else if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 143e37298d8a..e5fa335a4ea7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6029,6 +6029,11 @@ split_irqchip_unlock: r = 0; break; case KVM_CAP_X86_USER_SPACE_MSR: + r = -EINVAL; + if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL | + KVM_MSR_EXIT_REASON_UNKNOWN | + KVM_MSR_EXIT_REASON_FILTER)) + break; kvm->arch.user_space_msr_mask = cap->args[0]; r = 0; break; @@ -6183,6 +6188,9 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) return -EFAULT; + if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) empty &= !filter.ranges[i].nmsrs; diff --git a/certs/Kconfig b/certs/Kconfig index 476755703cf8..bf9b511573d7 100644 --- a/certs/Kconfig +++ b/certs/Kconfig @@ -43,6 +43,7 @@ config SYSTEM_TRUSTED_KEYRING bool "Provide system-wide ring of trusted keys" depends on KEYS depends on ASYMMETRIC_KEY_TYPE + depends on X509_CERTIFICATE_PARSER help Provide a system keyring to which trusted keys can be added. Keys in the keyring are considered to be trusted. 
Keys may be added at will diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 6ff1901d7d43..3c6d4ef87be0 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -782,7 +782,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) if (!osc_cpc_flexible_adr_space_confirmed) { pr_debug("Flexible address space capability not supported\n"); - goto out_free; + if (!cpc_supported_by_cpu()) + goto out_free; } addr = ioremap(gas_t->address, gas_t->bit_width/8); @@ -809,7 +810,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) } if (!osc_cpc_flexible_adr_space_confirmed) { pr_debug("Flexible address space capability not supported\n"); - goto out_free; + if (!cpc_supported_by_cpu()) + goto out_free; } } else { if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) { diff --git a/drivers/clk/clk-lan966x.c b/drivers/clk/clk-lan966x.c index d1535ac13e89..81cb90955d68 100644 --- a/drivers/clk/clk-lan966x.c +++ b/drivers/clk/clk-lan966x.c @@ -213,7 +213,7 @@ static int lan966x_gate_clk_register(struct device *dev, hw_data->hws[i] = devm_clk_hw_register_gate(dev, clk_gate_desc[idx].name, - "lan966x", 0, base, + "lan966x", 0, gate_base, clk_gate_desc[idx].bit_idx, 0, &clk_gate_lock); diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index dcc141068128..af22be84034b 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -202,15 +202,6 @@ struct fwnet_packet_task { }; /* - * Get fifo address embedded in hwaddr - */ -static __u64 fwnet_hwaddr_fifo(union fwnet_hwaddr *ha) -{ - return (u64)get_unaligned_be16(&ha->uc.fifo_hi) << 32 - | get_unaligned_be32(&ha->uc.fifo_lo); -} - -/* * saddr == NULL means use device source address. * daddr == NULL means leave destination address (eg unresolved arp). 
*/ @@ -1306,7 +1297,7 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) max_payload = peer->max_payload; datagram_label_ptr = &peer->datagram_label; - ptask->fifo_addr = fwnet_hwaddr_fifo(ha); + ptask->fifo_addr = get_unaligned_be48(ha->uc.fifo); ptask->generation = generation; ptask->dest_node = dest_node; ptask->speed = peer->speed; @@ -1494,8 +1485,7 @@ static int fwnet_probe(struct fw_unit *unit, ha.uc.uniq_id = cpu_to_be64(card->guid); ha.uc.max_rec = dev->card->max_receive; ha.uc.sspd = dev->card->link_speed; - ha.uc.fifo_hi = cpu_to_be16(dev->local_fifo >> 32); - ha.uc.fifo_lo = cpu_to_be32(dev->local_fifo & 0xffffffff); + put_unaligned_be48(dev->local_fifo, ha.uc.fifo); dev_addr_set(net, ha.u); memset(net->broadcast, -1, net->addr_len); diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 08bc52c3cdcb..ecd7d169470b 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -351,6 +351,9 @@ static const struct regmap_config pca953x_i2c_regmap = { .reg_bits = 8, .val_bits = 8, + .use_single_read = true, + .use_single_write = true, + .readable_reg = pca953x_readable_register, .writeable_reg = pca953x_writeable_register, .volatile_reg = pca953x_volatile_register, @@ -906,15 +909,18 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert) { DECLARE_BITMAP(val, MAX_LINE); + u8 regaddr; int ret; - ret = regcache_sync_region(chip->regmap, chip->regs->output, - chip->regs->output + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip) - 1); if (ret) goto out; - ret = regcache_sync_region(chip->regmap, chip->regs->direction, - chip->regs->direction + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip) - 1); if (ret) goto out; @@ -1127,14 +1133,14 @@ static int pca953x_regcache_sync(struct device *dev) * sync these registers first and only then sync the rest. 
*/ regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0); - ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip)); + ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync GPIO dir registers: %d\n", ret); return ret; } regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0); - ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip)); + ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync GPIO out registers: %d\n", ret); return ret; @@ -1144,7 +1150,7 @@ static int pca953x_regcache_sync(struct device *dev) if (chip->driver_data & PCA_PCAL) { regaddr = pca953x_recalc_addr(chip, PCAL953X_IN_LATCH, 0); ret = regcache_sync_region(chip->regmap, regaddr, - regaddr + NBANK(chip)); + regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync INT latch registers: %d\n", ret); @@ -1153,7 +1159,7 @@ static int pca953x_regcache_sync(struct device *dev) regaddr = pca953x_recalc_addr(chip, PCAL953X_INT_MASK, 0); ret = regcache_sync_region(chip->regmap, regaddr, - regaddr + NBANK(chip)); + regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync INT mask registers: %d\n", ret); diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index b6d3a57e27ed..7f8e2fed2988 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -99,7 +99,7 @@ static inline void xgpio_set_value32(unsigned long *map, int bit, u32 v) const unsigned long offset = (bit % BITS_PER_LONG) & BIT(5); map[index] &= ~(0xFFFFFFFFul << offset); - map[index] |= v << offset; + map[index] |= (unsigned long)v << offset; } static inline int xgpio_regoffset(struct xgpio_instance *chip, int ch) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6b6d46e29e6e..4608599ba6bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1364,16 +1364,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdkfd_process_info *process_info = vm->process_info; - struct amdgpu_bo *pd = vm->root.bo; if (!process_info) return; - /* Release eviction fence from PD */ - amdgpu_bo_reserve(pd, false); - amdgpu_bo_fence(pd, NULL, false); - amdgpu_bo_unreserve(pd); - /* Update process info */ mutex_lock(&process_info->lock); process_info->n_vms--; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 714178f1b6c6..2168163aad2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list, rhead); - + mutex_destroy(&list->bo_list_mutex); kvfree(list); } @@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, trace_amdgpu_cs_bo_status(list->num_entries, total_size); + mutex_init(&list->bo_list_mutex); *result = list; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 529d52a204cf..9caea1688fc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -47,6 +47,10 @@ struct amdgpu_bo_list { struct amdgpu_bo *oa_obj; unsigned first_userptr; unsigned num_entries; + + /* Protect 
access during command submission. + */ + struct mutex bo_list_mutex; }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b28af04b0c3e..d8f1335bc68f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + mutex_lock(&p->bo_list->bo_list_mutex); + /* One for TTM and one for the CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) e->tv.num_shared = 2; @@ -651,6 +653,7 @@ out_free_user_pages: kvfree(e->user_pages); e->user_pages = NULL; } + mutex_unlock(&p->bo_list->bo_list_mutex); } return r; } @@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) + if (error && backoff) { ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); + mutex_unlock(&parser->bo_list->bo_list_mutex); + } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } } r = amdgpu_vm_handle_moved(adev, vm); @@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); + mutex_unlock(&p->bo_list->bo_list_mutex); return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 93ac33a8de9a..3087dd1a1856 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1653,7 +1653,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) adev->dm.crc_rd_wrk = amdgpu_dm_crtc_secure_display_create_work(); #endif - if (dc_enable_dmub_notifications(adev->dm.dc)) { + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { init_completion(&adev->dm.dmub_aux_transfer_done); adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL); if (!adev->dm.dmub_notify) { @@ -1689,6 +1689,13 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) goto error; } + /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. + * It is expected that DMUB will resend any pending notifications at this point, for + * example HPD from DPIA. 
+ */ + if (dc_is_dmub_outbox_supported(adev->dm.dc)) + dc_enable_dmub_outbox(adev->dm.dc); + /* create fake encoders for MST */ dm_dp_create_fake_mst_encoders(adev); @@ -2678,9 +2685,6 @@ static int dm_resume(void *handle) */ link_enc_cfg_copy(adev->dm.dc->current_state, dc_state); - if (dc_enable_dmub_notifications(adev->dm.dc)) - amdgpu_dm_outbox_init(adev); - r = dm_dmub_hw_init(adev); if (r) DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); @@ -2698,6 +2702,11 @@ static int dm_resume(void *handle) } } + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + amdgpu_dm_outbox_init(adev); + dc_enable_dmub_outbox(adev->dm.dc); + } + WARN_ON(!dc_commit_state(dm->dc, dc_state)); dm_gpureset_commit_state(dm->cached_dc_state, dm); @@ -2719,13 +2728,15 @@ static int dm_resume(void *handle) /* TODO: Remove dc_state->dccg, use dc->dccg directly. */ dc_resource_state_construct(dm->dc, dm_state->context); - /* Re-enable outbox interrupts for DPIA. */ - if (dc_enable_dmub_notifications(adev->dm.dc)) - amdgpu_dm_outbox_init(adev); - /* Before powering on DC we need to re-initialize DMUB. */ dm_dmub_hw_resume(adev); + /* Re-enable outbox interrupts for DPIA. */ + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + amdgpu_dm_outbox_init(adev); + dc_enable_dmub_outbox(adev->dm.dc); + } + /* power on hardware */ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c index d5962a34c01d..e5fc875990c4 100644 --- a/drivers/gpu/drm/drm_gem_ttm_helper.c +++ b/drivers/gpu/drm/drm_gem_ttm_helper.c @@ -64,8 +64,13 @@ int drm_gem_ttm_vmap(struct drm_gem_object *gem, struct iosys_map *map) { struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + int ret; + + dma_resv_lock(gem->resv, NULL); + ret = ttm_bo_vmap(bo, map); + dma_resv_unlock(gem->resv); - return ttm_bo_vmap(bo, map); + return ret; } EXPORT_SYMBOL(drm_gem_ttm_vmap); @@ -82,7 +87,9 @@ void drm_gem_ttm_vunmap(struct drm_gem_object *gem, { struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + dma_resv_lock(gem->resv, NULL); ttm_bo_vunmap(bo, map); + dma_resv_unlock(gem->resv); } EXPORT_SYMBOL(drm_gem_ttm_vunmap); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 09f82545789f..44e7339e7a4a 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -273,10 +273,17 @@ struct intel_context { u8 child_index; /** @guc: GuC specific members for parallel submission */ struct { - /** @wqi_head: head pointer in work queue */ + /** @wqi_head: cached head pointer in work queue */ u16 wqi_head; - /** @wqi_tail: tail pointer in work queue */ + /** @wqi_tail: cached tail pointer in work queue */ u16 wqi_tail; + /** @wq_head: pointer to the actual head in work queue */ + u32 *wq_head; + /** @wq_tail: pointer to the actual head in work queue */ + u32 *wq_tail; + /** @wq_status: pointer to the status in work queue */ + u32 *wq_status; + /** * @parent_page: page in context state (ce->state) used * by parent for work queue, process descriptor diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 86f7a9ac1c39..2b0266cab66b 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -661,6 +661,16 @@ static inline void execlists_schedule_out(struct i915_request *rq) i915_request_put(rq); } +static u32 
map_i915_prio_to_lrc_desc_prio(int prio) +{ + if (prio > I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_HIGH; + else if (prio < I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_LOW; + else + return GEN12_CTX_PRIORITY_NORMAL; +} + static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; @@ -669,7 +679,7 @@ static u64 execlists_update_context(struct i915_request *rq) desc = ce->lrc.desc; if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY) - desc |= lrc_desc_priority(rq_prio(rq)); + desc |= map_i915_prio_to_lrc_desc_prio(rq_prio(rq)); /* * WaIdleLiteRestore:bdw,skl diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 31be734010db..a390f0813c8b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -111,16 +111,6 @@ enum { #define XEHP_SW_COUNTER_SHIFT 58 #define XEHP_SW_COUNTER_WIDTH 6 -static inline u32 lrc_desc_priority(int prio) -{ - if (prio > I915_PRIORITY_NORMAL) - return GEN12_CTX_PRIORITY_HIGH; - else if (prio < I915_PRIORITY_NORMAL) - return GEN12_CTX_PRIORITY_LOW; - else - return GEN12_CTX_PRIORITY_NORMAL; -} - static inline void lrc_runtime_start(struct intel_context *ce) { struct intel_context_stats *stats = &ce->stats; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 4ef9990ed7f8..29ef8afc8c2e 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -122,6 +122,9 @@ enum intel_guc_action { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, + INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY = 0x1005, + INTEL_GUC_ACTION_V69_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, + INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 966e69a8b1c1..9feda105f913 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -170,6 +170,11 @@ struct intel_guc { /** @ads_engine_usage_size: size of engine usage in the ADS */ u32 ads_engine_usage_size; + /** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */ + struct i915_vma *lrc_desc_pool_v69; + /** @lrc_desc_pool_vaddr_v69: contents of the GuC LRC descriptor pool */ + void *lrc_desc_pool_vaddr_v69; + /** * @context_lookup: used to resolve intel_context from guc_id, if a * context is present in this structure it is registered with the GuC diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 42cb7a9a6199..89a7e5ec0614 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -203,6 +203,20 @@ struct guc_wq_item { u32 fence_id; } __packed; +struct guc_process_desc_v69 { + u32 stage_id; + u64 db_base_addr; + u32 head; + u32 tail; + u32 error_offset; + u64 wq_base_addr; + u32 wq_size_bytes; + u32 wq_status; + u32 engine_presence; + u32 priority; + u32 reserved[36]; +} __packed; + struct guc_sched_wq_desc { u32 head; u32 tail; @@ -227,6 +241,37 @@ struct guc_ctxt_registration_info { }; #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) +/* Preempt to idle on quantum expiry */ 
+#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69 BIT(0) + +/* + * GuC Context registration descriptor. + * FIXME: This is only required to exist during context registration. + * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC + * is not required. + */ +struct guc_lrc_desc_v69 { + u32 hw_context_desc; + u32 slpm_perf_mode_hint; /* SPLC v1 only */ + u32 slpm_freq_hint; + u32 engine_submit_mask; /* In logical space */ + u8 engine_class; + u8 reserved0[3]; + u32 priority; + u32 process_desc; + u32 wq_addr; + u32 wq_size; + u32 context_flags; /* CONTEXT_REGISTRATION_* */ + /* Time for one workload to execute. (in micro seconds) */ + u32 execution_quantum; + /* Time to wait for a preemption request to complete before issuing a + * reset. (in micro seconds). + */ + u32 preemption_timeout; + u32 policy_flags; /* CONTEXT_POLICY_* */ + u32 reserved1[19]; +} __packed; + /* 32-bit KLV structure as used by policy updates and others */ struct guc_klv_generic_dw_t { u32 kl; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1726f0f19901..9ffb343d0f79 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -414,12 +414,15 @@ struct sync_semaphore { }; struct parent_scratch { - struct guc_sched_wq_desc wq_desc; + union guc_descs { + struct guc_sched_wq_desc wq_desc; + struct guc_process_desc_v69 pdesc; + } descs; struct sync_semaphore go; struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; - u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - + u8 unused[WQ_OFFSET - sizeof(union guc_descs) - sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; u32 wq[WQ_SIZE / sizeof(u32)]; @@ -456,17 +459,23 @@ __get_parent_scratch(struct intel_context *ce) LRC_STATE_OFFSET) / sizeof(u32))); } +static struct guc_process_desc_v69 * +__get_process_desc_v69(struct intel_context *ce) +{ + struct parent_scratch *ps = __get_parent_scratch(ce); + + return &ps->descs.pdesc; +} + static struct guc_sched_wq_desc * -__get_wq_desc(struct intel_context *ce) +__get_wq_desc_v70(struct intel_context *ce) { struct parent_scratch *ps = __get_parent_scratch(ce); - return &ps->wq_desc; + return &ps->descs.wq_desc; } -static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, - struct intel_context *ce, - u32 wqi_size) +static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size) { /* * Check for space in work queue. 
Caching a value of head pointer in @@ -476,7 +485,7 @@ static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, #define AVAILABLE_SPACE \ CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) if (wqi_size > AVAILABLE_SPACE) { - ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head); + ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head); if (wqi_size > AVAILABLE_SPACE) return NULL; @@ -495,11 +504,55 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) return ce; } +static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index) +{ + struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69; + + if (!base) + return NULL; + + GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); + + return &base[index]; +} + +static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc) +{ + u32 size; + int ret; + + size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) * + GUC_MAX_CONTEXT_ID); + ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69, + (void **)&guc->lrc_desc_pool_vaddr_v69); + if (ret) + return ret; + + return 0; +} + +static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc) +{ + if (!guc->lrc_desc_pool_vaddr_v69) + return; + + guc->lrc_desc_pool_vaddr_v69 = NULL; + i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP); +} + static inline bool guc_submission_initialized(struct intel_guc *guc) { return guc->submission_initialized; } +static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id) +{ + struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id); + + if (desc) + memset(desc, 0, sizeof(*desc)); +} + static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) { return __get_context(guc, id); @@ -526,6 +579,8 @@ static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) if (unlikely(!guc_submission_initialized(guc))) return; + _reset_lrc_desc_v69(guc, id); + /* * xarray API doesn't have xa_erase_irqsave wrapper, so calling * the lower level functions directly. 
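[Editor's note: the refactored get_wq_pointer() above keeps a cached copy of the queue head and re-reads the shared head (now via ce->parallel.guc.wq_head) only when the cached view reports the ring full. The underlying pattern, reduced to essentials; the struct and names below are illustrative, not i915's.]

	#include <linux/circ_buf.h>
	#include <linux/compiler.h>
	#include <linux/types.h>

	#define WQ_SIZE_SKETCH	0x1000	/* power of two, as CIRC_SPACE() requires */

	struct wq_sketch {
		u32 tail;		/* producer offset, owned by the driver */
		u32 cached_head;	/* last head value read from shared memory */
		u32 *shared_head;	/* head advanced by the consumer (firmware) */
		u32 *buf;
	};

	/* Reserve 'bytes' in the ring; touch the shared head only on
	 * apparent overflow, then retry once before giving up. */
	static u32 *wq_reserve_sketch(struct wq_sketch *wq, u32 bytes)
	{
		if (CIRC_SPACE(wq->tail, wq->cached_head, WQ_SIZE_SKETCH) < bytes) {
			wq->cached_head = READ_ONCE(*wq->shared_head);
			if (CIRC_SPACE(wq->tail, wq->cached_head, WQ_SIZE_SKETCH) < bytes)
				return NULL;	/* genuinely full */
		}
		return &wq->buf[wq->tail / sizeof(u32)];
	}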
@@ -611,7 +666,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) true, timeout); } -static int guc_context_policy_init(struct intel_context *ce, bool loop); +static int guc_context_policy_init_v70(struct intel_context *ce, bool loop); static int try_context_registration(struct intel_context *ce, bool loop); static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) @@ -639,7 +694,7 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) GEM_BUG_ON(context_guc_id_invalid(ce)); if (context_policy_required(ce)) { - err = guc_context_policy_init(ce, false); + err = guc_context_policy_init_v70(ce, false); if (err) return err; } @@ -737,9 +792,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce) return (WQ_SIZE - ce->parallel.guc.wqi_tail); } -static void write_wqi(struct guc_sched_wq_desc *wq_desc, - struct intel_context *ce, - u32 wqi_size) +static void write_wqi(struct intel_context *ce, u32 wqi_size) { BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); @@ -750,13 +803,12 @@ static void write_wqi(struct guc_sched_wq_desc *wq_desc, ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & (WQ_SIZE - 1); - WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail); + WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail); } static int guc_wq_noop_append(struct intel_context *ce) { - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); - u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce)); + u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce)); u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; if (!wqi) @@ -775,7 +827,6 @@ static int __guc_wq_item_append(struct i915_request *rq) { struct intel_context *ce = request_to_scheduling_context(rq); struct intel_context *child; - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); unsigned int wqi_size = (ce->parallel.number_children + 4) * sizeof(u32); u32 *wqi; @@ -795,7 +846,7 @@ static int __guc_wq_item_append(struct i915_request *rq) return ret; } - wqi = get_wq_pointer(wq_desc, ce, wqi_size); + wqi = get_wq_pointer(ce, wqi_size); if (!wqi) return -EBUSY; @@ -810,7 +861,7 @@ static int __guc_wq_item_append(struct i915_request *rq) for_each_child(ce, child) *wqi++ = child->ring->tail / sizeof(u64); - write_wqi(wq_desc, ce, wqi_size); + write_wqi(ce, wqi_size); return 0; } @@ -1868,20 +1919,34 @@ static void reset_fail_worker_func(struct work_struct *w); int intel_guc_submission_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); + int ret; if (guc->submission_initialized) return 0; + if (guc->fw.major_ver_found < 70) { + ret = guc_lrc_desc_pool_create_v69(guc); + if (ret) + return ret; + } + guc->submission_state.guc_ids_bitmap = bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); - if (!guc->submission_state.guc_ids_bitmap) - return -ENOMEM; + if (!guc->submission_state.guc_ids_bitmap) { + ret = -ENOMEM; + goto destroy_pool; + } guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; guc->timestamp.shift = gpm_timestamp_shift(gt); guc->submission_initialized = true; return 0; + +destroy_pool: + guc_lrc_desc_pool_destroy_v69(guc); + + return ret; } void intel_guc_submission_fini(struct intel_guc *guc) @@ -1890,6 +1955,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) return; guc_flush_destroyed_contexts(guc); + guc_lrc_desc_pool_destroy_v69(guc); i915_sched_engine_put(guc->sched_engine); bitmap_free(guc->submission_state.guc_ids_bitmap); guc->submission_initialized = false; @@ -2147,10 +2213,34 @@ 
static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) spin_unlock_irqrestore(&guc->submission_state.lock, flags); } -static int __guc_action_register_multi_lrc(struct intel_guc *guc, - struct intel_context *ce, - struct guc_ctxt_registration_info *info, - bool loop) +static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, + struct intel_context *ce, + u32 guc_id, + u32 offset, + bool loop) +{ + struct intel_context *child; + u32 action[4 + MAX_ENGINE_INSTANCE]; + int len = 0; + + GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); + + action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; + action[len++] = guc_id; + action[len++] = ce->parallel.number_children + 1; + action[len++] = offset; + for_each_child(ce, child) { + offset += sizeof(struct guc_lrc_desc_v69); + action[len++] = offset; + } + + return guc_submission_send_busy_loop(guc, action, len, 0, loop); +} + +static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, + struct intel_context *ce, + struct guc_ctxt_registration_info *info, + bool loop) { struct intel_context *child; u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; @@ -2190,9 +2280,24 @@ static int __guc_action_register_multi_lrc(struct intel_guc *guc, return guc_submission_send_busy_loop(guc, action, len, 0, loop); } -static int __guc_action_register_context(struct intel_guc *guc, - struct guc_ctxt_registration_info *info, - bool loop) +static int __guc_action_register_context_v69(struct intel_guc *guc, + u32 guc_id, + u32 offset, + bool loop) +{ + u32 action[] = { + INTEL_GUC_ACTION_REGISTER_CONTEXT, + guc_id, + offset, + }; + + return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + 0, loop); +} + +static int __guc_action_register_context_v70(struct intel_guc *guc, + struct guc_ctxt_registration_info *info, + bool loop) { u32 action[] = { INTEL_GUC_ACTION_REGISTER_CONTEXT, @@ -2213,24 +2318,52 @@ static int __guc_action_register_context(struct intel_guc *guc, 0, loop); } -static void prepare_context_registration_info(struct intel_context *ce, - struct guc_ctxt_registration_info *info); +static void prepare_context_registration_info_v69(struct intel_context *ce); +static void prepare_context_registration_info_v70(struct intel_context *ce, + struct guc_ctxt_registration_info *info); -static int register_context(struct intel_context *ce, bool loop) +static int +register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) +{ + u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + + ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); + + prepare_context_registration_info_v69(ce); + + if (intel_context_is_parent(ce)) + return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, + offset, loop); + else + return __guc_action_register_context_v69(guc, ce->guc_id.id, + offset, loop); +} + +static int +register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) { struct guc_ctxt_registration_info info; + + prepare_context_registration_info_v70(ce, &info); + + if (intel_context_is_parent(ce)) + return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); + else + return __guc_action_register_context_v70(guc, &info, loop); +} + +static int register_context(struct intel_context *ce, bool loop) +{ struct intel_guc *guc = ce_to_guc(ce); int ret; GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); - prepare_context_registration_info(ce, &info); - - if (intel_context_is_parent(ce)) - ret = __guc_action_register_multi_lrc(guc, 
ce, &info, loop); + if (guc->fw.major_ver_found >= 70) + ret = register_context_v70(guc, ce, loop); else - ret = __guc_action_register_context(guc, &info, loop); + ret = register_context_v69(guc, ce, loop); + if (likely(!ret)) { unsigned long flags; @@ -2238,7 +2371,8 @@ static int register_context(struct intel_context *ce, bool loop) set_context_registered(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - guc_context_policy_init(ce, loop); + if (guc->fw.major_ver_found >= 70) + guc_context_policy_init_v70(ce, loop); } return ret; @@ -2335,7 +2469,7 @@ static int __guc_context_set_context_policies(struct intel_guc *guc, 0, loop); } -static int guc_context_policy_init(struct intel_context *ce, bool loop) +static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; struct intel_guc *guc = &engine->gt->uc.guc; @@ -2394,8 +2528,108 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop) return ret; } -static void prepare_context_registration_info(struct intel_context *ce, - struct guc_ctxt_registration_info *info) +static void guc_context_policy_init_v69(struct intel_engine_cs *engine, + struct guc_lrc_desc_v69 *desc) +{ + desc->policy_flags = 0; + + if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) + desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; + + /* NB: For both of these, zero means disabled. */ + desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; + desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; +} + +static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) +{ + /* + * this matches the mapping we do in map_i915_prio_to_guc_prio() + * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) + */ + switch (prio) { + default: + MISSING_CASE(prio); + fallthrough; + case GUC_CLIENT_PRIORITY_KMD_NORMAL: + return GEN12_CTX_PRIORITY_NORMAL; + case GUC_CLIENT_PRIORITY_NORMAL: + return GEN12_CTX_PRIORITY_LOW; + case GUC_CLIENT_PRIORITY_HIGH: + case GUC_CLIENT_PRIORITY_KMD_HIGH: + return GEN12_CTX_PRIORITY_HIGH; + } +} + +static void prepare_context_registration_info_v69(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + struct intel_guc *guc = &engine->gt->uc.guc; + u32 ctx_id = ce->guc_id.id; + struct guc_lrc_desc_v69 *desc; + struct intel_context *child; + + GEM_BUG_ON(!engine->mask); + + /* + * Ensure LRC + CT vmas are is same region as write barrier is done + * based on CT vma region. + */ + GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != + i915_gem_object_is_lmem(ce->ring->vma->obj)); + + desc = __get_lrc_desc_v69(guc, ctx_id); + desc->engine_class = engine_class_to_guc_class(engine->class); + desc->engine_submit_mask = engine->logical_mask; + desc->hw_context_desc = ce->lrc.lrca; + desc->priority = ce->guc_state.prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init_v69(engine, desc); + + /* + * If context is a parent, we need to register a process descriptor + * describing a work queue and register all child contexts. 
+ */ + if (intel_context_is_parent(ce)) { + struct guc_process_desc_v69 *pdesc; + + ce->parallel.guc.wqi_tail = 0; + ce->parallel.guc.wqi_head = 0; + + desc->process_desc = i915_ggtt_offset(ce->state) + + __get_parent_scratch_offset(ce); + desc->wq_addr = i915_ggtt_offset(ce->state) + + __get_wq_offset(ce); + desc->wq_size = WQ_SIZE; + + pdesc = __get_process_desc_v69(ce); + memset(pdesc, 0, sizeof(*(pdesc))); + pdesc->stage_id = ce->guc_id.id; + pdesc->wq_base_addr = desc->wq_addr; + pdesc->wq_size_bytes = desc->wq_size; + pdesc->wq_status = WQ_STATUS_ACTIVE; + + ce->parallel.guc.wq_head = &pdesc->head; + ce->parallel.guc.wq_tail = &pdesc->tail; + ce->parallel.guc.wq_status = &pdesc->wq_status; + + for_each_child(ce, child) { + desc = __get_lrc_desc_v69(guc, child->guc_id.id); + + desc->engine_class = + engine_class_to_guc_class(engine->class); + desc->hw_context_desc = child->lrc.lrca; + desc->priority = ce->guc_state.prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init_v69(engine, desc); + } + + clear_children_join_go_memory(ce); + } +} + +static void prepare_context_registration_info_v70(struct intel_context *ce, + struct guc_ctxt_registration_info *info) { struct intel_engine_cs *engine = ce->engine; struct intel_guc *guc = &engine->gt->uc.guc; @@ -2420,6 +2654,8 @@ static void prepare_context_registration_info(struct intel_context *ce, */ info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); + if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) + info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); info->flags = CONTEXT_REGISTRATION_FLAG_KMD; /* @@ -2443,10 +2679,14 @@ static void prepare_context_registration_info(struct intel_context *ce, info->wq_base_hi = upper_32_bits(wq_base_offset); info->wq_size = WQ_SIZE; - wq_desc = __get_wq_desc(ce); + wq_desc = __get_wq_desc_v70(ce); memset(wq_desc, 0, sizeof(*wq_desc)); wq_desc->wq_status = WQ_STATUS_ACTIVE; + ce->parallel.guc.wq_head = &wq_desc->head; + ce->parallel.guc.wq_tail = &wq_desc->tail; + ce->parallel.guc.wq_status = &wq_desc->wq_status; + clear_children_join_go_memory(ce); } } @@ -2761,11 +3001,21 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout) { - struct context_policy policy; + if (guc->fw.major_ver_found >= 70) { + struct context_policy policy; - __guc_context_policy_start_klv(&policy, guc_id); - __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); - __guc_context_set_context_policies(guc, &policy, true); + __guc_context_policy_start_klv(&policy, guc_id); + __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); + __guc_context_set_context_policies(guc, &policy, true); + } else { + u32 action[] = { + INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, + guc_id, + preemption_timeout + }; + + intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + } } static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) @@ -3013,11 +3263,21 @@ static int guc_context_alloc(struct intel_context *ce) static void __guc_context_set_prio(struct intel_guc *guc, struct intel_context *ce) { - struct context_policy policy; + if (guc->fw.major_ver_found >= 70) { + struct context_policy policy; - __guc_context_policy_start_klv(&policy, ce->guc_id.id); - __guc_context_policy_add_priority(&policy, ce->guc_state.prio); - __guc_context_set_context_policies(guc, &policy, true); + __guc_context_policy_start_klv(&policy, 
ce->guc_id.id); + __guc_context_policy_add_priority(&policy, ce->guc_state.prio); + __guc_context_set_context_policies(guc, &policy, true); + } else { + u32 action[] = { + INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, + ce->guc_id.id, + ce->guc_state.prio, + }; + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + } } static void guc_context_set_prio(struct intel_guc *guc, @@ -4527,17 +4787,19 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, guc_log_context_priority(p, ce); if (intel_context_is_parent(ce)) { - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); struct intel_context *child; drm_printf(p, "\t\tNumber children: %u\n", ce->parallel.number_children); - drm_printf(p, "\t\tWQI Head: %u\n", - READ_ONCE(wq_desc->head)); - drm_printf(p, "\t\tWQI Tail: %u\n", - READ_ONCE(wq_desc->tail)); - drm_printf(p, "\t\tWQI Status: %u\n\n", - READ_ONCE(wq_desc->wq_status)); + + if (ce->parallel.guc.wq_status) { + drm_printf(p, "\t\tWQI Head: %u\n", + READ_ONCE(*ce->parallel.guc.wq_head)); + drm_printf(p, "\t\tWQI Tail: %u\n", + READ_ONCE(*ce->parallel.guc.wq_tail)); + drm_printf(p, "\t\tWQI Status: %u\n\n", + READ_ONCE(*ce->parallel.guc.wq_status)); + } if (ce->engine->emit_bb_start == emit_bb_start_parent_no_preempt_mid_batch) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 2ff55b9994bc..703f42ba5ddd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -70,6 +70,10 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \ fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1)) +#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \ + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) + #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \ @@ -105,6 +109,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, MODULE_FIRMWARE(uc_); INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) +INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH) /* The below structs and macros are used to iterate across the list of blobs */ @@ -149,6 +154,9 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) static const struct uc_fw_platform_requirement blobs_guc[] = { INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) }; + static const struct uc_fw_platform_requirement blobs_guc_fallback[] = { + INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB) + }; static const struct uc_fw_platform_requirement blobs_huc[] = { INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB) }; @@ -179,12 +187,29 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { const struct uc_fw_blob *blob = &fw_blobs[i].blob; uc_fw->path = blob->path; + uc_fw->wanted_path = blob->path; uc_fw->major_ver_wanted = blob->major; uc_fw->minor_ver_wanted = blob->minor; break; } } + if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) { + const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback; + u32 count = ARRAY_SIZE(blobs_guc_fallback); + + for (i = 0; i < count && p <= blobs[i].p; i++) { + if (p == blobs[i].p && rev >= blobs[i].rev) { + const struct uc_fw_blob *blob = &blobs[i].blob; + + uc_fw->fallback.path = blob->path; + 
uc_fw->fallback.major_ver = blob->major; + uc_fw->fallback.minor_ver = blob->minor; + break; + } + } + } + /* make sure the list is ordered as expected */ if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) { for (i = 1; i < fw_count; i++) { @@ -338,7 +363,24 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) __force_fw_fetch_failures(uc_fw, -EINVAL); __force_fw_fetch_failures(uc_fw, -ESTALE); - err = request_firmware(&fw, uc_fw->path, dev); + err = firmware_request_nowarn(&fw, uc_fw->path, dev); + if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) { + err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev); + if (!err) { + drm_notice(&i915->drm, + "%s firmware %s is recommended, but only %s was found\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->wanted_path, + uc_fw->fallback.path); + drm_info(&i915->drm, + "Consider updating your linux-firmware pkg or downloading from %s\n", + INTEL_UC_FIRMWARE_URL); + + uc_fw->path = uc_fw->fallback.path; + uc_fw->major_ver_wanted = uc_fw->fallback.major_ver; + uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver; + } + } if (err) goto fail; @@ -437,8 +479,8 @@ fail: INTEL_UC_FIRMWARE_MISSING : INTEL_UC_FIRMWARE_ERROR); - drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n", intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); @@ -796,7 +838,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) { drm_printf(p, "%s firmware: %s\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path); + if (uc_fw->fallback.path) { + drm_printf(p, "%s firmware fallback: %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path); + drm_printf(p, "fallback selected: %s\n", + str_yes_no(uc_fw->path == uc_fw->fallback.path)); + } drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(uc_fw->status)); drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 3229018877d3..562acdf88adb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -74,6 +74,7 @@ struct intel_uc_fw { const enum intel_uc_fw_status status; enum intel_uc_fw_status __status; /* no accidental overwrites */ }; + const char *wanted_path; const char *path; bool user_overridden; size_t size; @@ -98,6 +99,12 @@ struct intel_uc_fw { u16 major_ver_found; u16 minor_ver_found; + struct { + const char *path; + u16 major_ver; + u16 minor_ver; + } fallback; + u32 rsa_size; u32 ucode_size; diff --git a/drivers/gpu/drm/imx/dcss/dcss-dev.c b/drivers/gpu/drm/imx/dcss/dcss-dev.c index c849533ca83e..3f5750cc2673 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-dev.c +++ b/drivers/gpu/drm/imx/dcss/dcss-dev.c @@ -207,6 +207,7 @@ struct dcss_dev *dcss_dev_create(struct device *dev, bool hdmi_output) ret = dcss_submodules_init(dcss); if (ret) { + of_node_put(dcss->of_port); dev_err(dev, "submodules initialization failed\n"); goto clks_err; } @@ -237,6 +238,8 @@ void dcss_dev_destroy(struct dcss_dev *dcss) dcss_clocks_disable(dcss); } + of_node_put(dcss->of_port); + pm_runtime_disable(dcss->dev); dcss_submodules_stop(dcss); 
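
The firmware fetch above now tries the preferred blob with firmware_request_nowarn() and, only when that fails and the path was not user-overridden, retries the older fallback blob, promoting its path and wanted version numbers on success. A compilable sketch of that control flow, with load_blob() as a hypothetical stand-in for the firmware loader and made-up blob names:

#include <stdio.h>
#include <string.h>

struct fw {
	const char *wanted_path;	/* preferred blob, e.g. a GuC 70.x image */
	const char *fallback_path;	/* older blob, e.g. GuC 69.0.3 */
	const char *path;		/* what actually got loaded */
};

/* stand-in for firmware_request_nowarn(): pretend only blobs with
 * "69" in the name are installed on this system */
static int load_blob(const char *path)
{
	return (path && strstr(path, "69")) ? 0 : -2;
}

static int fetch(struct fw *fw)
{
	int err = load_blob(fw->wanted_path);

	if (err && fw->fallback_path) {
		err = load_blob(fw->fallback_path);
		if (!err) {
			fprintf(stderr,
				"%s is recommended, but only %s was found\n",
				fw->wanted_path, fw->fallback_path);
			/* promote the fallback, as the patch does with
			 * uc_fw->path and the wanted version numbers */
			fw->path = fw->fallback_path;
			return 0;
		}
	}
	if (!err)
		fw->path = fw->wanted_path;
	return err;
}

int main(void)
{
	struct fw fw = {
		"i915/adlp_guc_70.1.1.bin",	/* illustrative name */
		"i915/adlp_guc_69.0.3.bin",	/* illustrative name */
		NULL,
	};

	if (fetch(&fw))
		return 1;
	printf("loaded %s\n", fw.path);
	return 0;
}

Switching to the nowarn variant is the detail that makes this clean: once a fallback exists, a missing preferred blob is an expected condition, and the driver prints its own drm_notice() instead of the loader's generic failure warning.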
diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index c96014464355..a189982601a4 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -713,7 +713,7 @@ static int generic_edp_panel_probe(struct device *dev, struct panel_edp *panel) of_property_read_u32(dev->of_node, "hpd-reliable-delay-ms", &reliable_ms); desc->delay.hpd_reliable = reliable_ms; of_property_read_u32(dev->of_node, "hpd-absent-delay-ms", &absent_ms); - desc->delay.hpd_reliable = absent_ms; + desc->delay.hpd_absent = absent_ms; /* Power the panel on so we can read the EDID */ ret = pm_runtime_get_sync(dev); diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 191c56064f19..6b25b2f4f5a3 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -190,7 +190,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) } EXPORT_SYMBOL(drm_sched_entity_flush); -static void drm_sched_entity_kill_jobs_irq_work(struct irq_work *wrk) +static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) { struct drm_sched_job *job = container_of(wrk, typeof(*job), work); @@ -207,8 +207,8 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, struct drm_sched_job *job = container_of(cb, struct drm_sched_job, finish_cb); - init_irq_work(&job->work, drm_sched_entity_kill_jobs_irq_work); - irq_work_queue(&job->work); + INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work); + schedule_work(&job->work); } static struct dma_fence * diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 3d6f8ee355bf..630cfa4ddd46 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -388,9 +388,9 @@ static irqreturn_t cdns_i2c_slave_isr(void *ptr) */ static irqreturn_t cdns_i2c_master_isr(void *ptr) { - unsigned int isr_status, avail_bytes, updatetx; + unsigned int isr_status, avail_bytes; unsigned int bytes_to_send; - bool hold_quirk; + bool updatetx; struct cdns_i2c *id = ptr; /* Signal completion only after everything is updated */ int done_flag = 0; @@ -410,11 +410,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) * Check if transfer size register needs to be updated again for a * large data receive operation. */ - updatetx = 0; - if (id->recv_count > id->curr_recv_count) - updatetx = 1; - - hold_quirk = (id->quirks & CDNS_I2C_BROKEN_HOLD_BIT) && updatetx; + updatetx = id->recv_count > id->curr_recv_count; /* When receiving, handle data interrupt and completion interrupt */ if (id->p_recv_buf && @@ -445,7 +441,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) break; } - if (cdns_is_holdquirk(id, hold_quirk)) + if (cdns_is_holdquirk(id, updatetx)) break; } @@ -456,7 +452,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) * maintain transfer size non-zero while performing a large * receive operation. 
*/ - if (cdns_is_holdquirk(id, hold_quirk)) { + if (cdns_is_holdquirk(id, updatetx)) { /* wait while fifo is full */ while (cdns_i2c_readreg(CDNS_I2C_XFER_SIZE_OFFSET) != (id->curr_recv_count - CDNS_I2C_FIFO_DEPTH)) @@ -478,22 +474,6 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) CDNS_I2C_XFER_SIZE_OFFSET); id->curr_recv_count = id->recv_count; } - } else if (id->recv_count && !hold_quirk && - !id->curr_recv_count) { - - /* Set the slave address in address register*/ - cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK, - CDNS_I2C_ADDR_OFFSET); - - if (id->recv_count > CDNS_I2C_TRANSFER_SIZE) { - cdns_i2c_writereg(CDNS_I2C_TRANSFER_SIZE, - CDNS_I2C_XFER_SIZE_OFFSET); - id->curr_recv_count = CDNS_I2C_TRANSFER_SIZE; - } else { - cdns_i2c_writereg(id->recv_count, - CDNS_I2C_XFER_SIZE_OFFSET); - id->curr_recv_count = id->recv_count; - } } /* Clear hold (if not repeated start) and signal completion */ diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index e9e2db68b9fb..78fb1a4274a6 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -66,7 +66,7 @@ /* IMX I2C registers: * the I2C register offset is different between SoCs, - * to provid support for all these chips, split the + * to provide support for all these chips, split the * register offset into a fixed base address and a * variable shift value, then the full register offset * will be calculated by diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c index 56aa424fd71d..815cc561386b 100644 --- a/drivers/i2c/busses/i2c-mlxcpld.c +++ b/drivers/i2c/busses/i2c-mlxcpld.c @@ -49,7 +49,7 @@ #define MLXCPLD_LPCI2C_NACK_IND 2 #define MLXCPLD_I2C_FREQ_1000KHZ_SET 0x04 -#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0c +#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0e #define MLXCPLD_I2C_FREQ_100KHZ_SET 0x42 enum mlxcpld_i2c_frequency { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 20e53b167f81..c8539d0e12dd 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7304,7 +7304,9 @@ static struct r5conf *setup_conf(struct mddev *mddev) goto abort; conf->mddev = mddev; - if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) + ret = -ENOMEM; + conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!conf->stripe_hashtbl) goto abort; /* We init hash_locks[0] separately to that it can be used diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index 2e0aa74ac185..95ef971b5e1c 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -13,10 +13,13 @@ lkdtm-$(CONFIG_LKDTM) += cfi.o lkdtm-$(CONFIG_LKDTM) += fortify.o lkdtm-$(CONFIG_PPC_64S_HASH_MMU) += powerpc.o -KASAN_SANITIZE_rodata.o := n KASAN_SANITIZE_stackleak.o := n -KCOV_INSTRUMENT_rodata.o := n -CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO) + +KASAN_SANITIZE_rodata.o := n +KCSAN_SANITIZE_rodata.o := n +KCOV_INSTRUMENT_rodata.o := n +OBJECT_FILES_NON_STANDARD_rodata.o := y +CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO) $(RETHUNK_CFLAGS) OBJCOPYFLAGS := OBJCOPYFLAGS_rodata_objcopy.o := \ diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 86e867ffbb10..033be559a730 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -1298,8 +1298,9 @@ static int sdhci_omap_probe(struct platform_device *pdev) /* * omap_device_pm_domain has callbacks to enable the main * functional clock, interface clock and also configure the - * SYSCONFIG register of omap devices. 
The callback will be invoked - * as part of pm_runtime_get_sync. + * SYSCONFIG register to clear any boot loader set voltage + * capabilities before calling sdhci_setup_host(). The + * callback will be invoked as part of pm_runtime_get_sync. */ pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, 50); @@ -1441,7 +1442,8 @@ static int __maybe_unused sdhci_omap_runtime_suspend(struct device *dev) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); - sdhci_runtime_suspend_host(host); + if (omap_host->con != -EINVAL) + sdhci_runtime_suspend_host(host); sdhci_omap_context_save(omap_host); @@ -1458,10 +1460,10 @@ static int __maybe_unused sdhci_omap_runtime_resume(struct device *dev) pinctrl_pm_select_default_state(dev); - if (omap_host->con != -EINVAL) + if (omap_host->con != -EINVAL) { sdhci_omap_context_restore(omap_host); - - sdhci_runtime_resume_host(host, 0); + sdhci_runtime_resume_host(host, 0); + } return 0; } diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 889e40329956..93da23682d86 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -850,9 +850,10 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this, unsigned int tRP_ps; bool use_half_period; int sample_delay_ps, sample_delay_factor; - u16 busy_timeout_cycles; + unsigned int busy_timeout_cycles; u8 wrn_dly_sel; unsigned long clk_rate, min_rate; + u64 busy_timeout_ps; if (sdr->tRC_min >= 30000) { /* ONFI non-EDO modes [0-3] */ @@ -885,7 +886,8 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this, addr_setup_cycles = TO_CYCLES(sdr->tALS_min, period_ps); data_setup_cycles = TO_CYCLES(sdr->tDS_min, period_ps); data_hold_cycles = TO_CYCLES(sdr->tDH_min, period_ps); - busy_timeout_cycles = TO_CYCLES(sdr->tWB_max + sdr->tR_max, period_ps); + busy_timeout_ps = max(sdr->tBERS_max, sdr->tPROG_max); + busy_timeout_cycles = TO_CYCLES(busy_timeout_ps, period_ps); hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) | BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) | diff --git a/drivers/net/amt.c b/drivers/net/amt.c index febfcf2d92af..9a247eb7679c 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -449,7 +449,7 @@ out: dev_put(amt->dev); } -/* Non-existant group is created as INCLUDE {empty}: +/* Non-existent group is created as INCLUDE {empty}: * * RFC 3376 - 5.1. Action on Change of Interface State * diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 911aace42284..c79a5128235f 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -26,11 +26,6 @@ #include "ksz8795_reg.h" #include "ksz8.h" -static bool ksz_is_ksz88x3(struct ksz_device *dev) -{ - return dev->chip_id == 0x8830; -} - static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set) { regmap_update_bits(dev->regmap[0], addr, bits, set ? bits : 0); @@ -1116,7 +1111,6 @@ void ksz8_port_mirror_del(struct ksz_device *dev, int port, static void ksz8795_cpu_interface_select(struct ksz_device *dev, int port) { struct ksz_port *p = &dev->ports[port]; - u8 data8; if (!p->interface && dev->compat_interface) { dev_warn(dev->dev, @@ -1125,40 +1119,6 @@ static void ksz8795_cpu_interface_select(struct ksz_device *dev, int port) port); p->interface = dev->compat_interface; } - - /* Configure MII interface for proper network communication. 
*/ - ksz_read8(dev, REG_PORT_5_CTRL_6, &data8); - data8 &= ~PORT_INTERFACE_TYPE; - data8 &= ~PORT_GMII_1GPS_MODE; - switch (p->interface) { - case PHY_INTERFACE_MODE_MII: - p->phydev.speed = SPEED_100; - break; - case PHY_INTERFACE_MODE_RMII: - data8 |= PORT_INTERFACE_RMII; - p->phydev.speed = SPEED_100; - break; - case PHY_INTERFACE_MODE_GMII: - data8 |= PORT_GMII_1GPS_MODE; - data8 |= PORT_INTERFACE_GMII; - p->phydev.speed = SPEED_1000; - break; - default: - data8 &= ~PORT_RGMII_ID_IN_ENABLE; - data8 &= ~PORT_RGMII_ID_OUT_ENABLE; - if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || - p->interface == PHY_INTERFACE_MODE_RGMII_RXID) - data8 |= PORT_RGMII_ID_IN_ENABLE; - if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || - p->interface == PHY_INTERFACE_MODE_RGMII_TXID) - data8 |= PORT_RGMII_ID_OUT_ENABLE; - data8 |= PORT_GMII_1GPS_MODE; - data8 |= PORT_INTERFACE_RGMII; - p->phydev.speed = SPEED_1000; - break; - } - ksz_write8(dev, REG_PORT_5_CTRL_6, data8); - p->phydev.duplex = 1; } void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port) diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h index a848eb4c54cb..77487d611824 100644 --- a/drivers/net/dsa/microchip/ksz8795_reg.h +++ b/drivers/net/dsa/microchip/ksz8795_reg.h @@ -170,15 +170,7 @@ #define REG_PORT_5_CTRL_6 0x56 #define PORT_MII_INTERNAL_CLOCK BIT(7) -#define PORT_GMII_1GPS_MODE BIT(6) -#define PORT_RGMII_ID_IN_ENABLE BIT(4) -#define PORT_RGMII_ID_OUT_ENABLE BIT(3) #define PORT_GMII_MAC_MODE BIT(2) -#define PORT_INTERFACE_TYPE 0x3 -#define PORT_INTERFACE_MII 0 -#define PORT_INTERFACE_RMII 1 -#define PORT_INTERFACE_GMII 2 -#define PORT_INTERFACE_RGMII 3 #define REG_PORT_1_CTRL_7 0x17 #define REG_PORT_2_CTRL_7 0x27 diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 6453642fa14c..4b14d80d27ed 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -19,11 +19,6 @@ #include "ksz_common.h" #include "ksz9477.h" -/* Used with variable features to indicate capabilities. */ -#define GBIT_SUPPORT BIT(0) -#define NEW_XMII BIT(1) -#define IS_9893 BIT(2) - static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set) { regmap_update_bits(dev->regmap[0], addr, bits, set ? 
bits : 0); @@ -866,142 +861,18 @@ void ksz9477_port_mirror_del(struct ksz_device *dev, int port, PORT_MIRROR_SNIFFER, false); } -static bool ksz9477_get_gbit(struct ksz_device *dev, u8 data) -{ - bool gbit; - - if (dev->features & NEW_XMII) - gbit = !(data & PORT_MII_NOT_1GBIT); - else - gbit = !!(data & PORT_MII_1000MBIT_S1); - return gbit; -} - -static void ksz9477_set_gbit(struct ksz_device *dev, bool gbit, u8 *data) -{ - if (dev->features & NEW_XMII) { - if (gbit) - *data &= ~PORT_MII_NOT_1GBIT; - else - *data |= PORT_MII_NOT_1GBIT; - } else { - if (gbit) - *data |= PORT_MII_1000MBIT_S1; - else - *data &= ~PORT_MII_1000MBIT_S1; - } -} - -static int ksz9477_get_xmii(struct ksz_device *dev, u8 data) -{ - int mode; - - if (dev->features & NEW_XMII) { - switch (data & PORT_MII_SEL_M) { - case PORT_MII_SEL: - mode = 0; - break; - case PORT_RMII_SEL: - mode = 1; - break; - case PORT_GMII_SEL: - mode = 2; - break; - default: - mode = 3; - } - } else { - switch (data & PORT_MII_SEL_M) { - case PORT_MII_SEL_S1: - mode = 0; - break; - case PORT_RMII_SEL_S1: - mode = 1; - break; - case PORT_GMII_SEL_S1: - mode = 2; - break; - default: - mode = 3; - } - } - return mode; -} - -static void ksz9477_set_xmii(struct ksz_device *dev, int mode, u8 *data) -{ - u8 xmii; - - if (dev->features & NEW_XMII) { - switch (mode) { - case 0: - xmii = PORT_MII_SEL; - break; - case 1: - xmii = PORT_RMII_SEL; - break; - case 2: - xmii = PORT_GMII_SEL; - break; - default: - xmii = PORT_RGMII_SEL; - break; - } - } else { - switch (mode) { - case 0: - xmii = PORT_MII_SEL_S1; - break; - case 1: - xmii = PORT_RMII_SEL_S1; - break; - case 2: - xmii = PORT_GMII_SEL_S1; - break; - default: - xmii = PORT_RGMII_SEL_S1; - break; - } - } - *data &= ~PORT_MII_SEL_M; - *data |= xmii; -} - static phy_interface_t ksz9477_get_interface(struct ksz_device *dev, int port) { phy_interface_t interface; bool gbit; - int mode; - u8 data8; if (port < dev->phy_port_cnt) return PHY_INTERFACE_MODE_NA; - ksz_pread8(dev, port, REG_PORT_XMII_CTRL_1, &data8); - gbit = ksz9477_get_gbit(dev, data8); - mode = ksz9477_get_xmii(dev, data8); - switch (mode) { - case 2: - interface = PHY_INTERFACE_MODE_GMII; - if (gbit) - break; - fallthrough; - case 0: - interface = PHY_INTERFACE_MODE_MII; - break; - case 1: - interface = PHY_INTERFACE_MODE_RMII; - break; - default: - interface = PHY_INTERFACE_MODE_RGMII; - if (data8 & PORT_RGMII_ID_EG_ENABLE) - interface = PHY_INTERFACE_MODE_RGMII_TXID; - if (data8 & PORT_RGMII_ID_IG_ENABLE) { - interface = PHY_INTERFACE_MODE_RGMII_RXID; - if (data8 & PORT_RGMII_ID_EG_ENABLE) - interface = PHY_INTERFACE_MODE_RGMII_ID; - } - break; - } + + gbit = ksz_get_gbit(dev, port); + + interface = ksz_get_xmii(dev, port, gbit); + return interface; } @@ -1073,10 +944,9 @@ void ksz9477_get_caps(struct ksz_device *dev, int port, void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) { - struct ksz_port *p = &dev->ports[port]; struct dsa_switch *ds = dev->ds; - u8 data8, member; u16 data16; + u8 member; /* enable tag tail for host port */ if (cpu_port) @@ -1116,44 +986,6 @@ void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_FORCE_TX_FLOW_CTRL | PORT_FORCE_RX_FLOW_CTRL, true); - - /* configure MAC to 1G & RGMII mode */ - ksz_pread8(dev, port, REG_PORT_XMII_CTRL_1, &data8); - switch (p->interface) { - case PHY_INTERFACE_MODE_MII: - ksz9477_set_xmii(dev, 0, &data8); - ksz9477_set_gbit(dev, false, &data8); - p->phydev.speed = SPEED_100; - break; - case 
PHY_INTERFACE_MODE_RMII: - ksz9477_set_xmii(dev, 1, &data8); - ksz9477_set_gbit(dev, false, &data8); - p->phydev.speed = SPEED_100; - break; - case PHY_INTERFACE_MODE_GMII: - ksz9477_set_xmii(dev, 2, &data8); - ksz9477_set_gbit(dev, true, &data8); - p->phydev.speed = SPEED_1000; - break; - default: - ksz9477_set_xmii(dev, 3, &data8); - ksz9477_set_gbit(dev, true, &data8); - data8 &= ~PORT_RGMII_ID_IG_ENABLE; - data8 &= ~PORT_RGMII_ID_EG_ENABLE; - if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || - p->interface == PHY_INTERFACE_MODE_RGMII_RXID) - data8 |= PORT_RGMII_ID_IG_ENABLE; - if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || - p->interface == PHY_INTERFACE_MODE_RGMII_TXID) - data8 |= PORT_RGMII_ID_EG_ENABLE; - /* On KSZ9893, disable RGMII in-band status support */ - if (dev->features & IS_9893) - data8 &= ~PORT_MII_MAC_MODE; - p->phydev.speed = SPEED_1000; - break; - } - ksz_pwrite8(dev, port, REG_PORT_XMII_CTRL_1, data8); - p->phydev.duplex = 1; } if (cpu_port) @@ -1341,9 +1173,6 @@ int ksz9477_switch_init(struct ksz_device *dev) dev->features &= ~GBIT_SUPPORT; dev->phy_port_cnt = 2; } else { - /* Chip uses new XMII register definitions. */ - dev->features |= NEW_XMII; - /* Chip does not support gigabit. */ if (!(data8 & SW_GIGABIT_ABLE)) dev->features &= ~GBIT_SUPPORT; diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index d0cce4ca3cf9..ddf99d1e4bbd 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -1175,35 +1175,11 @@ #define PORT_LINK_STATUS_FAIL BIT(0) /* 3 - xMII */ -#define REG_PORT_XMII_CTRL_0 0x0300 - #define PORT_SGMII_SEL BIT(7) -#define PORT_MII_FULL_DUPLEX BIT(6) -#define PORT_MII_100MBIT BIT(4) #define PORT_GRXC_ENABLE BIT(0) -#define REG_PORT_XMII_CTRL_1 0x0301 - #define PORT_RMII_CLK_SEL BIT(7) -/* S1 */ -#define PORT_MII_1000MBIT_S1 BIT(6) -/* S2 */ -#define PORT_MII_NOT_1GBIT BIT(6) #define PORT_MII_SEL_EDGE BIT(5) -#define PORT_RGMII_ID_IG_ENABLE BIT(4) -#define PORT_RGMII_ID_EG_ENABLE BIT(3) -#define PORT_MII_MAC_MODE BIT(2) -#define PORT_MII_SEL_M 0x3 -/* S1 */ -#define PORT_MII_SEL_S1 0x0 -#define PORT_RMII_SEL_S1 0x1 -#define PORT_GMII_SEL_S1 0x2 -#define PORT_RGMII_SEL_S1 0x3 -/* S2 */ -#define PORT_RGMII_SEL 0x0 -#define PORT_RMII_SEL 0x1 -#define PORT_GMII_SEL 0x2 -#define PORT_MII_SEL 0x3 /* 4 - MAC */ #define REG_PORT_MAC_CTRL_0 0x0400 diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index fd12a68c1dcd..ed7d137cba99 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -222,8 +222,7 @@ static const struct ksz_dev_ops lan937x_dev_ops = { .mirror_add = ksz9477_port_mirror_add, .mirror_del = ksz9477_port_mirror_del, .get_caps = lan937x_phylink_get_caps, - .phylink_mac_config = lan937x_phylink_mac_config, - .phylink_mac_link_up = lan937x_phylink_mac_link_up, + .setup_rgmii_delay = lan937x_setup_rgmii_delay, .fdb_dump = ksz9477_fdb_dump, .fdb_add = ksz9477_fdb_add, .fdb_del = ksz9477_fdb_del, @@ -257,6 +256,8 @@ static const u16 ksz8795_regs[] = { [S_START_CTRL] = 0x01, [S_BROADCAST_CTRL] = 0x06, [S_MULTICAST_CTRL] = 0x04, + [P_XMII_CTRL_0] = 0x06, + [P_XMII_CTRL_1] = 0x56, }; static const u32 ksz8795_masks[] = { @@ -279,6 +280,24 @@ static const u32 ksz8795_masks[] = { [DYNAMIC_MAC_TABLE_FID] = GENMASK(26, 20), [DYNAMIC_MAC_TABLE_SRC_PORT] = GENMASK(26, 24), [DYNAMIC_MAC_TABLE_TIMESTAMP] = GENMASK(28, 27), + [P_MII_TX_FLOW_CTRL] = BIT(5), + [P_MII_RX_FLOW_CTRL] = BIT(5), +}; 
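
The masks entries added here, together with the xmii_ctrl0/xmii_ctrl1 tables that follow, turn the xMII register layout into per-chip data: common code indexes a table by a logical field name instead of hard-coding bit values, which matters because the KSZ8795 and KSZ9477 families encode the same selector field with opposite values. A standalone sketch of that table-driven encoding, using illustrative names:

#include <stdio.h>

/* logical field names, in the spirit of enum ksz_xmii_ctrl1 */
enum xmii_sel { F_RGMII_SEL, F_RMII_SEL, F_GMII_SEL, F_MII_SEL };

/* per-chip encodings; note the inverted RGMII/MII values */
static const unsigned char ksz8795_sel_val[] = {
	[F_RGMII_SEL] = 3, [F_RMII_SEL] = 1, [F_GMII_SEL] = 2, [F_MII_SEL] = 0,
};
static const unsigned char ksz9477_sel_val[] = {
	[F_RGMII_SEL] = 0, [F_RMII_SEL] = 1, [F_GMII_SEL] = 2, [F_MII_SEL] = 3,
};

struct chip_info {
	const char *name;
	const unsigned char *sel_val;	/* stand-in for info->xmii_ctrl1 */
};

/* common code asks for a logical field and gets the chip's encoding,
 * the way ksz_set_xmii() ORs bitval[P_RGMII_SEL] into P_XMII_CTRL_1 */
static unsigned char encode_sel(const struct chip_info *c, enum xmii_sel f)
{
	return c->sel_val[f];
}

int main(void)
{
	const struct chip_info chips[] = {
		{ "ksz8795", ksz8795_sel_val },
		{ "ksz9477", ksz9477_sel_val },
	};

	for (unsigned int i = 0; i < 2; i++)
		printf("%s: RGMII selector encodes as %u\n",
		       chips[i].name, encode_sel(&chips[i], F_RGMII_SEL));
	return 0;
}

Adding a new switch family then means supplying one more table in ksz_switch_chips[] rather than growing a switch statement in every register accessor.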
+ +static const u8 ksz8795_xmii_ctrl0[] = { + [P_MII_100MBIT] = 0, + [P_MII_10MBIT] = 1, + [P_MII_FULL_DUPLEX] = 0, + [P_MII_HALF_DUPLEX] = 1, +}; + +static const u8 ksz8795_xmii_ctrl1[] = { + [P_RGMII_SEL] = 3, + [P_GMII_SEL] = 2, + [P_RMII_SEL] = 1, + [P_MII_SEL] = 0, + [P_GMII_1GBIT] = 1, + [P_GMII_NOT_1GBIT] = 0, }; static const u8 ksz8795_shifts[] = { @@ -351,20 +370,42 @@ static const u16 ksz9477_regs[] = { [S_START_CTRL] = 0x0300, [S_BROADCAST_CTRL] = 0x0332, [S_MULTICAST_CTRL] = 0x0331, + [P_XMII_CTRL_0] = 0x0300, + [P_XMII_CTRL_1] = 0x0301, }; static const u32 ksz9477_masks[] = { [ALU_STAT_WRITE] = 0, [ALU_STAT_READ] = 1, + [P_MII_TX_FLOW_CTRL] = BIT(5), + [P_MII_RX_FLOW_CTRL] = BIT(3), }; static const u8 ksz9477_shifts[] = { [ALU_STAT_INDEX] = 16, }; +static const u8 ksz9477_xmii_ctrl0[] = { + [P_MII_100MBIT] = 1, + [P_MII_10MBIT] = 0, + [P_MII_FULL_DUPLEX] = 1, + [P_MII_HALF_DUPLEX] = 0, +}; + +static const u8 ksz9477_xmii_ctrl1[] = { + [P_RGMII_SEL] = 0, + [P_RMII_SEL] = 1, + [P_GMII_SEL] = 2, + [P_MII_SEL] = 3, + [P_GMII_1GBIT] = 0, + [P_GMII_NOT_1GBIT] = 1, +}; + static const u32 lan937x_masks[] = { [ALU_STAT_WRITE] = 1, [ALU_STAT_READ] = 2, + [P_MII_TX_FLOW_CTRL] = BIT(5), + [P_MII_RX_FLOW_CTRL] = BIT(3), }; static const u8 lan937x_shifts[] = { @@ -388,6 +429,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz8795_regs, .masks = ksz8795_masks, .shifts = ksz8795_shifts, + .xmii_ctrl0 = ksz8795_xmii_ctrl0, + .xmii_ctrl1 = ksz8795_xmii_ctrl1, .supports_mii = {false, false, false, false, true}, .supports_rmii = {false, false, false, false, true}, .supports_rgmii = {false, false, false, false, true}, @@ -424,6 +467,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz8795_regs, .masks = ksz8795_masks, .shifts = ksz8795_shifts, + .xmii_ctrl0 = ksz8795_xmii_ctrl0, + .xmii_ctrl1 = ksz8795_xmii_ctrl1, .supports_mii = {false, false, false, false, true}, .supports_rmii = {false, false, false, false, true}, .supports_rgmii = {false, false, false, false, true}, @@ -446,6 +491,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz8795_regs, .masks = ksz8795_masks, .shifts = ksz8795_shifts, + .xmii_ctrl0 = ksz8795_xmii_ctrl0, + .xmii_ctrl1 = ksz8795_xmii_ctrl1, .supports_mii = {false, false, false, false, true}, .supports_rmii = {false, false, false, false, true}, .supports_rgmii = {false, false, false, false, true}, @@ -488,6 +535,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz9477_regs, .masks = ksz9477_masks, .shifts = ksz9477_shifts, + .xmii_ctrl0 = ksz9477_xmii_ctrl0, + .xmii_ctrl1 = ksz9477_xmii_ctrl1, .supports_mii = {false, false, false, false, false, true, false}, .supports_rmii = {false, false, false, false, @@ -514,6 +563,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz9477_regs, .masks = ksz9477_masks, .shifts = ksz9477_shifts, + .xmii_ctrl0 = ksz9477_xmii_ctrl0, + .xmii_ctrl1 = ksz9477_xmii_ctrl1, .supports_mii = {false, false, false, false, false, true, true}, .supports_rmii = {false, false, false, false, @@ -539,6 +590,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz9477_regs, .masks = ksz9477_masks, .shifts = ksz9477_shifts, + .xmii_ctrl0 = ksz9477_xmii_ctrl0, + .xmii_ctrl1 = ksz8795_xmii_ctrl1, /* Same as ksz8795 */ .supports_mii = {false, false, true}, .supports_rmii = {false, false, true}, .supports_rgmii = {false, false, true}, @@ -561,6 +614,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz9477_regs, .masks = ksz9477_masks, .shifts = ksz9477_shifts, + 
.xmii_ctrl0 = ksz9477_xmii_ctrl0, + .xmii_ctrl1 = ksz9477_xmii_ctrl1, .supports_mii = {false, false, false, false, false, true, true}, .supports_rmii = {false, false, false, false, @@ -586,6 +641,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz9477_regs, .masks = lan937x_masks, .shifts = lan937x_shifts, + .xmii_ctrl0 = ksz9477_xmii_ctrl0, + .xmii_ctrl1 = ksz9477_xmii_ctrl1, .supports_mii = {false, false, false, false, true}, .supports_rmii = {false, false, false, false, true}, .supports_rgmii = {false, false, false, false, true}, @@ -607,6 +664,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz9477_regs, .masks = lan937x_masks, .shifts = lan937x_shifts, + .xmii_ctrl0 = ksz9477_xmii_ctrl0, + .xmii_ctrl1 = ksz9477_xmii_ctrl1, .supports_mii = {false, false, false, false, true, true}, .supports_rmii = {false, false, false, false, true, true}, .supports_rgmii = {false, false, false, false, true, true}, @@ -628,6 +687,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz9477_regs, .masks = lan937x_masks, .shifts = lan937x_shifts, + .xmii_ctrl0 = ksz9477_xmii_ctrl0, + .xmii_ctrl1 = ksz9477_xmii_ctrl1, .supports_mii = {false, false, false, false, true, true, false, false}, .supports_rmii = {false, false, false, false, @@ -653,6 +714,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz9477_regs, .masks = lan937x_masks, .shifts = lan937x_shifts, + .xmii_ctrl0 = ksz9477_xmii_ctrl0, + .xmii_ctrl1 = ksz9477_xmii_ctrl1, .supports_mii = {false, false, false, false, true, true, false, false}, .supports_rmii = {false, false, false, false, @@ -678,6 +741,8 @@ const struct ksz_chip_data ksz_switch_chips[] = { .regs = ksz9477_regs, .masks = lan937x_masks, .shifts = lan937x_shifts, + .xmii_ctrl0 = ksz9477_xmii_ctrl0, + .xmii_ctrl1 = ksz9477_xmii_ctrl1, .supports_mii = {false, false, false, false, true, true, false, false}, .supports_rmii = {false, false, false, false, @@ -1343,14 +1408,205 @@ static int ksz_max_mtu(struct dsa_switch *ds, int port) return dev->dev_ops->max_mtu(dev, port); } +static void ksz_set_xmii(struct ksz_device *dev, int port, + phy_interface_t interface) +{ + const u8 *bitval = dev->info->xmii_ctrl1; + struct ksz_port *p = &dev->ports[port]; + const u16 *regs = dev->info->regs; + u8 data8; + + ksz_pread8(dev, port, regs[P_XMII_CTRL_1], &data8); + + data8 &= ~(P_MII_SEL_M | P_RGMII_ID_IG_ENABLE | + P_RGMII_ID_EG_ENABLE); + + switch (interface) { + case PHY_INTERFACE_MODE_MII: + data8 |= bitval[P_MII_SEL]; + break; + case PHY_INTERFACE_MODE_RMII: + data8 |= bitval[P_RMII_SEL]; + break; + case PHY_INTERFACE_MODE_GMII: + data8 |= bitval[P_GMII_SEL]; + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + data8 |= bitval[P_RGMII_SEL]; + /* On KSZ9893, disable RGMII in-band status support */ + if (dev->features & IS_9893) + data8 &= ~P_MII_MAC_MODE; + break; + default: + dev_err(dev->dev, "Unsupported interface '%s' for port %d\n", + phy_modes(interface), port); + return; + } + + if (p->rgmii_tx_val) + data8 |= P_RGMII_ID_EG_ENABLE; + + if (p->rgmii_rx_val) + data8 |= P_RGMII_ID_IG_ENABLE; + + /* Write the updated value */ + ksz_pwrite8(dev, port, regs[P_XMII_CTRL_1], data8); +} + +phy_interface_t ksz_get_xmii(struct ksz_device *dev, int port, bool gbit) +{ + const u8 *bitval = dev->info->xmii_ctrl1; + const u16 *regs = dev->info->regs; + phy_interface_t interface; + u8 data8; + u8 val; + + ksz_pread8(dev, port, regs[P_XMII_CTRL_1], 
&data8); + + val = FIELD_GET(P_MII_SEL_M, data8); + + if (val == bitval[P_MII_SEL]) { + if (gbit) + interface = PHY_INTERFACE_MODE_GMII; + else + interface = PHY_INTERFACE_MODE_MII; + } else if (val == bitval[P_RMII_SEL]) { + interface = PHY_INTERFACE_MODE_RGMII; + } else { + interface = PHY_INTERFACE_MODE_RGMII; + if (data8 & P_RGMII_ID_EG_ENABLE) + interface = PHY_INTERFACE_MODE_RGMII_TXID; + if (data8 & P_RGMII_ID_IG_ENABLE) { + interface = PHY_INTERFACE_MODE_RGMII_RXID; + if (data8 & P_RGMII_ID_EG_ENABLE) + interface = PHY_INTERFACE_MODE_RGMII_ID; + } + } + + return interface; +} + static void ksz_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state) { struct ksz_device *dev = ds->priv; + if (ksz_is_ksz88x3(dev)) + return; + + /* Internal PHYs */ + if (dev->info->internal_phy[port]) + return; + + if (phylink_autoneg_inband(mode)) { + dev_err(dev->dev, "In-band AN not supported!\n"); + return; + } + + ksz_set_xmii(dev, port, state->interface); + if (dev->dev_ops->phylink_mac_config) dev->dev_ops->phylink_mac_config(dev, port, mode, state); + + if (dev->dev_ops->setup_rgmii_delay) + dev->dev_ops->setup_rgmii_delay(dev, port); +} + +bool ksz_get_gbit(struct ksz_device *dev, int port) +{ + const u8 *bitval = dev->info->xmii_ctrl1; + const u16 *regs = dev->info->regs; + bool gbit = false; + u8 data8; + bool val; + + ksz_pread8(dev, port, regs[P_XMII_CTRL_1], &data8); + + val = FIELD_GET(P_GMII_1GBIT_M, data8); + + if (val == bitval[P_GMII_1GBIT]) + gbit = true; + + return gbit; +} + +static void ksz_set_gbit(struct ksz_device *dev, int port, bool gbit) +{ + const u8 *bitval = dev->info->xmii_ctrl1; + const u16 *regs = dev->info->regs; + u8 data8; + + ksz_pread8(dev, port, regs[P_XMII_CTRL_1], &data8); + + data8 &= ~P_GMII_1GBIT_M; + + if (gbit) + data8 |= FIELD_PREP(P_GMII_1GBIT_M, bitval[P_GMII_1GBIT]); + else + data8 |= FIELD_PREP(P_GMII_1GBIT_M, bitval[P_GMII_NOT_1GBIT]); + + /* Write the updated value */ + ksz_pwrite8(dev, port, regs[P_XMII_CTRL_1], data8); +} + +static void ksz_set_100_10mbit(struct ksz_device *dev, int port, int speed) +{ + const u8 *bitval = dev->info->xmii_ctrl0; + const u16 *regs = dev->info->regs; + u8 data8; + + ksz_pread8(dev, port, regs[P_XMII_CTRL_0], &data8); + + data8 &= ~P_MII_100MBIT_M; + + if (speed == SPEED_100) + data8 |= FIELD_PREP(P_MII_100MBIT_M, bitval[P_MII_100MBIT]); + else + data8 |= FIELD_PREP(P_MII_100MBIT_M, bitval[P_MII_10MBIT]); + + /* Write the updated value */ + ksz_pwrite8(dev, port, regs[P_XMII_CTRL_0], data8); +} + +static void ksz_port_set_xmii_speed(struct ksz_device *dev, int port, int speed) +{ + if (speed == SPEED_1000) + ksz_set_gbit(dev, port, true); + else + ksz_set_gbit(dev, port, false); + + if (speed == SPEED_100 || speed == SPEED_10) + ksz_set_100_10mbit(dev, port, speed); +} + +static void ksz_duplex_flowctrl(struct ksz_device *dev, int port, int duplex, + bool tx_pause, bool rx_pause) +{ + const u8 *bitval = dev->info->xmii_ctrl0; + const u32 *masks = dev->info->masks; + const u16 *regs = dev->info->regs; + u8 mask; + u8 val; + + mask = P_MII_DUPLEX_M | masks[P_MII_TX_FLOW_CTRL] | + masks[P_MII_RX_FLOW_CTRL]; + + if (duplex == DUPLEX_FULL) + val = FIELD_PREP(P_MII_DUPLEX_M, bitval[P_MII_FULL_DUPLEX]); + else + val = FIELD_PREP(P_MII_DUPLEX_M, bitval[P_MII_HALF_DUPLEX]); + + if (tx_pause) + val |= masks[P_MII_TX_FLOW_CTRL]; + + if (rx_pause) + val |= masks[P_MII_RX_FLOW_CTRL]; + + ksz_prmw8(dev, port, regs[P_XMII_CTRL_0], mask, val); } static void 
ksz_phylink_mac_link_up(struct dsa_switch *ds, int port, @@ -1360,6 +1616,19 @@ static void ksz_phylink_mac_link_up(struct dsa_switch *ds, int port, int duplex, bool tx_pause, bool rx_pause) { struct ksz_device *dev = ds->priv; + struct ksz_port *p; + + p = &dev->ports[port]; + + /* Internal PHYs */ + if (dev->info->internal_phy[port]) + return; + + p->phydev.speed = speed; + + ksz_port_set_xmii_speed(dev, port, speed); + + ksz_duplex_flowctrl(dev, port, duplex, tx_pause, rx_pause); if (dev->dev_ops->phylink_mac_link_up) dev->dev_ops->phylink_mac_link_up(dev, port, mode, interface, @@ -1494,6 +1763,43 @@ struct ksz_device *ksz_switch_alloc(struct device *base, void *priv) } EXPORT_SYMBOL(ksz_switch_alloc); +static void ksz_parse_rgmii_delay(struct ksz_device *dev, int port_num, + struct device_node *port_dn) +{ + phy_interface_t phy_mode = dev->ports[port_num].interface; + int rx_delay = -1, tx_delay = -1; + + if (!phy_interface_mode_is_rgmii(phy_mode)) + return; + + of_property_read_u32(port_dn, "rx-internal-delay-ps", &rx_delay); + of_property_read_u32(port_dn, "tx-internal-delay-ps", &tx_delay); + + if (rx_delay == -1 && tx_delay == -1) { + dev_warn(dev->dev, + "Port %d interpreting RGMII delay settings based on \"phy-mode\" property, " + "please update device tree to specify \"rx-internal-delay-ps\" and " + "\"tx-internal-delay-ps\"", + port_num); + + if (phy_mode == PHY_INTERFACE_MODE_RGMII_RXID || + phy_mode == PHY_INTERFACE_MODE_RGMII_ID) + rx_delay = 2000; + + if (phy_mode == PHY_INTERFACE_MODE_RGMII_TXID || + phy_mode == PHY_INTERFACE_MODE_RGMII_ID) + tx_delay = 2000; + } + + if (rx_delay < 0) + rx_delay = 0; + if (tx_delay < 0) + tx_delay = 0; + + dev->ports[port_num].rgmii_rx_val = rx_delay; + dev->ports[port_num].rgmii_tx_val = tx_delay; +} + int ksz_switch_register(struct ksz_device *dev) { const struct ksz_chip_data *info; @@ -1591,6 +1897,8 @@ int ksz_switch_register(struct ksz_device *dev) } of_get_phy_mode(port, &dev->ports[port_num].interface); + + ksz_parse_rgmii_delay(dev, port_num, port); } of_node_put(ports); } diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index d5dddb7ec045..764ada3a0f42 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -51,6 +51,8 @@ struct ksz_chip_data { const u16 *regs; const u32 *masks; const u8 *shifts; + const u8 *xmii_ctrl0; + const u8 *xmii_ctrl1; int stp_ctrl_reg; int broadcast_ctrl_reg; int multicast_ctrl_reg; @@ -77,6 +79,8 @@ struct ksz_port { struct ksz_port_mib mib; phy_interface_t interface; u16 max_frame; + u32 rgmii_tx_val; + u32 rgmii_rx_val; }; struct ksz_device { @@ -169,6 +173,8 @@ enum ksz_regs { S_START_CTRL, S_BROADCAST_CTRL, S_MULTICAST_CTRL, + P_XMII_CTRL_0, + P_XMII_CTRL_1, }; enum ksz_masks { @@ -193,6 +199,8 @@ enum ksz_masks { DYNAMIC_MAC_TABLE_TIMESTAMP, ALU_STAT_WRITE, ALU_STAT_READ, + P_MII_TX_FLOW_CTRL, + P_MII_RX_FLOW_CTRL, }; enum ksz_shifts { @@ -208,6 +216,22 @@ enum ksz_shifts { ALU_STAT_INDEX, }; +enum ksz_xmii_ctrl0 { + P_MII_100MBIT, + P_MII_10MBIT, + P_MII_FULL_DUPLEX, + P_MII_HALF_DUPLEX, +}; + +enum ksz_xmii_ctrl1 { + P_RGMII_SEL, + P_RMII_SEL, + P_GMII_SEL, + P_MII_SEL, + P_GMII_1GBIT, + P_GMII_NOT_1GBIT, +}; + struct alu_struct { /* entry 1 */ u8 is_static:1; @@ -279,6 +303,7 @@ struct ksz_dev_ops { phy_interface_t interface, struct phy_device *phydev, int speed, int duplex, bool tx_pause, bool rx_pause); + void (*setup_rgmii_delay)(struct ksz_device *dev, int port); void (*config_cpu_port)(struct dsa_switch 
*ds); int (*enable_stp_addr)(struct ksz_device *dev); int (*reset)(struct ksz_device *dev); @@ -293,6 +318,8 @@ void ksz_switch_remove(struct ksz_device *dev); void ksz_init_mib_timer(struct ksz_device *dev); void ksz_r_mib_stats64(struct ksz_device *dev, int port); void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state); +bool ksz_get_gbit(struct ksz_device *dev, int port); +phy_interface_t ksz_get_xmii(struct ksz_device *dev, int port, bool gbit); extern const struct ksz_chip_data ksz_switch_chips[]; /* Common register access functions */ @@ -399,6 +426,14 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset, ksz_write32(dev, dev->dev_ops->get_port_addr(port, offset), data); } +static inline void ksz_prmw8(struct ksz_device *dev, int port, int offset, + u8 mask, u8 val) +{ + regmap_update_bits(dev->regmap[0], + dev->dev_ops->get_port_addr(port, offset), + mask, val); +} + static inline void ksz_regmap_lock(void *__mtx) { struct mutex *mtx = __mtx; @@ -411,6 +446,11 @@ static inline void ksz_regmap_unlock(void *__mtx) mutex_unlock(mtx); } +static inline bool ksz_is_ksz88x3(struct ksz_device *dev) +{ + return dev->chip_id == KSZ8830_CHIP_ID; +} + static inline int is_lan937x(struct ksz_device *dev) { return dev->chip_id == LAN9370_CHIP_ID || @@ -456,6 +496,20 @@ static inline int is_lan937x(struct ksz_device *dev) #define SW_START 0x01 +/* Used with variable features to indicate capabilities. */ +#define GBIT_SUPPORT BIT(0) +#define IS_9893 BIT(2) + +/* xMII configuration */ +#define P_MII_DUPLEX_M BIT(6) +#define P_MII_100MBIT_M BIT(4) + +#define P_GMII_1GBIT_M BIT(6) +#define P_RGMII_ID_IG_ENABLE BIT(4) +#define P_RGMII_ID_EG_ENABLE BIT(3) +#define P_MII_MAC_MODE BIT(2) +#define P_MII_SEL_M 0x3 + /* Regmap tables generation */ #define KSZ_SPI_OP_RD 3 #define KSZ_SPI_OP_WR 2 diff --git a/drivers/net/dsa/microchip/lan937x.h b/drivers/net/dsa/microchip/lan937x.h index 72ba9cb2fbc6..4e0b1dccec27 100644 --- a/drivers/net/dsa/microchip/lan937x.h +++ b/drivers/net/dsa/microchip/lan937x.h @@ -17,11 +17,5 @@ void lan937x_w_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 val); int lan937x_change_mtu(struct ksz_device *dev, int port, int new_mtu); void lan937x_phylink_get_caps(struct ksz_device *dev, int port, struct phylink_config *config); -void lan937x_phylink_mac_link_up(struct ksz_device *dev, int port, - unsigned int mode, phy_interface_t interface, - struct phy_device *phydev, int speed, - int duplex, bool tx_pause, bool rx_pause); -void lan937x_phylink_mac_config(struct ksz_device *dev, int port, - unsigned int mode, - const struct phylink_link_state *state); +void lan937x_setup_rgmii_delay(struct ksz_device *dev, int port); #endif diff --git a/drivers/net/dsa/microchip/lan937x_main.c b/drivers/net/dsa/microchip/lan937x_main.c index c29d175ca6f7..daedd2bf20c1 100644 --- a/drivers/net/dsa/microchip/lan937x_main.c +++ b/drivers/net/dsa/microchip/lan937x_main.c @@ -234,6 +234,8 @@ int lan937x_reset_switch(struct ksz_device *dev) void lan937x_port_setup(struct ksz_device *dev, int port, bool cpu_port) { + const u32 *masks = dev->info->masks; + const u16 *regs = dev->info->regs; struct dsa_switch *ds = dev->ds; u8 member; @@ -254,8 +256,9 @@ void lan937x_port_setup(struct ksz_device *dev, int port, bool cpu_port) lan937x_port_cfg(dev, port, P_PRIO_CTRL, PORT_802_1P_PRIO_ENABLE, true); if (!dev->info->internal_phy[port]) - lan937x_port_cfg(dev, port, REG_PORT_XMII_CTRL_0, - PORT_MII_TX_FLOW_CTRL | PORT_MII_RX_FLOW_CTRL, + lan937x_port_cfg(dev, port, 
regs[P_XMII_CTRL_0], + masks[P_MII_TX_FLOW_CTRL] | + masks[P_MII_RX_FLOW_CTRL], true); if (cpu_port) @@ -312,75 +315,43 @@ int lan937x_change_mtu(struct ksz_device *dev, int port, int new_mtu) return 0; } -static void lan937x_config_gbit(struct ksz_device *dev, bool gbit, u8 *data) +static void lan937x_set_tune_adj(struct ksz_device *dev, int port, + u16 reg, u8 val) { - if (gbit) - *data &= ~PORT_MII_NOT_1GBIT; - else - *data |= PORT_MII_NOT_1GBIT; -} + u16 data16; -static void lan937x_mac_config(struct ksz_device *dev, int port, - phy_interface_t interface) -{ - u8 data8; - - ksz_pread8(dev, port, REG_PORT_XMII_CTRL_1, &data8); - - /* clear MII selection & set it based on interface later */ - data8 &= ~PORT_MII_SEL_M; - - /* configure MAC based on interface */ - switch (interface) { - case PHY_INTERFACE_MODE_MII: - lan937x_config_gbit(dev, false, &data8); - data8 |= PORT_MII_SEL; - break; - case PHY_INTERFACE_MODE_RMII: - lan937x_config_gbit(dev, false, &data8); - data8 |= PORT_RMII_SEL; - break; - default: - dev_err(dev->dev, "Unsupported interface '%s' for port %d\n", - phy_modes(interface), port); - return; - } + ksz_pread16(dev, port, reg, &data16); - /* Write the updated value */ - ksz_pwrite8(dev, port, REG_PORT_XMII_CTRL_1, data8); + /* Update tune Adjust */ + data16 |= FIELD_PREP(PORT_TUNE_ADJ, val); + ksz_pwrite16(dev, port, reg, data16); + + /* write DLL reset to take effect */ + data16 |= PORT_DLL_RESET; + ksz_pwrite16(dev, port, reg, data16); } -static void lan937x_config_interface(struct ksz_device *dev, int port, - int speed, int duplex, - bool tx_pause, bool rx_pause) +static void lan937x_set_rgmii_tx_delay(struct ksz_device *dev, int port) { - u8 xmii_ctrl0, xmii_ctrl1; - - ksz_pread8(dev, port, REG_PORT_XMII_CTRL_0, &xmii_ctrl0); - ksz_pread8(dev, port, REG_PORT_XMII_CTRL_1, &xmii_ctrl1); - - xmii_ctrl0 &= ~(PORT_MII_100MBIT | PORT_MII_FULL_DUPLEX | - PORT_MII_TX_FLOW_CTRL | PORT_MII_RX_FLOW_CTRL); - - if (speed == SPEED_1000) - lan937x_config_gbit(dev, true, &xmii_ctrl1); - else - lan937x_config_gbit(dev, false, &xmii_ctrl1); + u8 val; - if (speed == SPEED_100) - xmii_ctrl0 |= PORT_MII_100MBIT; + /* Apply different codes based on the ports as per characterization + * results + */ + val = (port == LAN937X_RGMII_1_PORT) ? RGMII_1_TX_DELAY_2NS : + RGMII_2_TX_DELAY_2NS; - if (duplex) - xmii_ctrl0 |= PORT_MII_FULL_DUPLEX; + lan937x_set_tune_adj(dev, port, REG_PORT_XMII_CTRL_5, val); +} - if (tx_pause) - xmii_ctrl0 |= PORT_MII_TX_FLOW_CTRL; +static void lan937x_set_rgmii_rx_delay(struct ksz_device *dev, int port) +{ + u8 val; - if (rx_pause) - xmii_ctrl0 |= PORT_MII_RX_FLOW_CTRL; + val = (port == LAN937X_RGMII_1_PORT) ? 
RGMII_1_RX_DELAY_2NS : + RGMII_2_RX_DELAY_2NS; - ksz_pwrite8(dev, port, REG_PORT_XMII_CTRL_0, xmii_ctrl0); - ksz_pwrite8(dev, port, REG_PORT_XMII_CTRL_1, xmii_ctrl1); + lan937x_set_tune_adj(dev, port, REG_PORT_XMII_CTRL_4, val); } void lan937x_phylink_get_caps(struct ksz_device *dev, int port, @@ -395,33 +366,21 @@ void lan937x_phylink_get_caps(struct ksz_device *dev, int port, } } -void lan937x_phylink_mac_link_up(struct ksz_device *dev, int port, - unsigned int mode, phy_interface_t interface, - struct phy_device *phydev, int speed, - int duplex, bool tx_pause, bool rx_pause) +void lan937x_setup_rgmii_delay(struct ksz_device *dev, int port) { - /* Internal PHYs */ - if (dev->info->internal_phy[port]) - return; + struct ksz_port *p = &dev->ports[port]; - lan937x_config_interface(dev, port, speed, duplex, - tx_pause, rx_pause); -} - -void lan937x_phylink_mac_config(struct ksz_device *dev, int port, - unsigned int mode, - const struct phylink_link_state *state) -{ - /* Internal PHYs */ - if (dev->info->internal_phy[port]) - return; - - if (phylink_autoneg_inband(mode)) { - dev_err(dev->dev, "In-band AN not supported!\n"); - return; + if (p->rgmii_tx_val) { + lan937x_set_rgmii_tx_delay(dev, port); + dev_info(dev->dev, "Applied rgmii tx delay for the port %d\n", + port); } - lan937x_mac_config(dev, port, state->interface); + if (p->rgmii_rx_val) { + lan937x_set_rgmii_rx_delay(dev, port); + dev_info(dev->dev, "Applied rgmii rx delay for the port %d\n", + port); + } } int lan937x_setup(struct dsa_switch *ds) diff --git a/drivers/net/dsa/microchip/lan937x_reg.h b/drivers/net/dsa/microchip/lan937x_reg.h index c187d0a3e7fa..ba4adaddb3ec 100644 --- a/drivers/net/dsa/microchip/lan937x_reg.h +++ b/drivers/net/dsa/microchip/lan937x_reg.h @@ -131,24 +131,16 @@ #define REG_PORT_T1_PHY_CTRL_BASE 0x0100 /* 3 - xMII */ -#define REG_PORT_XMII_CTRL_0 0x0300 #define PORT_SGMII_SEL BIT(7) -#define PORT_MII_FULL_DUPLEX BIT(6) -#define PORT_MII_TX_FLOW_CTRL BIT(5) -#define PORT_MII_100MBIT BIT(4) -#define PORT_MII_RX_FLOW_CTRL BIT(3) #define PORT_GRXC_ENABLE BIT(0) -#define REG_PORT_XMII_CTRL_1 0x0301 -#define PORT_MII_NOT_1GBIT BIT(6) #define PORT_MII_SEL_EDGE BIT(5) -#define PORT_RGMII_ID_IG_ENABLE BIT(4) -#define PORT_RGMII_ID_EG_ENABLE BIT(3) -#define PORT_MII_MAC_MODE BIT(2) -#define PORT_MII_SEL_M 0x3 -#define PORT_RGMII_SEL 0x0 -#define PORT_RMII_SEL 0x1 -#define PORT_MII_SEL 0x2 + +#define REG_PORT_XMII_CTRL_4 0x0304 +#define REG_PORT_XMII_CTRL_5 0x0306 + +#define PORT_DLL_RESET BIT(15) +#define PORT_TUNE_ADJ GENMASK(13, 7) /* 4 - MAC */ #define REG_PORT_MAC_CTRL_0 0x0400 @@ -175,6 +167,18 @@ #define P_PRIO_CTRL REG_PORT_MRI_PRIO_CTRL +/* The port number as per the datasheet */ +#define RGMII_2_PORT_NUM 5 +#define RGMII_1_PORT_NUM 6 + +#define LAN937X_RGMII_2_PORT (RGMII_2_PORT_NUM - 1) +#define LAN937X_RGMII_1_PORT (RGMII_1_PORT_NUM - 1) + +#define RGMII_1_TX_DELAY_2NS 2 +#define RGMII_2_TX_DELAY_2NS 0 +#define RGMII_1_RX_DELAY_2NS 0x1B +#define RGMII_2_RX_DELAY_2NS 0x14 + #define LAN937X_TAG_LEN 2 #endif diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 37b649501500..07e9a4da924c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3293,7 +3293,12 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * port and all DSA ports to their maximum bandwidth and full duplex. 
*/ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { - unsigned long caps = dp->pl_config.mac_capabilities; + struct phylink_config pl_config = {}; + unsigned long caps; + + mv88e6xxx_get_caps(ds, port, &pl_config); + + caps = pl_config.mac_capabilities; if (chip->info->ops->port_max_speed_mode) mode = chip->info->ops->port_max_speed_mode(port); diff --git a/drivers/net/dsa/qca/Makefile b/drivers/net/dsa/qca/Makefile index 40bb7c27285b..701f1d199e93 100644 --- a/drivers/net/dsa/qca/Makefile +++ b/drivers/net/dsa/qca/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_NET_DSA_AR9331) += ar9331.o obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o +qca8k-y += qca8k-common.o qca8k-8xxx.o diff --git a/drivers/net/dsa/qca/qca8k.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 1cbb05b0323f..1d3e7782a71f 100644 --- a/drivers/net/dsa/qca/qca8k.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -15,7 +15,6 @@ #include <linux/of_net.h> #include <linux/of_mdio.h> #include <linux/of_platform.h> -#include <linux/if_bridge.h> #include <linux/mdio.h> #include <linux/phylink.h> #include <linux/gpio/consumer.h> @@ -24,57 +23,6 @@ #include "qca8k.h" -#define MIB_DESC(_s, _o, _n) \ - { \ - .size = (_s), \ - .offset = (_o), \ - .name = (_n), \ - } - -static const struct qca8k_mib_desc ar8327_mib[] = { - MIB_DESC(1, 0x00, "RxBroad"), - MIB_DESC(1, 0x04, "RxPause"), - MIB_DESC(1, 0x08, "RxMulti"), - MIB_DESC(1, 0x0c, "RxFcsErr"), - MIB_DESC(1, 0x10, "RxAlignErr"), - MIB_DESC(1, 0x14, "RxRunt"), - MIB_DESC(1, 0x18, "RxFragment"), - MIB_DESC(1, 0x1c, "Rx64Byte"), - MIB_DESC(1, 0x20, "Rx128Byte"), - MIB_DESC(1, 0x24, "Rx256Byte"), - MIB_DESC(1, 0x28, "Rx512Byte"), - MIB_DESC(1, 0x2c, "Rx1024Byte"), - MIB_DESC(1, 0x30, "Rx1518Byte"), - MIB_DESC(1, 0x34, "RxMaxByte"), - MIB_DESC(1, 0x38, "RxTooLong"), - MIB_DESC(2, 0x3c, "RxGoodByte"), - MIB_DESC(2, 0x44, "RxBadByte"), - MIB_DESC(1, 0x4c, "RxOverFlow"), - MIB_DESC(1, 0x50, "Filtered"), - MIB_DESC(1, 0x54, "TxBroad"), - MIB_DESC(1, 0x58, "TxPause"), - MIB_DESC(1, 0x5c, "TxMulti"), - MIB_DESC(1, 0x60, "TxUnderRun"), - MIB_DESC(1, 0x64, "Tx64Byte"), - MIB_DESC(1, 0x68, "Tx128Byte"), - MIB_DESC(1, 0x6c, "Tx256Byte"), - MIB_DESC(1, 0x70, "Tx512Byte"), - MIB_DESC(1, 0x74, "Tx1024Byte"), - MIB_DESC(1, 0x78, "Tx1518Byte"), - MIB_DESC(1, 0x7c, "TxMaxByte"), - MIB_DESC(1, 0x80, "TxOverSize"), - MIB_DESC(2, 0x84, "TxByte"), - MIB_DESC(1, 0x8c, "TxCollision"), - MIB_DESC(1, 0x90, "TxAbortCol"), - MIB_DESC(1, 0x94, "TxMultiCol"), - MIB_DESC(1, 0x98, "TxSingleCol"), - MIB_DESC(1, 0x9c, "TxExcDefer"), - MIB_DESC(1, 0xa0, "TxDefer"), - MIB_DESC(1, 0xa4, "TxLateCol"), - MIB_DESC(1, 0xa8, "RXUnicast"), - MIB_DESC(1, 0xac, "TXUnicast"), -}; - static void qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page) { @@ -184,24 +132,6 @@ qca8k_set_page(struct qca8k_priv *priv, u16 page) return 0; } -static int -qca8k_read(struct qca8k_priv *priv, u32 reg, u32 *val) -{ - return regmap_read(priv->regmap, reg, val); -} - -static int -qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val) -{ - return regmap_write(priv->regmap, reg, val); -} - -static int -qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 write_val) -{ - return regmap_update_bits(priv->regmap, reg, mask, write_val); -} - static void qca8k_rw_reg_ack_handler(struct dsa_switch *ds, struct sk_buff *skb) { struct qca8k_mgmt_eth_data *mgmt_eth_data; @@ -412,43 +342,6 @@ qca8k_regmap_update_bits_eth(struct qca8k_priv *priv, u32 reg, u32 mask, u32 wri } static int -qca8k_bulk_read(struct qca8k_priv *priv, u32 
reg, u32 *val, int len) -{ - int i, count = len / sizeof(u32), ret; - - if (priv->mgmt_master && !qca8k_read_eth(priv, reg, val, len)) - return 0; - - for (i = 0; i < count; i++) { - ret = regmap_read(priv->regmap, reg + (i * 4), val + i); - if (ret < 0) - return ret; - } - - return 0; -} - -static int -qca8k_bulk_write(struct qca8k_priv *priv, u32 reg, u32 *val, int len) -{ - int i, count = len / sizeof(u32), ret; - u32 tmp; - - if (priv->mgmt_master && !qca8k_write_eth(priv, reg, val, len)) - return 0; - - for (i = 0; i < count; i++) { - tmp = val[i]; - - ret = regmap_write(priv->regmap, reg + (i * 4), tmp); - if (ret < 0) - return ret; - } - - return 0; -} - -static int qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val) { struct qca8k_priv *priv = (struct qca8k_priv *)ctx; @@ -534,30 +427,6 @@ exit: return ret; } -static const struct regmap_range qca8k_readable_ranges[] = { - regmap_reg_range(0x0000, 0x00e4), /* Global control */ - regmap_reg_range(0x0100, 0x0168), /* EEE control */ - regmap_reg_range(0x0200, 0x0270), /* Parser control */ - regmap_reg_range(0x0400, 0x0454), /* ACL */ - regmap_reg_range(0x0600, 0x0718), /* Lookup */ - regmap_reg_range(0x0800, 0x0b70), /* QM */ - regmap_reg_range(0x0c00, 0x0c80), /* PKT */ - regmap_reg_range(0x0e00, 0x0e98), /* L3 */ - regmap_reg_range(0x1000, 0x10ac), /* MIB - Port0 */ - regmap_reg_range(0x1100, 0x11ac), /* MIB - Port1 */ - regmap_reg_range(0x1200, 0x12ac), /* MIB - Port2 */ - regmap_reg_range(0x1300, 0x13ac), /* MIB - Port3 */ - regmap_reg_range(0x1400, 0x14ac), /* MIB - Port4 */ - regmap_reg_range(0x1500, 0x15ac), /* MIB - Port5 */ - regmap_reg_range(0x1600, 0x16ac), /* MIB - Port6 */ - -}; - -static const struct regmap_access_table qca8k_readable_table = { - .yes_ranges = qca8k_readable_ranges, - .n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges), -}; - static struct regmap_config qca8k_regmap_config = { .reg_bits = 16, .val_bits = 32, @@ -572,385 +441,6 @@ static struct regmap_config qca8k_regmap_config = { }; static int -qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask) -{ - u32 val; - - return regmap_read_poll_timeout(priv->regmap, reg, val, !(val & mask), 0, - QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC); -} - -static int -qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb) -{ - u32 reg[3]; - int ret; - - /* load the ARL table into an array */ - ret = qca8k_bulk_read(priv, QCA8K_REG_ATU_DATA0, reg, sizeof(reg)); - if (ret) - return ret; - - /* vid - 83:72 */ - fdb->vid = FIELD_GET(QCA8K_ATU_VID_MASK, reg[2]); - /* aging - 67:64 */ - fdb->aging = FIELD_GET(QCA8K_ATU_STATUS_MASK, reg[2]); - /* portmask - 54:48 */ - fdb->port_mask = FIELD_GET(QCA8K_ATU_PORT_MASK, reg[1]); - /* mac - 47:0 */ - fdb->mac[0] = FIELD_GET(QCA8K_ATU_ADDR0_MASK, reg[1]); - fdb->mac[1] = FIELD_GET(QCA8K_ATU_ADDR1_MASK, reg[1]); - fdb->mac[2] = FIELD_GET(QCA8K_ATU_ADDR2_MASK, reg[0]); - fdb->mac[3] = FIELD_GET(QCA8K_ATU_ADDR3_MASK, reg[0]); - fdb->mac[4] = FIELD_GET(QCA8K_ATU_ADDR4_MASK, reg[0]); - fdb->mac[5] = FIELD_GET(QCA8K_ATU_ADDR5_MASK, reg[0]); - - return 0; -} - -static void -qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac, - u8 aging) -{ - u32 reg[3] = { 0 }; - - /* vid - 83:72 */ - reg[2] = FIELD_PREP(QCA8K_ATU_VID_MASK, vid); - /* aging - 67:64 */ - reg[2] |= FIELD_PREP(QCA8K_ATU_STATUS_MASK, aging); - /* portmask - 54:48 */ - reg[1] = FIELD_PREP(QCA8K_ATU_PORT_MASK, port_mask); - /* mac - 47:0 */ - reg[1] |= FIELD_PREP(QCA8K_ATU_ADDR0_MASK, mac[0]); - reg[1] |= FIELD_PREP(QCA8K_ATU_ADDR1_MASK, 
mac[1]); - reg[0] |= FIELD_PREP(QCA8K_ATU_ADDR2_MASK, mac[2]); - reg[0] |= FIELD_PREP(QCA8K_ATU_ADDR3_MASK, mac[3]); - reg[0] |= FIELD_PREP(QCA8K_ATU_ADDR4_MASK, mac[4]); - reg[0] |= FIELD_PREP(QCA8K_ATU_ADDR5_MASK, mac[5]); - - /* load the array into the ARL table */ - qca8k_bulk_write(priv, QCA8K_REG_ATU_DATA0, reg, sizeof(reg)); -} - -static int -qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, int port) -{ - u32 reg; - int ret; - - /* Set the command and FDB index */ - reg = QCA8K_ATU_FUNC_BUSY; - reg |= cmd; - if (port >= 0) { - reg |= QCA8K_ATU_FUNC_PORT_EN; - reg |= FIELD_PREP(QCA8K_ATU_FUNC_PORT_MASK, port); - } - - /* Write the function register triggering the table access */ - ret = qca8k_write(priv, QCA8K_REG_ATU_FUNC, reg); - if (ret) - return ret; - - /* wait for completion */ - ret = qca8k_busy_wait(priv, QCA8K_REG_ATU_FUNC, QCA8K_ATU_FUNC_BUSY); - if (ret) - return ret; - - /* Check for table full violation when adding an entry */ - if (cmd == QCA8K_FDB_LOAD) { - ret = qca8k_read(priv, QCA8K_REG_ATU_FUNC, &reg); - if (ret < 0) - return ret; - if (reg & QCA8K_ATU_FUNC_FULL) - return -1; - } - - return 0; -} - -static int -qca8k_fdb_next(struct qca8k_priv *priv, struct qca8k_fdb *fdb, int port) -{ - int ret; - - qca8k_fdb_write(priv, fdb->vid, fdb->port_mask, fdb->mac, fdb->aging); - ret = qca8k_fdb_access(priv, QCA8K_FDB_NEXT, port); - if (ret < 0) - return ret; - - return qca8k_fdb_read(priv, fdb); -} - -static int -qca8k_fdb_add(struct qca8k_priv *priv, const u8 *mac, u16 port_mask, - u16 vid, u8 aging) -{ - int ret; - - mutex_lock(&priv->reg_mutex); - qca8k_fdb_write(priv, vid, port_mask, mac, aging); - ret = qca8k_fdb_access(priv, QCA8K_FDB_LOAD, -1); - mutex_unlock(&priv->reg_mutex); - - return ret; -} - -static int -qca8k_fdb_del(struct qca8k_priv *priv, const u8 *mac, u16 port_mask, u16 vid) -{ - int ret; - - mutex_lock(&priv->reg_mutex); - qca8k_fdb_write(priv, vid, port_mask, mac, 0); - ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1); - mutex_unlock(&priv->reg_mutex); - - return ret; -} - -static void -qca8k_fdb_flush(struct qca8k_priv *priv) -{ - mutex_lock(&priv->reg_mutex); - qca8k_fdb_access(priv, QCA8K_FDB_FLUSH, -1); - mutex_unlock(&priv->reg_mutex); -} - -static int -qca8k_fdb_search_and_insert(struct qca8k_priv *priv, u8 port_mask, - const u8 *mac, u16 vid) -{ - struct qca8k_fdb fdb = { 0 }; - int ret; - - mutex_lock(&priv->reg_mutex); - - qca8k_fdb_write(priv, vid, 0, mac, 0); - ret = qca8k_fdb_access(priv, QCA8K_FDB_SEARCH, -1); - if (ret < 0) - goto exit; - - ret = qca8k_fdb_read(priv, &fdb); - if (ret < 0) - goto exit; - - /* Rule exist. Delete first */ - if (!fdb.aging) { - ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1); - if (ret) - goto exit; - } - - /* Add port to fdb portmask */ - fdb.port_mask |= port_mask; - - qca8k_fdb_write(priv, vid, fdb.port_mask, mac, fdb.aging); - ret = qca8k_fdb_access(priv, QCA8K_FDB_LOAD, -1); - -exit: - mutex_unlock(&priv->reg_mutex); - return ret; -} - -static int -qca8k_fdb_search_and_del(struct qca8k_priv *priv, u8 port_mask, - const u8 *mac, u16 vid) -{ - struct qca8k_fdb fdb = { 0 }; - int ret; - - mutex_lock(&priv->reg_mutex); - - qca8k_fdb_write(priv, vid, 0, mac, 0); - ret = qca8k_fdb_access(priv, QCA8K_FDB_SEARCH, -1); - if (ret < 0) - goto exit; - - /* Rule doesn't exist. Why delete? */ - if (!fdb.aging) { - ret = -EINVAL; - goto exit; - } - - ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1); - if (ret) - goto exit; - - /* Only port in the rule is this port. 
Don't re insert */ - if (fdb.port_mask == port_mask) - goto exit; - - /* Remove port from port mask */ - fdb.port_mask &= ~port_mask; - - qca8k_fdb_write(priv, vid, fdb.port_mask, mac, fdb.aging); - ret = qca8k_fdb_access(priv, QCA8K_FDB_LOAD, -1); - -exit: - mutex_unlock(&priv->reg_mutex); - return ret; -} - -static int -qca8k_vlan_access(struct qca8k_priv *priv, enum qca8k_vlan_cmd cmd, u16 vid) -{ - u32 reg; - int ret; - - /* Set the command and VLAN index */ - reg = QCA8K_VTU_FUNC1_BUSY; - reg |= cmd; - reg |= FIELD_PREP(QCA8K_VTU_FUNC1_VID_MASK, vid); - - /* Write the function register triggering the table access */ - ret = qca8k_write(priv, QCA8K_REG_VTU_FUNC1, reg); - if (ret) - return ret; - - /* wait for completion */ - ret = qca8k_busy_wait(priv, QCA8K_REG_VTU_FUNC1, QCA8K_VTU_FUNC1_BUSY); - if (ret) - return ret; - - /* Check for table full violation when adding an entry */ - if (cmd == QCA8K_VLAN_LOAD) { - ret = qca8k_read(priv, QCA8K_REG_VTU_FUNC1, &reg); - if (ret < 0) - return ret; - if (reg & QCA8K_VTU_FUNC1_FULL) - return -ENOMEM; - } - - return 0; -} - -static int -qca8k_vlan_add(struct qca8k_priv *priv, u8 port, u16 vid, bool untagged) -{ - u32 reg; - int ret; - - /* - We do the right thing with VLAN 0 and treat it as untagged while - preserving the tag on egress. - */ - if (vid == 0) - return 0; - - mutex_lock(&priv->reg_mutex); - ret = qca8k_vlan_access(priv, QCA8K_VLAN_READ, vid); - if (ret < 0) - goto out; - - ret = qca8k_read(priv, QCA8K_REG_VTU_FUNC0, &reg); - if (ret < 0) - goto out; - reg |= QCA8K_VTU_FUNC0_VALID | QCA8K_VTU_FUNC0_IVL_EN; - reg &= ~QCA8K_VTU_FUNC0_EG_MODE_PORT_MASK(port); - if (untagged) - reg |= QCA8K_VTU_FUNC0_EG_MODE_PORT_UNTAG(port); - else - reg |= QCA8K_VTU_FUNC0_EG_MODE_PORT_TAG(port); - - ret = qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg); - if (ret) - goto out; - ret = qca8k_vlan_access(priv, QCA8K_VLAN_LOAD, vid); - -out: - mutex_unlock(&priv->reg_mutex); - - return ret; -} - -static int -qca8k_vlan_del(struct qca8k_priv *priv, u8 port, u16 vid) -{ - u32 reg, mask; - int ret, i; - bool del; - - mutex_lock(&priv->reg_mutex); - ret = qca8k_vlan_access(priv, QCA8K_VLAN_READ, vid); - if (ret < 0) - goto out; - - ret = qca8k_read(priv, QCA8K_REG_VTU_FUNC0, &reg); - if (ret < 0) - goto out; - reg &= ~QCA8K_VTU_FUNC0_EG_MODE_PORT_MASK(port); - reg |= QCA8K_VTU_FUNC0_EG_MODE_PORT_NOT(port); - - /* Check if we're the last member to be removed */ - del = true; - for (i = 0; i < QCA8K_NUM_PORTS; i++) { - mask = QCA8K_VTU_FUNC0_EG_MODE_PORT_NOT(i); - - if ((reg & mask) != mask) { - del = false; - break; - } - } - - if (del) { - ret = qca8k_vlan_access(priv, QCA8K_VLAN_PURGE, vid); - } else { - ret = qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg); - if (ret) - goto out; - ret = qca8k_vlan_access(priv, QCA8K_VLAN_LOAD, vid); - } - -out: - mutex_unlock(&priv->reg_mutex); - - return ret; -} - -static int -qca8k_mib_init(struct qca8k_priv *priv) -{ - int ret; - - mutex_lock(&priv->reg_mutex); - ret = regmap_update_bits(priv->regmap, QCA8K_REG_MIB, - QCA8K_MIB_FUNC | QCA8K_MIB_BUSY, - FIELD_PREP(QCA8K_MIB_FUNC, QCA8K_MIB_FLUSH) | - QCA8K_MIB_BUSY); - if (ret) - goto exit; - - ret = qca8k_busy_wait(priv, QCA8K_REG_MIB, QCA8K_MIB_BUSY); - if (ret) - goto exit; - - ret = regmap_set_bits(priv->regmap, QCA8K_REG_MIB, QCA8K_MIB_CPU_KEEP); - if (ret) - goto exit; - - ret = qca8k_write(priv, QCA8K_REG_MODULE_EN, QCA8K_MODULE_EN_MIB); - -exit: - mutex_unlock(&priv->reg_mutex); - return ret; -} - -static void -qca8k_port_set_status(struct qca8k_priv *priv, int port, int 
enable) -{ - u32 mask = QCA8K_PORT_STATUS_TXMAC | QCA8K_PORT_STATUS_RXMAC; - - /* Port 0 and 6 have no internal PHY */ - if (port > 0 && port < 6) - mask |= QCA8K_PORT_STATUS_LINK_AUTO; - - if (enable) - regmap_set_bits(priv->regmap, QCA8K_REG_PORT_STATUS(port), mask); - else - regmap_clear_bits(priv->regmap, QCA8K_REG_PORT_STATUS(port), mask); -} - -static int qca8k_phy_eth_busy_wait(struct qca8k_mgmt_eth_data *mgmt_eth_data, struct sk_buff *read_skb, u32 *val) { @@ -1462,8 +952,8 @@ static int qca8k_find_cpu_port(struct dsa_switch *ds) static int qca8k_setup_of_pws_reg(struct qca8k_priv *priv) { + const struct qca8k_match_data *data = priv->info; struct device_node *node = priv->dev->of_node; - const struct qca8k_match_data *data; u32 val = 0; int ret; @@ -1472,8 +962,6 @@ qca8k_setup_of_pws_reg(struct qca8k_priv *priv) * Should be applied by default but we set this just to make sure. */ if (priv->switch_id == QCA8K_ID_QCA8327) { - data = of_device_get_match_data(priv->dev); - /* Set the correct package of 148 pin for QCA8327 */ if (data->reduced_package) val |= QCA8327_PWS_PACKAGE148_EN; @@ -1993,26 +1481,8 @@ static void qca8k_setup_pcs(struct qca8k_priv *priv, struct qca8k_pcs *qpcs, qpcs->port = port; } -static void -qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data) -{ - const struct qca8k_match_data *match_data; - struct qca8k_priv *priv = ds->priv; - int i; - - if (stringset != ETH_SS_STATS) - return; - - match_data = of_device_get_match_data(priv->dev); - - for (i = 0; i < match_data->mib_count; i++) - strncpy(data + i * ETH_GSTRING_LEN, ar8327_mib[i].name, - ETH_GSTRING_LEN); -} - static void qca8k_mib_autocast_handler(struct dsa_switch *ds, struct sk_buff *skb) { - const struct qca8k_match_data *match_data; struct qca8k_mib_eth_data *mib_eth_data; struct qca8k_priv *priv = ds->priv; const struct qca8k_mib_desc *mib; @@ -2031,10 +1501,9 @@ static void qca8k_mib_autocast_handler(struct dsa_switch *ds, struct sk_buff *sk if (port != mib_eth_data->req_port) goto exit; - match_data = device_get_match_data(priv->dev); data = mib_eth_data->data; - for (i = 0; i < match_data->mib_count; i++) { + for (i = 0; i < priv->info->mib_count; i++) { mib = &ar8327_mib[i]; /* First 3 mib are present in the skb head */ @@ -2101,522 +1570,6 @@ exit: return ret; } -static void -qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, - uint64_t *data) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - const struct qca8k_match_data *match_data; - const struct qca8k_mib_desc *mib; - u32 reg, i, val; - u32 hi = 0; - int ret; - - if (priv->mgmt_master && - qca8k_get_ethtool_stats_eth(ds, port, data) > 0) - return; - - match_data = of_device_get_match_data(priv->dev); - - for (i = 0; i < match_data->mib_count; i++) { - mib = &ar8327_mib[i]; - reg = QCA8K_PORT_MIB_COUNTER(port) + mib->offset; - - ret = qca8k_read(priv, reg, &val); - if (ret < 0) - continue; - - if (mib->size == 2) { - ret = qca8k_read(priv, reg + 4, &hi); - if (ret < 0) - continue; - } - - data[i] = val; - if (mib->size == 2) - data[i] |= (u64)hi << 32; - } -} - -static int -qca8k_get_sset_count(struct dsa_switch *ds, int port, int sset) -{ - const struct qca8k_match_data *match_data; - struct qca8k_priv *priv = ds->priv; - - if (sset != ETH_SS_STATS) - return 0; - - match_data = of_device_get_match_data(priv->dev); - - return match_data->mib_count; -} - -static int -qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee) -{ - struct qca8k_priv *priv = (struct qca8k_priv 
*)ds->priv; - u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port); - u32 reg; - int ret; - - mutex_lock(&priv->reg_mutex); - ret = qca8k_read(priv, QCA8K_REG_EEE_CTRL, &reg); - if (ret < 0) - goto exit; - - if (eee->eee_enabled) - reg |= lpi_en; - else - reg &= ~lpi_en; - ret = qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg); - -exit: - mutex_unlock(&priv->reg_mutex); - return ret; -} - -static int -qca8k_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) -{ - /* Nothing to do on the port's MAC */ - return 0; -} - -static void -qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - u32 stp_state; - - switch (state) { - case BR_STATE_DISABLED: - stp_state = QCA8K_PORT_LOOKUP_STATE_DISABLED; - break; - case BR_STATE_BLOCKING: - stp_state = QCA8K_PORT_LOOKUP_STATE_BLOCKING; - break; - case BR_STATE_LISTENING: - stp_state = QCA8K_PORT_LOOKUP_STATE_LISTENING; - break; - case BR_STATE_LEARNING: - stp_state = QCA8K_PORT_LOOKUP_STATE_LEARNING; - break; - case BR_STATE_FORWARDING: - default: - stp_state = QCA8K_PORT_LOOKUP_STATE_FORWARD; - break; - } - - qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), - QCA8K_PORT_LOOKUP_STATE_MASK, stp_state); -} - -static int qca8k_port_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge, - bool *tx_fwd_offload, - struct netlink_ext_ack *extack) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - int port_mask, cpu_port; - int i, ret; - - cpu_port = dsa_to_port(ds, port)->cpu_dp->index; - port_mask = BIT(cpu_port); - - for (i = 0; i < QCA8K_NUM_PORTS; i++) { - if (dsa_is_cpu_port(ds, i)) - continue; - if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge)) - continue; - /* Add this port to the portvlan mask of the other ports - * in the bridge - */ - ret = regmap_set_bits(priv->regmap, - QCA8K_PORT_LOOKUP_CTRL(i), - BIT(port)); - if (ret) - return ret; - if (i != port) - port_mask |= BIT(i); - } - - /* Add all other ports to this ports portvlan mask */ - ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), - QCA8K_PORT_LOOKUP_MEMBER, port_mask); - - return ret; -} - -static void qca8k_port_bridge_leave(struct dsa_switch *ds, int port, - struct dsa_bridge bridge) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - int cpu_port, i; - - cpu_port = dsa_to_port(ds, port)->cpu_dp->index; - - for (i = 0; i < QCA8K_NUM_PORTS; i++) { - if (dsa_is_cpu_port(ds, i)) - continue; - if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge)) - continue; - /* Remove this port to the portvlan mask of the other ports - * in the bridge - */ - regmap_clear_bits(priv->regmap, - QCA8K_PORT_LOOKUP_CTRL(i), - BIT(port)); - } - - /* Set the cpu port to be the only one in the portvlan mask of - * this port - */ - qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), - QCA8K_PORT_LOOKUP_MEMBER, BIT(cpu_port)); -} - -static void -qca8k_port_fast_age(struct dsa_switch *ds, int port) -{ - struct qca8k_priv *priv = ds->priv; - - mutex_lock(&priv->reg_mutex); - qca8k_fdb_access(priv, QCA8K_FDB_FLUSH_PORT, port); - mutex_unlock(&priv->reg_mutex); -} - -static int -qca8k_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) -{ - struct qca8k_priv *priv = ds->priv; - unsigned int secs = msecs / 1000; - u32 val; - - /* AGE_TIME reg is set in 7s step */ - val = secs / 7; - - /* Handle case with 0 as val to NOT disable - * learning - */ - if (!val) - val = 1; - - return regmap_update_bits(priv->regmap, QCA8K_REG_ATU_CTRL, QCA8K_ATU_AGE_TIME_MASK, - QCA8K_ATU_AGE_TIME(val)); -} - 
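/* [Editorial aside -- not part of the patch itself.] The qca8k_set_ageing_time()
 * helper removed just above is re-added verbatim in qca8k-common.c further down.
 * Its conversion is easy to misread: the requested time is rounded down to the
 * hardware's 7-second AGE_TIME steps, then clamped to 1 because, per the comment
 * in the code, a raw 0 would disable learning. A minimal sketch of just that
 * conversion (the helper name is illustrative, not a kernel symbol):
 *
 *	static u32 qca8k_age_msecs_to_steps(unsigned int msecs)
 *	{
 *		u32 val = (msecs / 1000) / 7;	// 7 s per register step
 *
 *		return val ? val : 1;		// never write a raw 0
 *	}
 *
 * For example, msecs = 300000 (5 minutes) gives 300 / 7 = 42 steps, i.e. an
 * effective ageing time of 294 s.
 */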
-static int -qca8k_port_enable(struct dsa_switch *ds, int port, - struct phy_device *phy) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - - qca8k_port_set_status(priv, port, 1); - priv->port_enabled_map |= BIT(port); - - if (dsa_is_user_port(ds, port)) - phy_support_asym_pause(phy); - - return 0; -} - -static void -qca8k_port_disable(struct dsa_switch *ds, int port) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - - qca8k_port_set_status(priv, port, 0); - priv->port_enabled_map &= ~BIT(port); -} - -static int -qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu) -{ - struct qca8k_priv *priv = ds->priv; - int ret; - - /* We have only have a general MTU setting. - * DSA always set the CPU port's MTU to the largest MTU of the slave - * ports. - * Setting MTU just for the CPU port is sufficient to correctly set a - * value for every port. - */ - if (!dsa_is_cpu_port(ds, port)) - return 0; - - /* To change the MAX_FRAME_SIZE the cpu ports must be off or - * the switch panics. - * Turn off both cpu ports before applying the new value to prevent - * this. - */ - if (priv->port_enabled_map & BIT(0)) - qca8k_port_set_status(priv, 0, 0); - - if (priv->port_enabled_map & BIT(6)) - qca8k_port_set_status(priv, 6, 0); - - /* Include L2 header / FCS length */ - ret = qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, new_mtu + ETH_HLEN + ETH_FCS_LEN); - - if (priv->port_enabled_map & BIT(0)) - qca8k_port_set_status(priv, 0, 1); - - if (priv->port_enabled_map & BIT(6)) - qca8k_port_set_status(priv, 6, 1); - - return ret; -} - -static int -qca8k_port_max_mtu(struct dsa_switch *ds, int port) -{ - return QCA8K_MAX_MTU; -} - -static int -qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr, - u16 port_mask, u16 vid) -{ - /* Set the vid to the port vlan id if no vid is set */ - if (!vid) - vid = QCA8K_PORT_VID_DEF; - - return qca8k_fdb_add(priv, addr, port_mask, vid, - QCA8K_ATU_STATUS_STATIC); -} - -static int -qca8k_port_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid, - struct dsa_db db) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - u16 port_mask = BIT(port); - - return qca8k_port_fdb_insert(priv, addr, port_mask, vid); -} - -static int -qca8k_port_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid, - struct dsa_db db) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - u16 port_mask = BIT(port); - - if (!vid) - vid = QCA8K_PORT_VID_DEF; - - return qca8k_fdb_del(priv, addr, port_mask, vid); -} - -static int -qca8k_port_fdb_dump(struct dsa_switch *ds, int port, - dsa_fdb_dump_cb_t *cb, void *data) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - struct qca8k_fdb _fdb = { 0 }; - int cnt = QCA8K_NUM_FDB_RECORDS; - bool is_static; - int ret = 0; - - mutex_lock(&priv->reg_mutex); - while (cnt-- && !qca8k_fdb_next(priv, &_fdb, port)) { - if (!_fdb.aging) - break; - is_static = (_fdb.aging == QCA8K_ATU_STATUS_STATIC); - ret = cb(_fdb.mac, _fdb.vid, is_static, data); - if (ret) - break; - } - mutex_unlock(&priv->reg_mutex); - - return 0; -} - -static int -qca8k_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct dsa_db db) -{ - struct qca8k_priv *priv = ds->priv; - const u8 *addr = mdb->addr; - u16 vid = mdb->vid; - - return qca8k_fdb_search_and_insert(priv, BIT(port), addr, vid); -} - -static int -qca8k_port_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct dsa_db db) -{ - struct 
qca8k_priv *priv = ds->priv; - const u8 *addr = mdb->addr; - u16 vid = mdb->vid; - - return qca8k_fdb_search_and_del(priv, BIT(port), addr, vid); -} - -static int -qca8k_port_mirror_add(struct dsa_switch *ds, int port, - struct dsa_mall_mirror_tc_entry *mirror, - bool ingress, struct netlink_ext_ack *extack) -{ - struct qca8k_priv *priv = ds->priv; - int monitor_port, ret; - u32 reg, val; - - /* Check for existent entry */ - if ((ingress ? priv->mirror_rx : priv->mirror_tx) & BIT(port)) - return -EEXIST; - - ret = regmap_read(priv->regmap, QCA8K_REG_GLOBAL_FW_CTRL0, &val); - if (ret) - return ret; - - /* QCA83xx can have only one port set to mirror mode. - * Check that the correct port is requested and return error otherwise. - * When no mirror port is set, the values is set to 0xF - */ - monitor_port = FIELD_GET(QCA8K_GLOBAL_FW_CTRL0_MIRROR_PORT_NUM, val); - if (monitor_port != 0xF && monitor_port != mirror->to_local_port) - return -EEXIST; - - /* Set the monitor port */ - val = FIELD_PREP(QCA8K_GLOBAL_FW_CTRL0_MIRROR_PORT_NUM, - mirror->to_local_port); - ret = regmap_update_bits(priv->regmap, QCA8K_REG_GLOBAL_FW_CTRL0, - QCA8K_GLOBAL_FW_CTRL0_MIRROR_PORT_NUM, val); - if (ret) - return ret; - - if (ingress) { - reg = QCA8K_PORT_LOOKUP_CTRL(port); - val = QCA8K_PORT_LOOKUP_ING_MIRROR_EN; - } else { - reg = QCA8K_REG_PORT_HOL_CTRL1(port); - val = QCA8K_PORT_HOL_CTRL1_EG_MIRROR_EN; - } - - ret = regmap_update_bits(priv->regmap, reg, val, val); - if (ret) - return ret; - - /* Track mirror port for tx and rx to decide when the - * mirror port has to be disabled. - */ - if (ingress) - priv->mirror_rx |= BIT(port); - else - priv->mirror_tx |= BIT(port); - - return 0; -} - -static void -qca8k_port_mirror_del(struct dsa_switch *ds, int port, - struct dsa_mall_mirror_tc_entry *mirror) -{ - struct qca8k_priv *priv = ds->priv; - u32 reg, val; - int ret; - - if (mirror->ingress) { - reg = QCA8K_PORT_LOOKUP_CTRL(port); - val = QCA8K_PORT_LOOKUP_ING_MIRROR_EN; - } else { - reg = QCA8K_REG_PORT_HOL_CTRL1(port); - val = QCA8K_PORT_HOL_CTRL1_EG_MIRROR_EN; - } - - ret = regmap_clear_bits(priv->regmap, reg, val); - if (ret) - goto err; - - if (mirror->ingress) - priv->mirror_rx &= ~BIT(port); - else - priv->mirror_tx &= ~BIT(port); - - /* No port set to send packet to mirror port. 
Disable mirror port */ - if (!priv->mirror_rx && !priv->mirror_tx) { - val = FIELD_PREP(QCA8K_GLOBAL_FW_CTRL0_MIRROR_PORT_NUM, 0xF); - ret = regmap_update_bits(priv->regmap, QCA8K_REG_GLOBAL_FW_CTRL0, - QCA8K_GLOBAL_FW_CTRL0_MIRROR_PORT_NUM, val); - if (ret) - goto err; - } -err: - dev_err(priv->dev, "Failed to del mirror port from %d", port); -} - -static int -qca8k_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, - struct netlink_ext_ack *extack) -{ - struct qca8k_priv *priv = ds->priv; - int ret; - - if (vlan_filtering) { - ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), - QCA8K_PORT_LOOKUP_VLAN_MODE_MASK, - QCA8K_PORT_LOOKUP_VLAN_MODE_SECURE); - } else { - ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), - QCA8K_PORT_LOOKUP_VLAN_MODE_MASK, - QCA8K_PORT_LOOKUP_VLAN_MODE_NONE); - } - - return ret; -} - -static int -qca8k_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct netlink_ext_ack *extack) -{ - bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; - struct qca8k_priv *priv = ds->priv; - int ret; - - ret = qca8k_vlan_add(priv, port, vlan->vid, untagged); - if (ret) { - dev_err(priv->dev, "Failed to add VLAN to port %d (%d)", port, ret); - return ret; - } - - if (pvid) { - ret = qca8k_rmw(priv, QCA8K_EGRESS_VLAN(port), - QCA8K_EGREES_VLAN_PORT_MASK(port), - QCA8K_EGREES_VLAN_PORT(port, vlan->vid)); - if (ret) - return ret; - - ret = qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(port), - QCA8K_PORT_VLAN_CVID(vlan->vid) | - QCA8K_PORT_VLAN_SVID(vlan->vid)); - } - - return ret; -} - -static int -qca8k_port_vlan_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) -{ - struct qca8k_priv *priv = ds->priv; - int ret; - - ret = qca8k_vlan_del(priv, port, vlan->vid); - if (ret) - dev_err(priv->dev, "Failed to delete VLAN from port %d (%d)", port, ret); - - return ret; -} - static u32 qca8k_get_phy_flags(struct dsa_switch *ds, int port) { struct qca8k_priv *priv = ds->priv; @@ -2640,174 +1593,6 @@ qca8k_get_tag_protocol(struct dsa_switch *ds, int port, return DSA_TAG_PROTO_QCA; } -static bool -qca8k_lag_can_offload(struct dsa_switch *ds, struct dsa_lag lag, - struct netdev_lag_upper_info *info) -{ - struct dsa_port *dp; - int members = 0; - - if (!lag.id) - return false; - - dsa_lag_foreach_port(dp, ds->dst, &lag) - /* Includes the port joining the LAG */ - members++; - - if (members > QCA8K_NUM_PORTS_FOR_LAG) - return false; - - if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) - return false; - - if (info->hash_type != NETDEV_LAG_HASH_L2 && - info->hash_type != NETDEV_LAG_HASH_L23) - return false; - - return true; -} - -static int -qca8k_lag_setup_hash(struct dsa_switch *ds, struct dsa_lag lag, - struct netdev_lag_upper_info *info) -{ - struct net_device *lag_dev = lag.dev; - struct qca8k_priv *priv = ds->priv; - bool unique_lag = true; - unsigned int i; - u32 hash = 0; - - switch (info->hash_type) { - case NETDEV_LAG_HASH_L23: - hash |= QCA8K_TRUNK_HASH_SIP_EN; - hash |= QCA8K_TRUNK_HASH_DIP_EN; - fallthrough; - case NETDEV_LAG_HASH_L2: - hash |= QCA8K_TRUNK_HASH_SA_EN; - hash |= QCA8K_TRUNK_HASH_DA_EN; - break; - default: /* We should NEVER reach this */ - return -EOPNOTSUPP; - } - - /* Check if we are the unique configured LAG */ - dsa_lags_foreach_id(i, ds->dst) - if (i != lag.id && dsa_lag_by_id(ds->dst, i)) { - unique_lag = false; - break; - } - - /* Hash Mode is global. 
Make sure the same Hash Mode - * is set to all the 4 possible lag. - * If we are the unique LAG we can set whatever hash - * mode we want. - * To change hash mode it's needed to remove all LAG - * and change the mode with the latest. - */ - if (unique_lag) { - priv->lag_hash_mode = hash; - } else if (priv->lag_hash_mode != hash) { - netdev_err(lag_dev, "Error: Mismatched Hash Mode across different lag is not supported\n"); - return -EOPNOTSUPP; - } - - return regmap_update_bits(priv->regmap, QCA8K_TRUNK_HASH_EN_CTRL, - QCA8K_TRUNK_HASH_MASK, hash); -} - -static int -qca8k_lag_refresh_portmap(struct dsa_switch *ds, int port, - struct dsa_lag lag, bool delete) -{ - struct qca8k_priv *priv = ds->priv; - int ret, id, i; - u32 val; - - /* DSA LAG IDs are one-based, hardware is zero-based */ - id = lag.id - 1; - - /* Read current port member */ - ret = regmap_read(priv->regmap, QCA8K_REG_GOL_TRUNK_CTRL0, &val); - if (ret) - return ret; - - /* Shift val to the correct trunk */ - val >>= QCA8K_REG_GOL_TRUNK_SHIFT(id); - val &= QCA8K_REG_GOL_TRUNK_MEMBER_MASK; - if (delete) - val &= ~BIT(port); - else - val |= BIT(port); - - /* Update port member. With empty portmap disable trunk */ - ret = regmap_update_bits(priv->regmap, QCA8K_REG_GOL_TRUNK_CTRL0, - QCA8K_REG_GOL_TRUNK_MEMBER(id) | - QCA8K_REG_GOL_TRUNK_EN(id), - !val << QCA8K_REG_GOL_TRUNK_SHIFT(id) | - val << QCA8K_REG_GOL_TRUNK_SHIFT(id)); - - /* Search empty member if adding or port on deleting */ - for (i = 0; i < QCA8K_NUM_PORTS_FOR_LAG; i++) { - ret = regmap_read(priv->regmap, QCA8K_REG_GOL_TRUNK_CTRL(id), &val); - if (ret) - return ret; - - val >>= QCA8K_REG_GOL_TRUNK_ID_MEM_ID_SHIFT(id, i); - val &= QCA8K_REG_GOL_TRUNK_ID_MEM_ID_MASK; - - if (delete) { - /* If port flagged to be disabled assume this member is - * empty - */ - if (val != QCA8K_REG_GOL_TRUNK_ID_MEM_ID_EN_MASK) - continue; - - val &= QCA8K_REG_GOL_TRUNK_ID_MEM_ID_PORT_MASK; - if (val != port) - continue; - } else { - /* If port flagged to be enabled assume this member is - * already set - */ - if (val == QCA8K_REG_GOL_TRUNK_ID_MEM_ID_EN_MASK) - continue; - } - - /* We have found the member to add/remove */ - break; - } - - /* Set port in the correct port mask or disable port if in delete mode */ - return regmap_update_bits(priv->regmap, QCA8K_REG_GOL_TRUNK_CTRL(id), - QCA8K_REG_GOL_TRUNK_ID_MEM_ID_EN(id, i) | - QCA8K_REG_GOL_TRUNK_ID_MEM_ID_PORT(id, i), - !delete << QCA8K_REG_GOL_TRUNK_ID_MEM_ID_SHIFT(id, i) | - port << QCA8K_REG_GOL_TRUNK_ID_MEM_ID_SHIFT(id, i)); -} - -static int -qca8k_port_lag_join(struct dsa_switch *ds, int port, struct dsa_lag lag, - struct netdev_lag_upper_info *info) -{ - int ret; - - if (!qca8k_lag_can_offload(ds, lag, info)) - return -EOPNOTSUPP; - - ret = qca8k_lag_setup_hash(ds, lag, info); - if (ret) - return ret; - - return qca8k_lag_refresh_portmap(ds, port, lag, false); -} - -static int -qca8k_port_lag_leave(struct dsa_switch *ds, int port, - struct dsa_lag lag) -{ - return qca8k_lag_refresh_portmap(ds, port, lag, true); -} - static void qca8k_master_change(struct dsa_switch *ds, const struct net_device *master, bool operational) @@ -3091,36 +1876,6 @@ static const struct dsa_switch_ops qca8k_switch_ops = { .connect_tag_protocol = qca8k_connect_tag_protocol, }; -static int qca8k_read_switch_id(struct qca8k_priv *priv) -{ - const struct qca8k_match_data *data; - u32 val; - u8 id; - int ret; - - /* get the switches ID from the compatible */ - data = of_device_get_match_data(priv->dev); - if (!data) - return -ENODEV; - - ret = qca8k_read(priv, 
QCA8K_REG_MASK_CTRL, &val); - if (ret < 0) - return -ENODEV; - - id = QCA8K_MASK_CTRL_DEVICE_ID(val); - if (id != data->id) { - dev_err(priv->dev, "Switch id detected %x but expected %x", id, data->id); - return -ENODEV; - } - - priv->switch_id = id; - - /* Save revision to communicate to the internal PHY driver */ - priv->switch_revision = QCA8K_MASK_CTRL_REV_ID(val); - - return 0; -} - static int qca8k_sw_probe(struct mdio_device *mdiodev) { @@ -3134,6 +1889,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev) if (!priv) return -ENOMEM; + priv->info = of_device_get_match_data(priv->dev); priv->bus = mdiodev->bus; priv->dev = &mdiodev->dev; @@ -3256,20 +2012,29 @@ static int qca8k_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(qca8k_pm_ops, qca8k_suspend, qca8k_resume); +static const struct qca8k_info_ops qca8xxx_ops = { + .autocast_mib = qca8k_get_ethtool_stats_eth, + .read_eth = qca8k_read_eth, + .write_eth = qca8k_write_eth, +}; + static const struct qca8k_match_data qca8327 = { .id = QCA8K_ID_QCA8327, .reduced_package = true, .mib_count = QCA8K_QCA832X_MIB_COUNT, + .ops = &qca8xxx_ops, }; static const struct qca8k_match_data qca8328 = { .id = QCA8K_ID_QCA8327, .mib_count = QCA8K_QCA832X_MIB_COUNT, + .ops = &qca8xxx_ops, }; static const struct qca8k_match_data qca833x = { .id = QCA8K_ID_QCA8337, .mib_count = QCA8K_QCA833X_MIB_COUNT, + .ops = &qca8xxx_ops, }; static const struct of_device_id qca8k_of_match[] = { diff --git a/drivers/net/dsa/qca/qca8k-common.c b/drivers/net/dsa/qca/qca8k-common.c new file mode 100644 index 000000000000..bba95613e218 --- /dev/null +++ b/drivers/net/dsa/qca/qca8k-common.c @@ -0,0 +1,1210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2009 Felix Fietkau <nbd@nbd.name> + * Copyright (C) 2011-2012 Gabor Juhos <juhosg@openwrt.org> + * Copyright (c) 2015, 2019, The Linux Foundation. All rights reserved. 
+ * Copyright (c) 2016 John Crispin <john@phrozen.org> + */ + +#include <linux/netdevice.h> +#include <net/dsa.h> +#include <linux/if_bridge.h> + +#include "qca8k.h" + +#define MIB_DESC(_s, _o, _n) \ + { \ + .size = (_s), \ + .offset = (_o), \ + .name = (_n), \ + } + +const struct qca8k_mib_desc ar8327_mib[] = { + MIB_DESC(1, 0x00, "RxBroad"), + MIB_DESC(1, 0x04, "RxPause"), + MIB_DESC(1, 0x08, "RxMulti"), + MIB_DESC(1, 0x0c, "RxFcsErr"), + MIB_DESC(1, 0x10, "RxAlignErr"), + MIB_DESC(1, 0x14, "RxRunt"), + MIB_DESC(1, 0x18, "RxFragment"), + MIB_DESC(1, 0x1c, "Rx64Byte"), + MIB_DESC(1, 0x20, "Rx128Byte"), + MIB_DESC(1, 0x24, "Rx256Byte"), + MIB_DESC(1, 0x28, "Rx512Byte"), + MIB_DESC(1, 0x2c, "Rx1024Byte"), + MIB_DESC(1, 0x30, "Rx1518Byte"), + MIB_DESC(1, 0x34, "RxMaxByte"), + MIB_DESC(1, 0x38, "RxTooLong"), + MIB_DESC(2, 0x3c, "RxGoodByte"), + MIB_DESC(2, 0x44, "RxBadByte"), + MIB_DESC(1, 0x4c, "RxOverFlow"), + MIB_DESC(1, 0x50, "Filtered"), + MIB_DESC(1, 0x54, "TxBroad"), + MIB_DESC(1, 0x58, "TxPause"), + MIB_DESC(1, 0x5c, "TxMulti"), + MIB_DESC(1, 0x60, "TxUnderRun"), + MIB_DESC(1, 0x64, "Tx64Byte"), + MIB_DESC(1, 0x68, "Tx128Byte"), + MIB_DESC(1, 0x6c, "Tx256Byte"), + MIB_DESC(1, 0x70, "Tx512Byte"), + MIB_DESC(1, 0x74, "Tx1024Byte"), + MIB_DESC(1, 0x78, "Tx1518Byte"), + MIB_DESC(1, 0x7c, "TxMaxByte"), + MIB_DESC(1, 0x80, "TxOverSize"), + MIB_DESC(2, 0x84, "TxByte"), + MIB_DESC(1, 0x8c, "TxCollision"), + MIB_DESC(1, 0x90, "TxAbortCol"), + MIB_DESC(1, 0x94, "TxMultiCol"), + MIB_DESC(1, 0x98, "TxSingleCol"), + MIB_DESC(1, 0x9c, "TxExcDefer"), + MIB_DESC(1, 0xa0, "TxDefer"), + MIB_DESC(1, 0xa4, "TxLateCol"), + MIB_DESC(1, 0xa8, "RXUnicast"), + MIB_DESC(1, 0xac, "TXUnicast"), +}; + +int qca8k_read(struct qca8k_priv *priv, u32 reg, u32 *val) +{ + return regmap_read(priv->regmap, reg, val); +} + +int qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val) +{ + return regmap_write(priv->regmap, reg, val); +} + +int qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 write_val) +{ + return regmap_update_bits(priv->regmap, reg, mask, write_val); +} + +static const struct regmap_range qca8k_readable_ranges[] = { + regmap_reg_range(0x0000, 0x00e4), /* Global control */ + regmap_reg_range(0x0100, 0x0168), /* EEE control */ + regmap_reg_range(0x0200, 0x0270), /* Parser control */ + regmap_reg_range(0x0400, 0x0454), /* ACL */ + regmap_reg_range(0x0600, 0x0718), /* Lookup */ + regmap_reg_range(0x0800, 0x0b70), /* QM */ + regmap_reg_range(0x0c00, 0x0c80), /* PKT */ + regmap_reg_range(0x0e00, 0x0e98), /* L3 */ + regmap_reg_range(0x1000, 0x10ac), /* MIB - Port0 */ + regmap_reg_range(0x1100, 0x11ac), /* MIB - Port1 */ + regmap_reg_range(0x1200, 0x12ac), /* MIB - Port2 */ + regmap_reg_range(0x1300, 0x13ac), /* MIB - Port3 */ + regmap_reg_range(0x1400, 0x14ac), /* MIB - Port4 */ + regmap_reg_range(0x1500, 0x15ac), /* MIB - Port5 */ + regmap_reg_range(0x1600, 0x16ac), /* MIB - Port6 */ +}; + +const struct regmap_access_table qca8k_readable_table = { + .yes_ranges = qca8k_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges), +}; + +/* TODO: remove these extra ops when we can support regmap bulk read/write */ +static int qca8k_bulk_read(struct qca8k_priv *priv, u32 reg, u32 *val, int len) +{ + int i, count = len / sizeof(u32), ret; + + if (priv->mgmt_master && priv->info->ops->read_eth && + !priv->info->ops->read_eth(priv, reg, val, len)) + return 0; + + for (i = 0; i < count; i++) { + ret = regmap_read(priv->regmap, reg + (i * 4), val + i); + if (ret < 0) + return ret; + } + + return 0; +} + 
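/* [Editorial aside -- not part of the patch itself.] qca8k_bulk_read() above
 * captures the access pattern this refactor keeps in common code: when an
 * Ethernet management master is present and the chip provides a read_eth op
 * that succeeds, the whole buffer is fetched in-band in one transaction;
 * otherwise it falls back to one regmap_read() per 32-bit word. A hedged
 * usage sketch, mirroring how qca8k_fdb_read() below pulls the three
 * consecutive ATU words (len is in bytes):
 *
 *	u32 atu[3];
 *	int err = qca8k_bulk_read(priv, QCA8K_REG_ATU_DATA0, atu, sizeof(atu));
 *
 *	if (!err)
 *		vid = FIELD_GET(QCA8K_ATU_VID_MASK, atu[2]);
 *
 * sizeof(atu) == 3 * sizeof(u32), so this reads ATU_DATA0..ATU_DATA2 in one
 * call regardless of which transport ends up being used.
 */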
+/* TODO: remove these extra ops when we can support regmap bulk read/write */ +static int qca8k_bulk_write(struct qca8k_priv *priv, u32 reg, u32 *val, int len) +{ + int i, count = len / sizeof(u32), ret; + u32 tmp; + + if (priv->mgmt_master && priv->info->ops->write_eth && + !priv->info->ops->write_eth(priv, reg, val, len)) + return 0; + + for (i = 0; i < count; i++) { + tmp = val[i]; + + ret = regmap_write(priv->regmap, reg + (i * 4), tmp); + if (ret < 0) + return ret; + } + + return 0; +} + +static int qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask) +{ + u32 val; + + return regmap_read_poll_timeout(priv->regmap, reg, val, !(val & mask), 0, + QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC); +} + +static int qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb) +{ + u32 reg[3]; + int ret; + + /* load the ARL table into an array */ + ret = qca8k_bulk_read(priv, QCA8K_REG_ATU_DATA0, reg, sizeof(reg)); + if (ret) + return ret; + + /* vid - 83:72 */ + fdb->vid = FIELD_GET(QCA8K_ATU_VID_MASK, reg[2]); + /* aging - 67:64 */ + fdb->aging = FIELD_GET(QCA8K_ATU_STATUS_MASK, reg[2]); + /* portmask - 54:48 */ + fdb->port_mask = FIELD_GET(QCA8K_ATU_PORT_MASK, reg[1]); + /* mac - 47:0 */ + fdb->mac[0] = FIELD_GET(QCA8K_ATU_ADDR0_MASK, reg[1]); + fdb->mac[1] = FIELD_GET(QCA8K_ATU_ADDR1_MASK, reg[1]); + fdb->mac[2] = FIELD_GET(QCA8K_ATU_ADDR2_MASK, reg[0]); + fdb->mac[3] = FIELD_GET(QCA8K_ATU_ADDR3_MASK, reg[0]); + fdb->mac[4] = FIELD_GET(QCA8K_ATU_ADDR4_MASK, reg[0]); + fdb->mac[5] = FIELD_GET(QCA8K_ATU_ADDR5_MASK, reg[0]); + + return 0; +} + +static void qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, + const u8 *mac, u8 aging) +{ + u32 reg[3] = { 0 }; + + /* vid - 83:72 */ + reg[2] = FIELD_PREP(QCA8K_ATU_VID_MASK, vid); + /* aging - 67:64 */ + reg[2] |= FIELD_PREP(QCA8K_ATU_STATUS_MASK, aging); + /* portmask - 54:48 */ + reg[1] = FIELD_PREP(QCA8K_ATU_PORT_MASK, port_mask); + /* mac - 47:0 */ + reg[1] |= FIELD_PREP(QCA8K_ATU_ADDR0_MASK, mac[0]); + reg[1] |= FIELD_PREP(QCA8K_ATU_ADDR1_MASK, mac[1]); + reg[0] |= FIELD_PREP(QCA8K_ATU_ADDR2_MASK, mac[2]); + reg[0] |= FIELD_PREP(QCA8K_ATU_ADDR3_MASK, mac[3]); + reg[0] |= FIELD_PREP(QCA8K_ATU_ADDR4_MASK, mac[4]); + reg[0] |= FIELD_PREP(QCA8K_ATU_ADDR5_MASK, mac[5]); + + /* load the array into the ARL table */ + qca8k_bulk_write(priv, QCA8K_REG_ATU_DATA0, reg, sizeof(reg)); +} + +static int qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, + int port) +{ + u32 reg; + int ret; + + /* Set the command and FDB index */ + reg = QCA8K_ATU_FUNC_BUSY; + reg |= cmd; + if (port >= 0) { + reg |= QCA8K_ATU_FUNC_PORT_EN; + reg |= FIELD_PREP(QCA8K_ATU_FUNC_PORT_MASK, port); + } + + /* Write the function register triggering the table access */ + ret = qca8k_write(priv, QCA8K_REG_ATU_FUNC, reg); + if (ret) + return ret; + + /* wait for completion */ + ret = qca8k_busy_wait(priv, QCA8K_REG_ATU_FUNC, QCA8K_ATU_FUNC_BUSY); + if (ret) + return ret; + + /* Check for table full violation when adding an entry */ + if (cmd == QCA8K_FDB_LOAD) { + ret = qca8k_read(priv, QCA8K_REG_ATU_FUNC, &reg); + if (ret < 0) + return ret; + if (reg & QCA8K_ATU_FUNC_FULL) + return -1; + } + + return 0; +} + +static int qca8k_fdb_next(struct qca8k_priv *priv, struct qca8k_fdb *fdb, + int port) +{ + int ret; + + qca8k_fdb_write(priv, fdb->vid, fdb->port_mask, fdb->mac, fdb->aging); + ret = qca8k_fdb_access(priv, QCA8K_FDB_NEXT, port); + if (ret < 0) + return ret; + + return qca8k_fdb_read(priv, fdb); +} + +static int qca8k_fdb_add(struct qca8k_priv *priv, 
const u8 *mac, + u16 port_mask, u16 vid, u8 aging) +{ + int ret; + + mutex_lock(&priv->reg_mutex); + qca8k_fdb_write(priv, vid, port_mask, mac, aging); + ret = qca8k_fdb_access(priv, QCA8K_FDB_LOAD, -1); + mutex_unlock(&priv->reg_mutex); + + return ret; +} + +static int qca8k_fdb_del(struct qca8k_priv *priv, const u8 *mac, + u16 port_mask, u16 vid) +{ + int ret; + + mutex_lock(&priv->reg_mutex); + qca8k_fdb_write(priv, vid, port_mask, mac, 0); + ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1); + mutex_unlock(&priv->reg_mutex); + + return ret; +} + +void qca8k_fdb_flush(struct qca8k_priv *priv) +{ + mutex_lock(&priv->reg_mutex); + qca8k_fdb_access(priv, QCA8K_FDB_FLUSH, -1); + mutex_unlock(&priv->reg_mutex); +} + +static int qca8k_fdb_search_and_insert(struct qca8k_priv *priv, u8 port_mask, + const u8 *mac, u16 vid) +{ + struct qca8k_fdb fdb = { 0 }; + int ret; + + mutex_lock(&priv->reg_mutex); + + qca8k_fdb_write(priv, vid, 0, mac, 0); + ret = qca8k_fdb_access(priv, QCA8K_FDB_SEARCH, -1); + if (ret < 0) + goto exit; + + ret = qca8k_fdb_read(priv, &fdb); + if (ret < 0) + goto exit; + + /* Rule exists. Delete first */ + if (!fdb.aging) { + ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1); + if (ret) + goto exit; + } + + /* Add port to fdb portmask */ + fdb.port_mask |= port_mask; + + qca8k_fdb_write(priv, vid, fdb.port_mask, mac, fdb.aging); + ret = qca8k_fdb_access(priv, QCA8K_FDB_LOAD, -1); + +exit: + mutex_unlock(&priv->reg_mutex); + return ret; +} + +static int qca8k_fdb_search_and_del(struct qca8k_priv *priv, u8 port_mask, + const u8 *mac, u16 vid) +{ + struct qca8k_fdb fdb = { 0 }; + int ret; + + mutex_lock(&priv->reg_mutex); + + qca8k_fdb_write(priv, vid, 0, mac, 0); + ret = qca8k_fdb_access(priv, QCA8K_FDB_SEARCH, -1); + if (ret < 0) + goto exit; + + /* Rule doesn't exist. Why delete? */ + if (!fdb.aging) { + ret = -EINVAL; + goto exit; + } + + ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1); + if (ret) + goto exit; + + /* Only port in the rule is this port. Don't re-insert */ + if (fdb.port_mask == port_mask) + goto exit; + + /* Remove port from port mask */ + fdb.port_mask &= ~port_mask; + + qca8k_fdb_write(priv, vid, fdb.port_mask, mac, fdb.aging); + ret = qca8k_fdb_access(priv, QCA8K_FDB_LOAD, -1); + +exit: + mutex_unlock(&priv->reg_mutex); + return ret; +} + +static int qca8k_vlan_access(struct qca8k_priv *priv, + enum qca8k_vlan_cmd cmd, u16 vid) +{ + u32 reg; + int ret; + + /* Set the command and VLAN index */ + reg = QCA8K_VTU_FUNC1_BUSY; + reg |= cmd; + reg |= FIELD_PREP(QCA8K_VTU_FUNC1_VID_MASK, vid); + + /* Write the function register triggering the table access */ + ret = qca8k_write(priv, QCA8K_REG_VTU_FUNC1, reg); + if (ret) + return ret; + + /* wait for completion */ + ret = qca8k_busy_wait(priv, QCA8K_REG_VTU_FUNC1, QCA8K_VTU_FUNC1_BUSY); + if (ret) + return ret; + + /* Check for table full violation when adding an entry */ + if (cmd == QCA8K_VLAN_LOAD) { + ret = qca8k_read(priv, QCA8K_REG_VTU_FUNC1, &reg); + if (ret < 0) + return ret; + if (reg & QCA8K_VTU_FUNC1_FULL) + return -ENOMEM; + } + + return 0; +} + +static int qca8k_vlan_add(struct qca8k_priv *priv, u8 port, u16 vid, + bool untagged) +{ + u32 reg; + int ret; + + /* We do the right thing with VLAN 0 and treat it as untagged while + * preserving the tag on egress. 
+ */ + if (vid == 0) + return 0; + + mutex_lock(&priv->reg_mutex); + ret = qca8k_vlan_access(priv, QCA8K_VLAN_READ, vid); + if (ret < 0) + goto out; + + ret = qca8k_read(priv, QCA8K_REG_VTU_FUNC0, &reg); + if (ret < 0) + goto out; + reg |= QCA8K_VTU_FUNC0_VALID | QCA8K_VTU_FUNC0_IVL_EN; + reg &= ~QCA8K_VTU_FUNC0_EG_MODE_PORT_MASK(port); + if (untagged) + reg |= QCA8K_VTU_FUNC0_EG_MODE_PORT_UNTAG(port); + else + reg |= QCA8K_VTU_FUNC0_EG_MODE_PORT_TAG(port); + + ret = qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg); + if (ret) + goto out; + ret = qca8k_vlan_access(priv, QCA8K_VLAN_LOAD, vid); + +out: + mutex_unlock(&priv->reg_mutex); + + return ret; +} + +static int qca8k_vlan_del(struct qca8k_priv *priv, u8 port, u16 vid) +{ + u32 reg, mask; + int ret, i; + bool del; + + mutex_lock(&priv->reg_mutex); + ret = qca8k_vlan_access(priv, QCA8K_VLAN_READ, vid); + if (ret < 0) + goto out; + + ret = qca8k_read(priv, QCA8K_REG_VTU_FUNC0, &reg); + if (ret < 0) + goto out; + reg &= ~QCA8K_VTU_FUNC0_EG_MODE_PORT_MASK(port); + reg |= QCA8K_VTU_FUNC0_EG_MODE_PORT_NOT(port); + + /* Check if we're the last member to be removed */ + del = true; + for (i = 0; i < QCA8K_NUM_PORTS; i++) { + mask = QCA8K_VTU_FUNC0_EG_MODE_PORT_NOT(i); + + if ((reg & mask) != mask) { + del = false; + break; + } + } + + if (del) { + ret = qca8k_vlan_access(priv, QCA8K_VLAN_PURGE, vid); + } else { + ret = qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg); + if (ret) + goto out; + ret = qca8k_vlan_access(priv, QCA8K_VLAN_LOAD, vid); + } + +out: + mutex_unlock(&priv->reg_mutex); + + return ret; +} + +int qca8k_mib_init(struct qca8k_priv *priv) +{ + int ret; + + mutex_lock(&priv->reg_mutex); + ret = regmap_update_bits(priv->regmap, QCA8K_REG_MIB, + QCA8K_MIB_FUNC | QCA8K_MIB_BUSY, + FIELD_PREP(QCA8K_MIB_FUNC, QCA8K_MIB_FLUSH) | + QCA8K_MIB_BUSY); + if (ret) + goto exit; + + ret = qca8k_busy_wait(priv, QCA8K_REG_MIB, QCA8K_MIB_BUSY); + if (ret) + goto exit; + + ret = regmap_set_bits(priv->regmap, QCA8K_REG_MIB, QCA8K_MIB_CPU_KEEP); + if (ret) + goto exit; + + ret = qca8k_write(priv, QCA8K_REG_MODULE_EN, QCA8K_MODULE_EN_MIB); + +exit: + mutex_unlock(&priv->reg_mutex); + return ret; +} + +void qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable) +{ + u32 mask = QCA8K_PORT_STATUS_TXMAC | QCA8K_PORT_STATUS_RXMAC; + + /* Ports 0 and 6 have no internal PHY */ + if (port > 0 && port < 6) + mask |= QCA8K_PORT_STATUS_LINK_AUTO; + + if (enable) + regmap_set_bits(priv->regmap, QCA8K_REG_PORT_STATUS(port), mask); + else + regmap_clear_bits(priv->regmap, QCA8K_REG_PORT_STATUS(port), mask); +} + +void qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, + uint8_t *data) +{ + struct qca8k_priv *priv = ds->priv; + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < priv->info->mib_count; i++) + strncpy(data + i * ETH_GSTRING_LEN, ar8327_mib[i].name, + ETH_GSTRING_LEN); +} + +void qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) +{ + struct qca8k_priv *priv = ds->priv; + const struct qca8k_mib_desc *mib; + u32 reg, i, val; + u32 hi = 0; + int ret; + + if (priv->mgmt_master && priv->info->ops->autocast_mib && + priv->info->ops->autocast_mib(ds, port, data) > 0) + return; + + for (i = 0; i < priv->info->mib_count; i++) { + mib = &ar8327_mib[i]; + reg = QCA8K_PORT_MIB_COUNTER(port) + mib->offset; + + ret = qca8k_read(priv, reg, &val); + if (ret < 0) + continue; + + if (mib->size == 2) { + ret = qca8k_read(priv, reg + 4, &hi); + if (ret < 0) + continue; + } + + data[i] = val; + if (mib->size == 2) 
+ data[i] |= (u64)hi << 32; + } +} + +int qca8k_get_sset_count(struct dsa_switch *ds, int port, int sset) +{ + struct qca8k_priv *priv = ds->priv; + + if (sset != ETH_SS_STATS) + return 0; + + return priv->info->mib_count; +} + +int qca8k_set_mac_eee(struct dsa_switch *ds, int port, + struct ethtool_eee *eee) +{ + u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port); + struct qca8k_priv *priv = ds->priv; + u32 reg; + int ret; + + mutex_lock(&priv->reg_mutex); + ret = qca8k_read(priv, QCA8K_REG_EEE_CTRL, &reg); + if (ret < 0) + goto exit; + + if (eee->eee_enabled) + reg |= lpi_en; + else + reg &= ~lpi_en; + ret = qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg); + +exit: + mutex_unlock(&priv->reg_mutex); + return ret; +} + +int qca8k_get_mac_eee(struct dsa_switch *ds, int port, + struct ethtool_eee *e) +{ + /* Nothing to do on the port's MAC */ + return 0; +} + +void qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) +{ + struct qca8k_priv *priv = ds->priv; + u32 stp_state; + + switch (state) { + case BR_STATE_DISABLED: + stp_state = QCA8K_PORT_LOOKUP_STATE_DISABLED; + break; + case BR_STATE_BLOCKING: + stp_state = QCA8K_PORT_LOOKUP_STATE_BLOCKING; + break; + case BR_STATE_LISTENING: + stp_state = QCA8K_PORT_LOOKUP_STATE_LISTENING; + break; + case BR_STATE_LEARNING: + stp_state = QCA8K_PORT_LOOKUP_STATE_LEARNING; + break; + case BR_STATE_FORWARDING: + default: + stp_state = QCA8K_PORT_LOOKUP_STATE_FORWARD; + break; + } + + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_STATE_MASK, stp_state); +} + +int qca8k_port_bridge_join(struct dsa_switch *ds, int port, + struct dsa_bridge bridge, + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + struct qca8k_priv *priv = ds->priv; + int port_mask, cpu_port; + int i, ret; + + cpu_port = dsa_to_port(ds, port)->cpu_dp->index; + port_mask = BIT(cpu_port); + + for (i = 0; i < QCA8K_NUM_PORTS; i++) { + if (dsa_is_cpu_port(ds, i)) + continue; + if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge)) + continue; + /* Add this port to the portvlan mask of the other ports + * in the bridge + */ + ret = regmap_set_bits(priv->regmap, + QCA8K_PORT_LOOKUP_CTRL(i), + BIT(port)); + if (ret) + return ret; + if (i != port) + port_mask |= BIT(i); + } + + /* Add all other ports to this port's portvlan mask */ + ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_MEMBER, port_mask); + + return ret; +} + +void qca8k_port_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_bridge bridge) +{ + struct qca8k_priv *priv = ds->priv; + int cpu_port, i; + + cpu_port = dsa_to_port(ds, port)->cpu_dp->index; + + for (i = 0; i < QCA8K_NUM_PORTS; i++) { + if (dsa_is_cpu_port(ds, i)) + continue; + if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge)) + continue; + /* Remove this port from the portvlan mask of the other ports + * in the bridge + */ + regmap_clear_bits(priv->regmap, + QCA8K_PORT_LOOKUP_CTRL(i), + BIT(port)); + } + + /* Set the cpu port to be the only one in the portvlan mask of + * this port + */ + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_MEMBER, BIT(cpu_port)); +} + +void qca8k_port_fast_age(struct dsa_switch *ds, int port) +{ + struct qca8k_priv *priv = ds->priv; + + mutex_lock(&priv->reg_mutex); + qca8k_fdb_access(priv, QCA8K_FDB_FLUSH_PORT, port); + mutex_unlock(&priv->reg_mutex); +} + +int qca8k_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) +{ + struct qca8k_priv *priv = ds->priv; + unsigned int secs = msecs / 1000; + u32 val; + + /* AGE_TIME reg is set in 7s steps 
+
+int qca8k_set_ageing_time(struct dsa_switch *ds, unsigned int msecs)
+{
+	struct qca8k_priv *priv = ds->priv;
+	unsigned int secs = msecs / 1000;
+	u32 val;
+
+	/* AGE_TIME reg is set in 7s steps */
+	val = secs / 7;
+
+	/* Handle case with 0 as val to NOT disable
+	 * learning
+	 */
+	if (!val)
+		val = 1;
+
+	return regmap_update_bits(priv->regmap, QCA8K_REG_ATU_CTRL,
+				  QCA8K_ATU_AGE_TIME_MASK,
+				  QCA8K_ATU_AGE_TIME(val));
+}
+
+int qca8k_port_enable(struct dsa_switch *ds, int port,
+		      struct phy_device *phy)
+{
+	struct qca8k_priv *priv = ds->priv;
+
+	qca8k_port_set_status(priv, port, 1);
+	priv->port_enabled_map |= BIT(port);
+
+	if (dsa_is_user_port(ds, port))
+		phy_support_asym_pause(phy);
+
+	return 0;
+}
+
+void qca8k_port_disable(struct dsa_switch *ds, int port)
+{
+	struct qca8k_priv *priv = ds->priv;
+
+	qca8k_port_set_status(priv, port, 0);
+	priv->port_enabled_map &= ~BIT(port);
+}
+
+int qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+{
+	struct qca8k_priv *priv = ds->priv;
+	int ret;
+
+	/* We only have a general MTU setting.
+	 * DSA always sets the CPU port's MTU to the largest MTU of the slave
+	 * ports.
+	 * Setting MTU just for the CPU port is sufficient to correctly set a
+	 * value for every port.
+	 */
+	if (!dsa_is_cpu_port(ds, port))
+		return 0;
+
+	/* To change the MAX_FRAME_SIZE the cpu ports must be off or
+	 * the switch panics.
+	 * Turn off both cpu ports before applying the new value to prevent
+	 * this.
+	 */
+	if (priv->port_enabled_map & BIT(0))
+		qca8k_port_set_status(priv, 0, 0);
+
+	if (priv->port_enabled_map & BIT(6))
+		qca8k_port_set_status(priv, 6, 0);
+
+	/* Include L2 header / FCS length */
+	ret = qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, new_mtu +
+			  ETH_HLEN + ETH_FCS_LEN);
+
+	if (priv->port_enabled_map & BIT(0))
+		qca8k_port_set_status(priv, 0, 1);
+
+	if (priv->port_enabled_map & BIT(6))
+		qca8k_port_set_status(priv, 6, 1);
+
+	return ret;
+}
+
+int qca8k_port_max_mtu(struct dsa_switch *ds, int port)
+{
+	return QCA8K_MAX_MTU;
+}
+
+int qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr,
+			  u16 port_mask, u16 vid)
+{
+	/* Set the vid to the port vlan id if no vid is set */
+	if (!vid)
+		vid = QCA8K_PORT_VID_DEF;
+
+	return qca8k_fdb_add(priv, addr, port_mask, vid,
+			     QCA8K_ATU_STATUS_STATIC);
+}
+
+int qca8k_port_fdb_add(struct dsa_switch *ds, int port,
+		       const unsigned char *addr, u16 vid,
+		       struct dsa_db db)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	u16 port_mask = BIT(port);
+
+	return qca8k_port_fdb_insert(priv, addr, port_mask, vid);
+}
+
+int qca8k_port_fdb_del(struct dsa_switch *ds, int port,
+		       const unsigned char *addr, u16 vid,
+		       struct dsa_db db)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	u16 port_mask = BIT(port);
+
+	if (!vid)
+		vid = QCA8K_PORT_VID_DEF;
+
+	return qca8k_fdb_del(priv, addr, port_mask, vid);
+}
+
+int qca8k_port_fdb_dump(struct dsa_switch *ds, int port,
+			dsa_fdb_dump_cb_t *cb, void *data)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	struct qca8k_fdb _fdb = { 0 };
+	int cnt = QCA8K_NUM_FDB_RECORDS;
+	bool is_static;
+	int ret = 0;
+
+	mutex_lock(&priv->reg_mutex);
+	while (cnt-- && !qca8k_fdb_next(priv, &_fdb, port)) {
+		if (!_fdb.aging)
+			break;
+		is_static = (_fdb.aging == QCA8K_ATU_STATUS_STATIC);
+		ret = cb(_fdb.mac, _fdb.vid, is_static, data);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&priv->reg_mutex);
+
+	return 0;
+}
+
+int qca8k_port_mdb_add(struct dsa_switch *ds, int port,
+		       const struct switchdev_obj_port_mdb *mdb,
+		       struct dsa_db db)
+{
+	struct qca8k_priv *priv = ds->priv;
+	const u8 *addr = mdb->addr;
+	u16 vid = mdb->vid;
+
+	return qca8k_fdb_search_and_insert(priv, BIT(port), addr, vid);
+}
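Note: since the switch holds a single global frame-size register,
qca8k_port_change_mtu() above only programs the CPU port, and the value it
writes includes the L2 overhead. For the default 1500-byte MTU that works
out to:

	/* MAX_FRAME_SIZE = MTU + Ethernet header + FCS */
	u32 frame_size = 1500 + ETH_HLEN + ETH_FCS_LEN;	/* 1500 + 14 + 4 = 1518 */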
+
+int qca8k_port_mdb_del(struct dsa_switch *ds, int port,
+		       const struct switchdev_obj_port_mdb *mdb,
+		       struct dsa_db db)
+{
+	struct qca8k_priv *priv = ds->priv;
+	const u8 *addr = mdb->addr;
+	u16 vid = mdb->vid;
+
+	return qca8k_fdb_search_and_del(priv, BIT(port), addr, vid);
+}
+
+int qca8k_port_mirror_add(struct dsa_switch *ds, int port,
+			  struct dsa_mall_mirror_tc_entry *mirror,
+			  bool ingress, struct netlink_ext_ack *extack)
+{
+	struct qca8k_priv *priv = ds->priv;
+	int monitor_port, ret;
+	u32 reg, val;
+
+	/* Check for an existing entry */
+	if ((ingress ? priv->mirror_rx : priv->mirror_tx) & BIT(port))
+		return -EEXIST;
+
+	ret = regmap_read(priv->regmap, QCA8K_REG_GLOBAL_FW_CTRL0, &val);
+	if (ret)
+		return ret;
+
+	/* QCA83xx can have only one port set to mirror mode.
+	 * Check that the correct port is requested and return error otherwise.
+	 * When no mirror port is set, the value is set to 0xF
+	 */
+	monitor_port = FIELD_GET(QCA8K_GLOBAL_FW_CTRL0_MIRROR_PORT_NUM, val);
+	if (monitor_port != 0xF && monitor_port != mirror->to_local_port)
+		return -EEXIST;
+
+	/* Set the monitor port */
+	val = FIELD_PREP(QCA8K_GLOBAL_FW_CTRL0_MIRROR_PORT_NUM,
+			 mirror->to_local_port);
+	ret = regmap_update_bits(priv->regmap, QCA8K_REG_GLOBAL_FW_CTRL0,
+				 QCA8K_GLOBAL_FW_CTRL0_MIRROR_PORT_NUM, val);
+	if (ret)
+		return ret;
+
+	if (ingress) {
+		reg = QCA8K_PORT_LOOKUP_CTRL(port);
+		val = QCA8K_PORT_LOOKUP_ING_MIRROR_EN;
+	} else {
+		reg = QCA8K_REG_PORT_HOL_CTRL1(port);
+		val = QCA8K_PORT_HOL_CTRL1_EG_MIRROR_EN;
+	}
+
+	ret = regmap_update_bits(priv->regmap, reg, val, val);
+	if (ret)
+		return ret;
+
+	/* Track mirror port for tx and rx to decide when the
+	 * mirror port has to be disabled.
+	 */
+	if (ingress)
+		priv->mirror_rx |= BIT(port);
+	else
+		priv->mirror_tx |= BIT(port);
+
+	return 0;
+}
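Note: 0xF in MIRROR_PORT_NUM is the hardware's "no monitor port" sentinel,
which is why both the add and del paths compare against or write back that
value. The gatekeeping logic above reduces to:

	/* one monitor port per switch; 0xF means none configured */
	if (monitor != 0xF && monitor != requested)
		return -EEXIST;	/* a different monitor port is in use */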
+
+void qca8k_port_mirror_del(struct dsa_switch *ds, int port,
+			   struct dsa_mall_mirror_tc_entry *mirror)
+{
+	struct qca8k_priv *priv = ds->priv;
+	u32 reg, val;
+	int ret;
+
+	if (mirror->ingress) {
+		reg = QCA8K_PORT_LOOKUP_CTRL(port);
+		val = QCA8K_PORT_LOOKUP_ING_MIRROR_EN;
+	} else {
+		reg = QCA8K_REG_PORT_HOL_CTRL1(port);
+		val = QCA8K_PORT_HOL_CTRL1_EG_MIRROR_EN;
+	}
+
+	ret = regmap_clear_bits(priv->regmap, reg, val);
+	if (ret)
+		goto err;
+
+	if (mirror->ingress)
+		priv->mirror_rx &= ~BIT(port);
+	else
+		priv->mirror_tx &= ~BIT(port);
+
+	/* No port set to send packet to mirror port. Disable mirror port */
+	if (!priv->mirror_rx && !priv->mirror_tx) {
+		val = FIELD_PREP(QCA8K_GLOBAL_FW_CTRL0_MIRROR_PORT_NUM, 0xF);
+		ret = regmap_update_bits(priv->regmap, QCA8K_REG_GLOBAL_FW_CTRL0,
+					 QCA8K_GLOBAL_FW_CTRL0_MIRROR_PORT_NUM, val);
+		if (ret)
+			goto err;
+	}
+
+	return;
+err:
+	dev_err(priv->dev, "Failed to del mirror port from %d", port);
+}
+
+int qca8k_port_vlan_filtering(struct dsa_switch *ds, int port,
+			      bool vlan_filtering,
+			      struct netlink_ext_ack *extack)
+{
+	struct qca8k_priv *priv = ds->priv;
+	int ret;
+
+	if (vlan_filtering) {
+		ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
+				QCA8K_PORT_LOOKUP_VLAN_MODE_MASK,
+				QCA8K_PORT_LOOKUP_VLAN_MODE_SECURE);
+	} else {
+		ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
+				QCA8K_PORT_LOOKUP_VLAN_MODE_MASK,
+				QCA8K_PORT_LOOKUP_VLAN_MODE_NONE);
+	}
+
+	return ret;
+}
+
+int qca8k_port_vlan_add(struct dsa_switch *ds, int port,
+			const struct switchdev_obj_port_vlan *vlan,
+			struct netlink_ext_ack *extack)
+{
+	bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+	bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
+	struct qca8k_priv *priv = ds->priv;
+	int ret;
+
+	ret = qca8k_vlan_add(priv, port, vlan->vid, untagged);
+	if (ret) {
+		dev_err(priv->dev, "Failed to add VLAN to port %d (%d)", port, ret);
+		return ret;
+	}
+
+	if (pvid) {
+		ret = qca8k_rmw(priv, QCA8K_EGRESS_VLAN(port),
+				QCA8K_EGREES_VLAN_PORT_MASK(port),
+				QCA8K_EGREES_VLAN_PORT(port, vlan->vid));
+		if (ret)
+			return ret;
+
+		ret = qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(port),
+				  QCA8K_PORT_VLAN_CVID(vlan->vid) |
+				  QCA8K_PORT_VLAN_SVID(vlan->vid));
+	}
+
+	return ret;
+}
+
+int qca8k_port_vlan_del(struct dsa_switch *ds, int port,
+			const struct switchdev_obj_port_vlan *vlan)
+{
+	struct qca8k_priv *priv = ds->priv;
+	int ret;
+
+	ret = qca8k_vlan_del(priv, port, vlan->vid);
+	if (ret)
+		dev_err(priv->dev, "Failed to delete VLAN from port %d (%d)", port, ret);
+
+	return ret;
+}
+
+static bool qca8k_lag_can_offload(struct dsa_switch *ds,
+				  struct dsa_lag lag,
+				  struct netdev_lag_upper_info *info)
+{
+	struct dsa_port *dp;
+	int members = 0;
+
+	if (!lag.id)
+		return false;
+
+	dsa_lag_foreach_port(dp, ds->dst, &lag)
+		/* Includes the port joining the LAG */
+		members++;
+
+	if (members > QCA8K_NUM_PORTS_FOR_LAG)
+		return false;
+
+	if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
+		return false;
+
+	if (info->hash_type != NETDEV_LAG_HASH_L2 &&
+	    info->hash_type != NETDEV_LAG_HASH_L23)
+		return false;
+
+	return true;
+}
+
+static int qca8k_lag_setup_hash(struct dsa_switch *ds,
+				struct dsa_lag lag,
+				struct netdev_lag_upper_info *info)
+{
+	struct net_device *lag_dev = lag.dev;
+	struct qca8k_priv *priv = ds->priv;
+	bool unique_lag = true;
+	unsigned int i;
+	u32 hash = 0;
+
+	switch (info->hash_type) {
+	case NETDEV_LAG_HASH_L23:
+		hash |= QCA8K_TRUNK_HASH_SIP_EN;
+		hash |= QCA8K_TRUNK_HASH_DIP_EN;
+		fallthrough;
+	case NETDEV_LAG_HASH_L2:
+		hash |= QCA8K_TRUNK_HASH_SA_EN;
+		hash |= QCA8K_TRUNK_HASH_DA_EN;
+		break;
+	default: /* We should NEVER reach this */
+		return -EOPNOTSUPP;
+	}
+
+	/* Check if we are the only configured LAG */
+	dsa_lags_foreach_id(i, ds->dst)
+		if (i != lag.id && dsa_lag_by_id(ds->dst, i)) {
+			unique_lag = false;
+			break;
+		}
+
+	/* The hash mode is global. Make sure the same hash mode
+	 * is set for all four possible LAGs.
+	 * If we are the only LAG we can set whatever hash
+	 * mode we want.
+	 * To change the hash mode, all LAGs must be removed
+	 * and re-created with the new mode.
+ */ + if (unique_lag) { + priv->lag_hash_mode = hash; + } else if (priv->lag_hash_mode != hash) { + netdev_err(lag_dev, "Error: Mismatched Hash Mode across different lag is not supported\n"); + return -EOPNOTSUPP; + } + + return regmap_update_bits(priv->regmap, QCA8K_TRUNK_HASH_EN_CTRL, + QCA8K_TRUNK_HASH_MASK, hash); +} + +static int qca8k_lag_refresh_portmap(struct dsa_switch *ds, int port, + struct dsa_lag lag, bool delete) +{ + struct qca8k_priv *priv = ds->priv; + int ret, id, i; + u32 val; + + /* DSA LAG IDs are one-based, hardware is zero-based */ + id = lag.id - 1; + + /* Read current port member */ + ret = regmap_read(priv->regmap, QCA8K_REG_GOL_TRUNK_CTRL0, &val); + if (ret) + return ret; + + /* Shift val to the correct trunk */ + val >>= QCA8K_REG_GOL_TRUNK_SHIFT(id); + val &= QCA8K_REG_GOL_TRUNK_MEMBER_MASK; + if (delete) + val &= ~BIT(port); + else + val |= BIT(port); + + /* Update port member. With empty portmap disable trunk */ + ret = regmap_update_bits(priv->regmap, QCA8K_REG_GOL_TRUNK_CTRL0, + QCA8K_REG_GOL_TRUNK_MEMBER(id) | + QCA8K_REG_GOL_TRUNK_EN(id), + !val << QCA8K_REG_GOL_TRUNK_SHIFT(id) | + val << QCA8K_REG_GOL_TRUNK_SHIFT(id)); + + /* Search empty member if adding or port on deleting */ + for (i = 0; i < QCA8K_NUM_PORTS_FOR_LAG; i++) { + ret = regmap_read(priv->regmap, QCA8K_REG_GOL_TRUNK_CTRL(id), &val); + if (ret) + return ret; + + val >>= QCA8K_REG_GOL_TRUNK_ID_MEM_ID_SHIFT(id, i); + val &= QCA8K_REG_GOL_TRUNK_ID_MEM_ID_MASK; + + if (delete) { + /* If port flagged to be disabled assume this member is + * empty + */ + if (val != QCA8K_REG_GOL_TRUNK_ID_MEM_ID_EN_MASK) + continue; + + val &= QCA8K_REG_GOL_TRUNK_ID_MEM_ID_PORT_MASK; + if (val != port) + continue; + } else { + /* If port flagged to be enabled assume this member is + * already set + */ + if (val == QCA8K_REG_GOL_TRUNK_ID_MEM_ID_EN_MASK) + continue; + } + + /* We have found the member to add/remove */ + break; + } + + /* Set port in the correct port mask or disable port if in delete mode */ + return regmap_update_bits(priv->regmap, QCA8K_REG_GOL_TRUNK_CTRL(id), + QCA8K_REG_GOL_TRUNK_ID_MEM_ID_EN(id, i) | + QCA8K_REG_GOL_TRUNK_ID_MEM_ID_PORT(id, i), + !delete << QCA8K_REG_GOL_TRUNK_ID_MEM_ID_SHIFT(id, i) | + port << QCA8K_REG_GOL_TRUNK_ID_MEM_ID_SHIFT(id, i)); +} + +int qca8k_port_lag_join(struct dsa_switch *ds, int port, struct dsa_lag lag, + struct netdev_lag_upper_info *info) +{ + int ret; + + if (!qca8k_lag_can_offload(ds, lag, info)) + return -EOPNOTSUPP; + + ret = qca8k_lag_setup_hash(ds, lag, info); + if (ret) + return ret; + + return qca8k_lag_refresh_portmap(ds, port, lag, false); +} + +int qca8k_port_lag_leave(struct dsa_switch *ds, int port, + struct dsa_lag lag) +{ + return qca8k_lag_refresh_portmap(ds, port, lag, true); +} + +int qca8k_read_switch_id(struct qca8k_priv *priv) +{ + u32 val; + u8 id; + int ret; + + if (!priv->info) + return -ENODEV; + + ret = qca8k_read(priv, QCA8K_REG_MASK_CTRL, &val); + if (ret < 0) + return -ENODEV; + + id = QCA8K_MASK_CTRL_DEVICE_ID(val); + if (id != priv->info->id) { + dev_err(priv->dev, + "Switch id detected %x but expected %x", + id, priv->info->id); + return -ENODEV; + } + + priv->switch_id = id; + + /* Save revision to communicate to the internal PHY driver */ + priv->switch_revision = QCA8K_MASK_CTRL_REV_ID(val); + + return 0; +} diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h index ec58d0e80a70..e36ecc9777f4 100644 --- a/drivers/net/dsa/qca/qca8k.h +++ b/drivers/net/dsa/qca/qca8k.h @@ -324,10 +324,20 @@ enum qca8k_mid_cmd { 
QCA8K_MIB_CAST = 3, }; +struct qca8k_priv; + +struct qca8k_info_ops { + int (*autocast_mib)(struct dsa_switch *ds, int port, u64 *data); + /* TODO: remove these extra ops when we can support regmap bulk read/write */ + int (*read_eth)(struct qca8k_priv *priv, u32 reg, u32 *val, int len); + int (*write_eth)(struct qca8k_priv *priv, u32 reg, u32 *val, int len); +}; + struct qca8k_match_data { u8 id; bool reduced_package; u8 mib_count; + const struct qca8k_info_ops *ops; }; enum { @@ -401,6 +411,7 @@ struct qca8k_priv { struct qca8k_mdio_cache mdio_cache; struct qca8k_pcs pcs_port_0; struct qca8k_pcs pcs_port_6; + const struct qca8k_match_data *info; }; struct qca8k_mib_desc { @@ -416,4 +427,93 @@ struct qca8k_fdb { u8 mac[6]; }; +/* Common setup function */ +extern const struct qca8k_mib_desc ar8327_mib[]; +extern const struct regmap_access_table qca8k_readable_table; +int qca8k_mib_init(struct qca8k_priv *priv); +void qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable); +int qca8k_read_switch_id(struct qca8k_priv *priv); + +/* Common read/write/rmw function */ +int qca8k_read(struct qca8k_priv *priv, u32 reg, u32 *val); +int qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val); +int qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 write_val); + +/* Common ops function */ +void qca8k_fdb_flush(struct qca8k_priv *priv); + +/* Common ethtool stats function */ +void qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data); +void qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data); +int qca8k_get_sset_count(struct dsa_switch *ds, int port, int sset); + +/* Common eee function */ +int qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee); +int qca8k_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e); + +/* Common bridge function */ +void qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state); +int qca8k_port_bridge_join(struct dsa_switch *ds, int port, + struct dsa_bridge bridge, + bool *tx_fwd_offload, + struct netlink_ext_ack *extack); +void qca8k_port_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_bridge bridge); + +/* Common port enable/disable function */ +int qca8k_port_enable(struct dsa_switch *ds, int port, + struct phy_device *phy); +void qca8k_port_disable(struct dsa_switch *ds, int port); + +/* Common MTU function */ +int qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu); +int qca8k_port_max_mtu(struct dsa_switch *ds, int port); + +/* Common fast age function */ +void qca8k_port_fast_age(struct dsa_switch *ds, int port); +int qca8k_set_ageing_time(struct dsa_switch *ds, unsigned int msecs); + +/* Common FDB function */ +int qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr, + u16 port_mask, u16 vid); +int qca8k_port_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db); +int qca8k_port_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db); +int qca8k_port_fdb_dump(struct dsa_switch *ds, int port, + dsa_fdb_dump_cb_t *cb, void *data); + +/* Common MDB function */ +int qca8k_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); +int qca8k_port_mdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); + +/* Common port mirror function */ +int qca8k_port_mirror_add(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry 
*mirror,
+			  bool ingress, struct netlink_ext_ack *extack);
+void qca8k_port_mirror_del(struct dsa_switch *ds, int port,
+			   struct dsa_mall_mirror_tc_entry *mirror);
+
+/* Common port VLAN function */
+int qca8k_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering,
+			      struct netlink_ext_ack *extack);
+int qca8k_port_vlan_add(struct dsa_switch *ds, int port,
+			const struct switchdev_obj_port_vlan *vlan,
+			struct netlink_ext_ack *extack);
+int qca8k_port_vlan_del(struct dsa_switch *ds, int port,
+			const struct switchdev_obj_port_vlan *vlan);
+
+/* Common port LAG function */
+int qca8k_port_lag_join(struct dsa_switch *ds, int port, struct dsa_lag lag,
+			struct netdev_lag_upper_info *info);
+int qca8k_port_lag_leave(struct dsa_switch *ds, int port,
+			 struct dsa_lag lag);
+
 #endif /* __QCA8K_H */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 6b3d4f4c2a75..14df8cfc2946 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -20,6 +20,8 @@
 #include "bnxt_ulp.h"
 #include "bnxt_ptp.h"
 #include "bnxt_coredump.h"
+#include "bnxt_nvm_defs.h"
+#include "bnxt_ethtool.h"
 
 static void __bnxt_fw_recover(struct bnxt *bp)
 {
@@ -610,6 +612,64 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti
 	return rc;
 }
 
+static bool bnxt_nvm_test(struct bnxt *bp, struct netlink_ext_ack *extack)
+{
+	u32 datalen;
+	u16 index;
+	u8 *buf;
+
+	if (bnxt_find_nvram_item(bp->dev, BNX_DIR_TYPE_VPD,
+				 BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE,
+				 &index, NULL, &datalen) || !datalen) {
+		NL_SET_ERR_MSG_MOD(extack, "nvm test vpd entry error");
+		return false;
+	}
+
+	buf = kzalloc(datalen, GFP_KERNEL);
+	if (!buf) {
+		NL_SET_ERR_MSG_MOD(extack, "insufficient memory for nvm test");
+		return false;
+	}
+
+	if (bnxt_get_nvram_item(bp->dev, index, 0, datalen, buf)) {
+		NL_SET_ERR_MSG_MOD(extack, "nvm test vpd read error");
+		goto err;
+	}
+
+	if (bnxt_flash_nvram(bp->dev, BNX_DIR_TYPE_VPD, BNX_DIR_ORDINAL_FIRST,
+			     BNX_DIR_EXT_NONE, 0, 0, buf, datalen)) {
+		NL_SET_ERR_MSG_MOD(extack, "nvm test vpd write error");
+		goto err;
+	}
+
+	kfree(buf);
+	return true;
+
+err:
+	kfree(buf);
+	return false;
+}
+
+static bool bnxt_dl_selftest_check(struct devlink *dl, unsigned int id,
+				   struct netlink_ext_ack *extack)
+{
+	return id == DEVLINK_ATTR_SELFTEST_ID_FLASH;
+}
+
+static enum devlink_selftest_status bnxt_dl_selftest_run(struct devlink *dl,
+							 unsigned int id,
+							 struct netlink_ext_ack *extack)
+{
+	struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+
+	if (id == DEVLINK_ATTR_SELFTEST_ID_FLASH)
+		return bnxt_nvm_test(bp, extack) ?
+				DEVLINK_SELFTEST_STATUS_PASS :
+				DEVLINK_SELFTEST_STATUS_FAIL;
+
+	return DEVLINK_SELFTEST_STATUS_SKIP;
+}
+
 static const struct devlink_ops bnxt_dl_ops = {
 #ifdef CONFIG_BNXT_SRIOV
 	.eswitch_mode_set = bnxt_dl_eswitch_mode_set,
@@ -622,6 +681,8 @@ static const struct devlink_ops bnxt_dl_ops = {
 	.reload_limits = BIT(DEVLINK_RELOAD_LIMIT_NO_RESET),
 	.reload_down = bnxt_dl_reload_down,
 	.reload_up = bnxt_dl_reload_up,
+	.selftest_check = bnxt_dl_selftest_check,
+	.selftest_run = bnxt_dl_selftest_run,
 };
 
 static const struct devlink_ops bnxt_vf_dl_ops;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 7191e5d74208..87eb5362ad70 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2176,14 +2176,14 @@ static void bnxt_print_admin_err(struct bnxt *bp)
 	netdev_info(bp->dev, "PF does not have admin privileges to flash or reset the device\n");
 }
 
-static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
-				u16 ext, u16 *index, u32 *item_length,
-				u32 *data_length);
+int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
+			 u16 ext, u16 *index, u32 *item_length,
+			 u32 *data_length);
 
-static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
-			    u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
-			    u32 dir_item_len, const u8 *data,
-			    size_t data_len)
+int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
+		     u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
+		     u32 dir_item_len, const u8 *data,
+		     size_t data_len)
 {
 	struct bnxt *bp = netdev_priv(dev);
 	struct hwrm_nvm_write_input *req;
@@ -2836,8 +2836,8 @@ static int bnxt_get_nvram_directory(struct net_device *dev, u32 len, u8 *data)
 	return rc;
 }
 
-static int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
-			       u32 length, u8 *data)
+int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
+			u32 length, u8 *data)
 {
 	struct bnxt *bp = netdev_priv(dev);
 	int rc;
@@ -2871,9 +2871,9 @@ static int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
 	return rc;
 }
 
-static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
-				u16 ext, u16 *index, u32 *item_length,
-				u32 *data_length)
+int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
+			 u16 ext, u16 *index, u32 *item_length,
+			 u32 *data_length)
 {
 	struct hwrm_nvm_find_dir_entry_output *output;
 	struct hwrm_nvm_find_dir_entry_input *req;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
index a59284215e78..a8ecef8ab82c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
@@ -58,5 +58,14 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware
 int bnxt_get_pkginfo(struct net_device *dev, char *ver, int size);
 void bnxt_ethtool_init(struct bnxt *bp);
 void bnxt_ethtool_free(struct bnxt *bp);
+int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
+			 u16 ext, u16 *index, u32 *item_length,
+			 u32 *data_length);
+int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
+		     u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
+		     u32 dir_item_len, const u8 *data,
+		     size_t data_len);
+int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
+			u32 length, u8 *data);
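Note: the flash selftest wired up above is intentionally non-destructive; it
locates the VPD item, reads it back, then writes the same bytes to exercise
the NVM write path. A hedged sketch of the sequence, with argument lists
abbreviated:

	/* find -> read -> write-back of identical data; flash content is
	 * unchanged when the test passes
	 */
	bnxt_find_nvram_item(dev, BNX_DIR_TYPE_VPD, ..., &index, NULL, &len);
	bnxt_get_nvram_item(dev, index, 0, len, buf);
	bnxt_flash_nvram(dev, BNX_DIR_TYPE_VPD, ..., buf, len);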
#endif diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 4cd4f57ca2aa..494fe961a49d 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4798,13 +4798,15 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config }, { .compatible = "cdns,at91rm9200-emac", .data = &emac_config }, { .compatible = "cdns,emac", .data = &emac_config }, - { .compatible = "cdns,zynqmp-gem", .data = &zynqmp_config}, - { .compatible = "cdns,zynq-gem", .data = &zynq_config }, + { .compatible = "cdns,zynqmp-gem", .data = &zynqmp_config}, /* deprecated */ + { .compatible = "cdns,zynq-gem", .data = &zynq_config }, /* deprecated */ { .compatible = "sifive,fu540-c000-gem", .data = &fu540_c000_config }, { .compatible = "microchip,mpfs-macb", .data = &mpfs_config }, { .compatible = "microchip,sama7g5-gem", .data = &sama7g5_gem_config }, { .compatible = "microchip,sama7g5-emac", .data = &sama7g5_emac_config }, - { .compatible = "cdns,versal-gem", .data = &versal_config}, + { .compatible = "xlnx,zynqmp-gem", .data = &zynqmp_config}, + { .compatible = "xlnx,zynq-gem", .data = &zynq_config }, + { .compatible = "xlnx,versal-gem", .data = &versal_config}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, macb_dt_ids); diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c index 0f6a549b9f67..29a6c2ede43a 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_rx.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c @@ -142,6 +142,7 @@ static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va, int ref_ok, struct funeth_txq *xdp_q) { struct bpf_prog *xdp_prog; + struct xdp_frame *xdpf; struct xdp_buff xdp; u32 act; @@ -163,7 +164,9 @@ static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va, case XDP_TX: if (unlikely(!ref_ok)) goto pass; - if (!fun_xdp_tx(xdp_q, xdp.data, xdp.data_end - xdp.data)) + + xdpf = xdp_convert_buff_to_frame(&xdp); + if (!xdpf || !fun_xdp_tx(xdp_q, xdpf)) goto xdp_error; FUN_QSTAT_INC(q, xdp_tx); q->xdp_flush |= FUN_XDP_FLUSH_TX; diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c index a97e3af00cb9..54bdeb65a2bd 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_tx.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c @@ -487,7 +487,7 @@ static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget) do { fun_xdp_unmap(q, reclaim_idx); - page_frag_free(q->info[reclaim_idx].vaddr); + xdp_return_frame(q->info[reclaim_idx].xdpf); trace_funeth_tx_free(q, reclaim_idx, 1, head); @@ -500,11 +500,11 @@ static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget) return npkts; } -bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len) +bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf) { struct fun_eth_tx_req *req; struct fun_dataop_gl *gle; - unsigned int idx; + unsigned int idx, len; dma_addr_t dma; if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES) @@ -515,7 +515,8 @@ bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len) return false; } - dma = dma_map_single(q->dma_dev, data, len, DMA_TO_DEVICE); + len = xdpf->len; + dma = dma_map_single(q->dma_dev, xdpf->data, len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(q->dma_dev, dma))) { FUN_QSTAT_INC(q, tx_map_err); return false; @@ -535,7 +536,7 @@ bool fun_xdp_tx(struct 
funeth_txq *q, void *data, unsigned int len) gle = (struct fun_dataop_gl *)req->dataop.imm; fun_dataop_gl_init(gle, 0, 0, len, dma); - q->info[idx].vaddr = data; + q->info[idx].xdpf = xdpf; u64_stats_update_begin(&q->syncp); q->stats.tx_bytes += len; @@ -566,12 +567,9 @@ int fun_xdp_xmit_frames(struct net_device *dev, int n, if (unlikely(q_idx >= fp->num_xdpqs)) return -ENXIO; - for (q = xdpqs[q_idx], i = 0; i < n; i++) { - const struct xdp_frame *xdpf = frames[i]; - - if (!fun_xdp_tx(q, xdpf->data, xdpf->len)) + for (q = xdpqs[q_idx], i = 0; i < n; i++) + if (!fun_xdp_tx(q, frames[i])) break; - } if (unlikely(flags & XDP_XMIT_FLUSH)) fun_txq_wr_db(q); @@ -598,7 +596,7 @@ static void fun_xdpq_purge(struct funeth_txq *q) unsigned int idx = q->cons_cnt & q->mask; fun_xdp_unmap(q, idx); - page_frag_free(q->info[idx].vaddr); + xdp_return_frame(q->info[idx].xdpf); q->cons_cnt++; } } diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h index 1711f82cad71..53b7e95213a8 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h +++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h @@ -96,8 +96,8 @@ struct funeth_txq_stats { /* per Tx queue SW counters */ struct funeth_tx_info { /* per Tx descriptor state */ union { - struct sk_buff *skb; /* associated packet */ - void *vaddr; /* start address for XDP */ + struct sk_buff *skb; /* associated packet (sk_buff path) */ + struct xdp_frame *xdpf; /* associated XDP frame (XDP path) */ }; }; @@ -246,7 +246,7 @@ static inline int fun_irq_node(const struct fun_irq *p) int fun_rxq_napi_poll(struct napi_struct *napi, int budget); int fun_txq_napi_poll(struct napi_struct *napi, int budget); netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev); -bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len); +bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf); int fun_xdp_xmit_frames(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index fb9f476fb33c..b36bf9c3e1e4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2033,11 +2033,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, * non-zero req_queue_pairs says that user requested a new * queue count via ethtool's set_channels, so use this * value for queues distribution across traffic classes + * We need at least one queue pair for the interface + * to be usable as we see in else statement. 
*/ if (vsi->req_queue_pairs > 0) vsi->num_queue_pairs = vsi->req_queue_pairs; else if (pf->flags & I40E_FLAG_MSIX_ENABLED) vsi->num_queue_pairs = pf->num_lan_msix; + else + vsi->num_queue_pairs = 1; } /* Number of queues per enabled TC */ diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 1a2e54dbc5a1..36b440b1aaff 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -52,6 +52,7 @@ #include <net/udp_tunnel.h> #include <net/vxlan.h> #include <net/gtp.h> +#include <linux/ppp_defs.h> #include "ice_devids.h" #include "ice_type.h" #include "ice_txrx.h" diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 70335f6e8524..4efa5e5846e0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -658,7 +658,8 @@ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring) rx_desc = ICE_RX_DESC(rx_ring, i); if (!(rx_desc->wb.status_error0 & - cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS))) + (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) | + cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S))))) continue; rx_buf = &rx_ring->rx_buf[i]; diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index ada5198b5b16..4b3bb19e1d06 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1964,8 +1964,11 @@ ice_get_sw_fv_list(struct ice_hw *hw, struct ice_prot_lkup_ext *lkups, } } } while (fv); - if (list_empty(fv_list)) + if (list_empty(fv_list)) { + dev_warn(ice_hw_to_dev(hw), "Required profiles not found in currently loaded DDP package"); return -EIO; + } + return 0; err: diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 313716615e98..29914eedb3e6 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4656,6 +4656,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_set_safe_mode_caps(hw); } + hw->ucast_shared = true; + err = ice_init_pf(pf); if (err) { dev_err(dev, "ice_init_pf failed: %d\n", err); @@ -6005,10 +6007,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi) if (vsi->netdev) { ice_set_rx_mode(vsi->netdev); - err = ice_vsi_vlan_setup(vsi); + if (vsi->type != ICE_VSI_LB) { + err = ice_vsi_vlan_setup(vsi); - if (err) - return err; + if (err) + return err; + } } ice_vsi_cfg_dcb_rings(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index d4a0d089649c..560efc7654c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -43,6 +43,7 @@ enum ice_protocol_type { ICE_NVGRE, ICE_GTP, ICE_GTP_NO_PAY, + ICE_PPPOE, ICE_VLAN_EX, ICE_VLAN_IN, ICE_VXLAN_GPE, @@ -109,6 +110,7 @@ enum ice_prot_id { #define ICE_TCP_IL_HW 49 #define ICE_UDP_ILOS_HW 53 #define ICE_GRE_OF_HW 64 +#define ICE_PPPOE_HW 103 #define ICE_UDP_OF_HW 52 /* UDP Tunnels */ #define ICE_META_DATA_ID_HW 255 /* this is used for tunnel and VLAN type */ @@ -207,6 +209,14 @@ struct ice_udp_gtp_hdr { u8 rsvrd; }; +struct ice_pppoe_hdr { + u8 rsrvd_ver_type; + u8 rsrvd_code; + __be16 session_id; + __be16 length; + __be16 ppp_prot_id; /* control and data only */ +}; + struct ice_nvgre_hdr { __be16 flags; __be16 protocol; @@ -224,6 +234,7 @@ union ice_prot_hdr { struct 
ice_udp_tnl_hdr tnl_hdr; struct ice_nvgre_hdr nvgre_hdr; struct ice_udp_gtp_hdr gtp_hdr; + struct ice_pppoe_hdr pppoe_hdr; }; /* This is mapping table entry that maps every word within a given protocol diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 86093b2511d8..3ba1408c56a9 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1310,39 +1310,6 @@ out_put_vf: } /** - * ice_unicast_mac_exists - check if the unicast MAC exists on the PF's switch - * @pf: PF used to reference the switch's rules - * @umac: unicast MAC to compare against existing switch rules - * - * Return true on the first/any match, else return false - */ -static bool ice_unicast_mac_exists(struct ice_pf *pf, u8 *umac) -{ - struct ice_sw_recipe *mac_recipe_list = - &pf->hw.switch_info->recp_list[ICE_SW_LKUP_MAC]; - struct ice_fltr_mgmt_list_entry *list_itr; - struct list_head *rule_head; - struct mutex *rule_lock; /* protect MAC filter list access */ - - rule_head = &mac_recipe_list->filt_rules; - rule_lock = &mac_recipe_list->filt_rule_lock; - - mutex_lock(rule_lock); - list_for_each_entry(list_itr, rule_head, list_entry) { - u8 *existing_mac = &list_itr->fltr_info.l_data.mac.mac_addr[0]; - - if (ether_addr_equal(existing_mac, umac)) { - mutex_unlock(rule_lock); - return true; - } - } - - mutex_unlock(rule_lock); - - return false; -} - -/** * ice_set_vf_mac * @netdev: network interface device structure * @vf_id: VF identifier @@ -1376,13 +1343,6 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) if (ret) goto out_put_vf; - if (ice_unicast_mac_exists(pf, mac)) { - netdev_err(netdev, "Unicast MAC %pM already exists on this PF. Preventing setting VF %u unicast MAC address to %pM\n", - mac, vf_id, mac); - ret = -EINVAL; - goto out_put_vf; - } - mutex_lock(&vf->cfg_lock); /* VF is notified of its new MAC via the PF's response to the diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 2d1274774987..4a6a8334a0e0 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -41,6 +41,7 @@ enum { ICE_PKT_INNER_UDP = BIT(7), ICE_PKT_GTP_NOPAY = BIT(8), ICE_PKT_KMALLOC = BIT(9), + ICE_PKT_PPPOE = BIT(10), }; struct ice_dummy_pkt_offsets { @@ -1109,6 +1110,154 @@ ICE_DECLARE_PKT_TEMPLATE(ipv6_gtp) = { 0x00, 0x00, }; +ICE_DECLARE_PKT_OFFSETS(pppoe_ipv4_tcp) = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_PPPOE, 14 }, + { ICE_IPV4_OFOS, 22 }, + { ICE_TCP_IL, 42 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv4_tcp) = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x88, 0x64, /* ICE_ETYPE_OL 12 */ + + 0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */ + 0x00, 0x16, + + 0x00, 0x21, /* PPP Link Layer 20 */ + + 0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_OFOS 22 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 42 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 bytes alignment */ +}; + +ICE_DECLARE_PKT_OFFSETS(pppoe_ipv4_udp) = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_PPPOE, 14 }, + { ICE_IPV4_OFOS, 22 }, + { ICE_UDP_ILOS, 42 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv4_udp) = { + 0x00, 0x00, 0x00, 
0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x88, 0x64, /* ICE_ETYPE_OL 12 */ + + 0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */ + 0x00, 0x16, + + 0x00, 0x21, /* PPP Link Layer 20 */ + + 0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_OFOS 22 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 42 */ + 0x00, 0x08, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 bytes alignment */ +}; + +ICE_DECLARE_PKT_OFFSETS(pppoe_ipv6_tcp) = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_PPPOE, 14 }, + { ICE_IPV6_OFOS, 22 }, + { ICE_TCP_IL, 62 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv6_tcp) = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x88, 0x64, /* ICE_ETYPE_OL 12 */ + + 0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */ + 0x00, 0x2a, + + 0x00, 0x57, /* PPP Link Layer 20 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 22 */ + 0x00, 0x14, 0x06, 0x00, /* Next header is TCP */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 62 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 bytes alignment */ +}; + +ICE_DECLARE_PKT_OFFSETS(pppoe_ipv6_udp) = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_PPPOE, 14 }, + { ICE_IPV6_OFOS, 22 }, + { ICE_UDP_ILOS, 62 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv6_udp) = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x88, 0x64, /* ICE_ETYPE_OL 12 */ + + 0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */ + 0x00, 0x2a, + + 0x00, 0x57, /* PPP Link Layer 20 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 22 */ + 0x00, 0x08, 0x11, 0x00, /* Next header UDP*/ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 62 */ + 0x00, 0x08, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 bytes alignment */ +}; + static const struct ice_dummy_pkt_profile ice_dummy_pkt_profiles[] = { ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPU | ICE_PKT_OUTER_IPV6 | ICE_PKT_GTP_NOPAY), @@ -1135,6 +1284,11 @@ static const struct ice_dummy_pkt_profile ice_dummy_pkt_profiles[] = { ICE_PKT_PROFILE(ipv4_gtpu_ipv4_tcp, ICE_PKT_TUN_GTPU), ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPC | ICE_PKT_OUTER_IPV6), ICE_PKT_PROFILE(ipv4_gtpu_ipv4, ICE_PKT_TUN_GTPC), + ICE_PKT_PROFILE(pppoe_ipv6_udp, ICE_PKT_PPPOE | ICE_PKT_OUTER_IPV6 | + ICE_PKT_INNER_UDP), + ICE_PKT_PROFILE(pppoe_ipv6_tcp, ICE_PKT_PPPOE | ICE_PKT_OUTER_IPV6), + ICE_PKT_PROFILE(pppoe_ipv4_udp, ICE_PKT_PPPOE | ICE_PKT_INNER_UDP), + ICE_PKT_PROFILE(pppoe_ipv4_tcp, ICE_PKT_PPPOE), ICE_PKT_PROFILE(gre_ipv6_tcp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_IPV6 | ICE_PKT_INNER_TCP), ICE_PKT_PROFILE(gre_tcp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_TCP), @@ -4480,6 +4634,7 @@ static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = { { ICE_NVGRE, { 0, 2, 4, 6 } }, { ICE_GTP, { 8, 10, 12, 14, 16, 18, 20, 22 } }, { ICE_GTP_NO_PAY, { 8, 10, 12, 14 } }, + { ICE_PPPOE, { 0, 2, 4, 6 } }, { ICE_VLAN_EX, 
{ 2, 0 } }, { ICE_VLAN_IN, { 2, 0 } }, }; @@ -4502,6 +4657,7 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { { ICE_NVGRE, ICE_GRE_OF_HW }, { ICE_GTP, ICE_UDP_OF_HW }, { ICE_GTP_NO_PAY, ICE_UDP_ILOS_HW }, + { ICE_PPPOE, ICE_PPPOE_HW }, { ICE_VLAN_EX, ICE_VLAN_OF_HW }, { ICE_VLAN_IN, ICE_VLAN_OL_HW }, }; @@ -5580,6 +5736,12 @@ ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, match |= ICE_PKT_INNER_IPV6; else if (lkups[i].type == ICE_GTP_NO_PAY) match |= ICE_PKT_GTP_NOPAY; + else if (lkups[i].type == ICE_PPPOE) { + match |= ICE_PKT_PPPOE; + if (lkups[i].h_u.pppoe_hdr.ppp_prot_id == + htons(PPP_IPV6)) + match |= ICE_PKT_OUTER_IPV6; + } } while (ret->match && (match & ret->match) != ret->match) @@ -5677,6 +5839,9 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, case ICE_GTP: len = sizeof(struct ice_udp_gtp_hdr); break; + case ICE_PPPOE: + len = sizeof(struct ice_pppoe_hdr); + break; default: return -EINVAL; } diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 14795157846b..a298862857a8 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -54,6 +54,11 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, if (flags & ICE_TC_FLWR_FIELD_CVLAN) lkups_cnt++; + /* are PPPoE options specified? */ + if (flags & (ICE_TC_FLWR_FIELD_PPPOE_SESSID | + ICE_TC_FLWR_FIELD_PPP_PROTO)) + lkups_cnt++; + /* are IPv[4|6] fields specified? */ if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | ICE_TC_FLWR_FIELD_SRC_IPV4 | ICE_TC_FLWR_FIELD_DEST_IPV6 | ICE_TC_FLWR_FIELD_SRC_IPV6)) @@ -350,6 +355,28 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, i++; } + if (flags & (ICE_TC_FLWR_FIELD_PPPOE_SESSID | + ICE_TC_FLWR_FIELD_PPP_PROTO)) { + struct ice_pppoe_hdr *vals, *masks; + + vals = &list[i].h_u.pppoe_hdr; + masks = &list[i].m_u.pppoe_hdr; + + list[i].type = ICE_PPPOE; + + if (flags & ICE_TC_FLWR_FIELD_PPPOE_SESSID) { + vals->session_id = headers->pppoe_hdr.session_id; + masks->session_id = cpu_to_be16(0xFFFF); + } + + if (flags & ICE_TC_FLWR_FIELD_PPP_PROTO) { + vals->ppp_prot_id = headers->pppoe_hdr.ppp_proto; + masks->ppp_prot_id = cpu_to_be16(0xFFFF); + } + + i++; + } + /* copy L3 (IPv[4|6]: src, dest) address */ if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | ICE_TC_FLWR_FIELD_SRC_IPV4)) { @@ -694,6 +721,31 @@ exit: } /** + * ice_tc_set_pppoe - Parse PPPoE fields from TC flower filter + * @match: Pointer to flow match structure + * @fltr: Pointer to filter structure + * @headers: Pointer to outer header fields + * @returns PPP protocol used in filter (ppp_ses or ppp_disc) + */ +static u16 +ice_tc_set_pppoe(struct flow_match_pppoe *match, + struct ice_tc_flower_fltr *fltr, + struct ice_tc_flower_lyr_2_4_hdrs *headers) +{ + if (match->mask->session_id) { + fltr->flags |= ICE_TC_FLWR_FIELD_PPPOE_SESSID; + headers->pppoe_hdr.session_id = match->key->session_id; + } + + if (match->mask->ppp_proto) { + fltr->flags |= ICE_TC_FLWR_FIELD_PPP_PROTO; + headers->pppoe_hdr.ppp_proto = match->key->ppp_proto; + } + + return be16_to_cpu(match->key->type); +} + +/** * ice_tc_set_ipv4 - Parse IPv4 addresses from TC flower filter * @match: Pointer to flow match structure * @fltr: Pointer to filter structure @@ -988,7 +1040,8 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | - BIT(FLOW_DISSECTOR_KEY_PORTS))) 
{ + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_PPPOE))) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported key used"); return -EOPNOTSUPP; } @@ -1124,6 +1177,22 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, headers->cvlan_hdr.vlan_prio = match.key->vlan_priority; } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PPPOE)) { + struct flow_match_pppoe match; + + flow_rule_match_pppoe(rule, &match); + n_proto_key = ice_tc_set_pppoe(&match, fltr, headers); + + /* If ethertype equals ETH_P_PPP_SES, n_proto might be + * overwritten by encapsulated protocol (ppp_proto field) or set + * to 0. To correct this, flow_match_pppoe provides the type + * field, which contains the actual ethertype (ETH_P_PPP_SES). + */ + headers->l2_key.n_proto = cpu_to_be16(n_proto_key); + headers->l2_mask.n_proto = cpu_to_be16(0xFFFF); + fltr->flags |= ICE_TC_FLWR_FIELD_ETH_TYPE_ID; + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_match_control match; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 0193874cd203..91cd3d3778c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -24,6 +24,8 @@ #define ICE_TC_FLWR_FIELD_ETH_TYPE_ID BIT(17) #define ICE_TC_FLWR_FIELD_ENC_OPTS BIT(18) #define ICE_TC_FLWR_FIELD_CVLAN BIT(19) +#define ICE_TC_FLWR_FIELD_PPPOE_SESSID BIT(20) +#define ICE_TC_FLWR_FIELD_PPP_PROTO BIT(21) #define ICE_TC_FLOWER_MASK_32 0xFFFFFFFF @@ -44,6 +46,11 @@ struct ice_tc_vlan_hdr { __be16 vlan_tpid; }; +struct ice_tc_pppoe_hdr { + __be16 session_id; + __be16 ppp_proto; +}; + struct ice_tc_l2_hdr { u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; @@ -84,6 +91,7 @@ struct ice_tc_flower_lyr_2_4_hdrs { struct ice_tc_l2_hdr l2_mask; struct ice_tc_vlan_hdr vlan_hdr; struct ice_tc_vlan_hdr cvlan_hdr; + struct ice_tc_pppoe_hdr pppoe_hdr; /* L3 (IPv4[6]) layer fields with their mask */ struct ice_tc_l3_hdr l3_key; struct ice_tc_l3_hdr l3_mask; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 3f8b7274ed2f..836dce840712 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1751,11 +1751,13 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) protocol = vlan_get_protocol(skb); - if (eth_p_mpls(protocol)) + if (eth_p_mpls(protocol)) { ip.hdr = skb_inner_network_header(skb); - else + l4.hdr = skb_checksum_start(skb); + } else { ip.hdr = skb_network_header(skb); - l4.hdr = skb_checksum_start(skb); + l4.hdr = skb_transport_header(skb); + } /* compute outer L2 header size */ l2_len = ip.hdr - skb->data; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index b2b5d2ee83a5..4508308f2c28 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -2971,7 +2971,8 @@ ice_vc_validate_add_vlan_filter_list(struct ice_vsi *vsi, struct virtchnl_vlan_filtering_caps *vfc, struct virtchnl_vlan_filter_list_v2 *vfl) { - u16 num_requested_filters = vsi->num_vlan + vfl->num_elements; + u16 num_requested_filters = ice_vsi_num_non_zero_vlans(vsi) + + vfl->num_elements; if (num_requested_filters > vfc->max_filters) return false; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 28b19945d716..e64318c110fd 100644 --- 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -28,6 +28,9 @@ #define MAX_RATE_EXPONENT 0x0FULL #define MAX_RATE_MANTISSA 0xFFULL +#define CN10K_MAX_BURST_MANTISSA 0x7FFFULL +#define CN10K_MAX_BURST_SIZE 8453888ULL + /* Bitfields in NIX_TLX_PIR register */ #define TLX_RATE_MANTISSA GENMASK_ULL(8, 1) #define TLX_RATE_EXPONENT GENMASK_ULL(12, 9) @@ -35,6 +38,9 @@ #define TLX_BURST_MANTISSA GENMASK_ULL(36, 29) #define TLX_BURST_EXPONENT GENMASK_ULL(40, 37) +#define CN10K_TLX_BURST_MANTISSA GENMASK_ULL(43, 29) +#define CN10K_TLX_BURST_EXPONENT GENMASK_ULL(47, 44) + struct otx2_tc_flow_stats { u64 bytes; u64 pkts; @@ -77,33 +83,42 @@ int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic) } EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap); -static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp, - u32 *burst_mantissa) +static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst, + u32 *burst_exp, u32 *burst_mantissa) { + int max_burst, max_mantissa; unsigned int tmp; + if (is_dev_otx2(nic->pdev)) { + max_burst = MAX_BURST_SIZE; + max_mantissa = MAX_BURST_MANTISSA; + } else { + max_burst = CN10K_MAX_BURST_SIZE; + max_mantissa = CN10K_MAX_BURST_MANTISSA; + } + /* Burst is calculated as * ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256 * Max supported burst size is 130,816 bytes. */ - burst = min_t(u32, burst, MAX_BURST_SIZE); + burst = min_t(u32, burst, max_burst); if (burst) { *burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0; tmp = burst - rounddown_pow_of_two(burst); - if (burst < MAX_BURST_MANTISSA) + if (burst < max_mantissa) *burst_mantissa = tmp * 2; else *burst_mantissa = tmp / (1ULL << (*burst_exp - 7)); } else { *burst_exp = MAX_BURST_EXPONENT; - *burst_mantissa = MAX_BURST_MANTISSA; + *burst_mantissa = max_mantissa; } } -static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp, +static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp, u32 *mantissa, u32 *div_exp) { - unsigned int tmp; + u64 tmp; /* Rate calculation by hardware * @@ -132,21 +147,44 @@ static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp, } } -static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate) +static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic, + u64 maxrate, u32 burst) { - struct otx2_hw *hw = &nic->hw; - struct nix_txschq_config *req; u32 burst_exp, burst_mantissa; u32 exp, mantissa, div_exp; + u64 regval = 0; + + /* Get exponent and mantissa values from the desired rate */ + otx2_get_egress_burst_cfg(nic, burst, &burst_exp, &burst_mantissa); + otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp); + + if (is_dev_otx2(nic->pdev)) { + regval = FIELD_PREP(TLX_BURST_EXPONENT, (u64)burst_exp) | + FIELD_PREP(TLX_BURST_MANTISSA, (u64)burst_mantissa) | + FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) | + FIELD_PREP(TLX_RATE_EXPONENT, exp) | + FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0); + } else { + regval = FIELD_PREP(CN10K_TLX_BURST_EXPONENT, (u64)burst_exp) | + FIELD_PREP(CN10K_TLX_BURST_MANTISSA, (u64)burst_mantissa) | + FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) | + FIELD_PREP(TLX_RATE_EXPONENT, exp) | + FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0); + } + + return regval; +} + +static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, + u32 burst, u64 maxrate) +{ + struct otx2_hw *hw = &nic->hw; + struct nix_txschq_config *req; int txschq, err; /* All SQs share the same TL4, so pick the first scheduler */ txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0]; 
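Note: plugging the register maxima into the burst formula quoted above
checks out, assuming the same encoding applies on both silicon families:

	/* otx2: 8-bit mantissa, 4-bit exponent */
	((256 + 255) << (1 + 15)) / 256 = 130,816	/* MAX_BURST_SIZE */

	/* CN10K: 15-bit mantissa, same exponent width */
	((256 + 32767) << (1 + 15)) / 256 = 8,453,888	/* CN10K_MAX_BURST_SIZE */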
- /* Get exponent and mantissa values from the desired rate */ - otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa); - otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp); - mutex_lock(&nic->mbox.lock); req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox); if (!req) { @@ -157,11 +195,7 @@ static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 ma req->lvl = NIX_TXSCH_LVL_TL4; req->num_regs = 1; req->reg[0] = NIX_AF_TL4X_PIR(txschq); - req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) | - FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) | - FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) | - FIELD_PREP(TLX_RATE_EXPONENT, exp) | - FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0); + req->regval[0] = otx2_get_txschq_rate_regval(nic, maxrate, burst); err = otx2_sync_mbox_msg(&nic->mbox); mutex_unlock(&nic->mbox.lock); @@ -230,7 +264,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, struct netlink_ext_ack *extack = cls->common.extack; struct flow_action *actions = &cls->rule->action; struct flow_action_entry *entry; - u32 rate; + u64 rate; int err; err = otx2_tc_validate_flow(nic, actions, extack); @@ -256,7 +290,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, } /* Convert bytes per second to Mbps */ rate = entry->police.rate_bytes_ps * 8; - rate = max_t(u32, rate / 1000000, 1); + rate = max_t(u64, rate / 1000000, 1); err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate); if (err) return err; @@ -614,21 +648,27 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, flow_spec->dport = match.key->dst; flow_mask->dport = match.mask->dst; - if (ip_proto == IPPROTO_UDP) - req->features |= BIT_ULL(NPC_DPORT_UDP); - else if (ip_proto == IPPROTO_TCP) - req->features |= BIT_ULL(NPC_DPORT_TCP); - else if (ip_proto == IPPROTO_SCTP) - req->features |= BIT_ULL(NPC_DPORT_SCTP); + + if (flow_mask->dport) { + if (ip_proto == IPPROTO_UDP) + req->features |= BIT_ULL(NPC_DPORT_UDP); + else if (ip_proto == IPPROTO_TCP) + req->features |= BIT_ULL(NPC_DPORT_TCP); + else if (ip_proto == IPPROTO_SCTP) + req->features |= BIT_ULL(NPC_DPORT_SCTP); + } flow_spec->sport = match.key->src; flow_mask->sport = match.mask->src; - if (ip_proto == IPPROTO_UDP) - req->features |= BIT_ULL(NPC_SPORT_UDP); - else if (ip_proto == IPPROTO_TCP) - req->features |= BIT_ULL(NPC_SPORT_TCP); - else if (ip_proto == IPPROTO_SCTP) - req->features |= BIT_ULL(NPC_SPORT_SCTP); + + if (flow_mask->sport) { + if (ip_proto == IPPROTO_UDP) + req->features |= BIT_ULL(NPC_SPORT_UDP); + else if (ip_proto == IPPROTO_TCP) + req->features |= BIT_ULL(NPC_SPORT_TCP); + else if (ip_proto == IPPROTO_SCTP) + req->features |= BIT_ULL(NPC_SPORT_SCTP); + } } return otx2_tc_parse_actions(nic, &rule->action, req, f, node); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c370d6589596..d9426b01f462 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1001,7 +1001,7 @@ static int txd_to_idx(struct mtk_tx_ring *ring, void *dma, u32 txd_size) } static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf, - bool napi) + struct xdp_frame_bulk *bq, bool napi) { if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) { if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) { @@ -1031,23 +1031,24 @@ static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf, } } - if (tx_buf->type == MTK_TYPE_SKB) { - if (tx_buf->data && - 
tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) {
+	if (tx_buf->data && tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) {
+		if (tx_buf->type == MTK_TYPE_SKB) {
 			struct sk_buff *skb = tx_buf->data;
 
 			if (napi)
 				napi_consume_skb(skb, napi);
 			else
 				dev_kfree_skb_any(skb);
-		}
-	} else if (tx_buf->data) {
-		struct xdp_frame *xdpf = tx_buf->data;
+		} else {
+			struct xdp_frame *xdpf = tx_buf->data;
 
-		if (napi && tx_buf->type == MTK_TYPE_XDP_TX)
-			xdp_return_frame_rx_napi(xdpf);
-		else
-			xdp_return_frame(xdpf);
+			if (napi && tx_buf->type == MTK_TYPE_XDP_TX)
+				xdp_return_frame_rx_napi(xdpf);
+			else if (bq)
+				xdp_return_frame_bulk(xdpf, bq);
+			else
+				xdp_return_frame(xdpf);
+		}
 	}
 
 	tx_buf->flags = 0;
 	tx_buf->data = NULL;
@@ -1297,7 +1298,7 @@ err_dma:
 		tx_buf = mtk_desc_to_tx_buf(ring, itxd, soc->txrx.txd_size);
 
 		/* unmap dma */
-		mtk_tx_unmap(eth, tx_buf, false);
+		mtk_tx_unmap(eth, tx_buf, NULL, false);
 
 		itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
 		if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA))
@@ -1523,68 +1524,112 @@ static void mtk_rx_put_buff(struct mtk_rx_ring *ring, void *data, bool napi)
 		skb_free_frag(data);
 }
 
+static int mtk_xdp_frame_map(struct mtk_eth *eth, struct net_device *dev,
+			     struct mtk_tx_dma_desc_info *txd_info,
+			     struct mtk_tx_dma *txd, struct mtk_tx_buf *tx_buf,
+			     void *data, u16 headroom, int index, bool dma_map)
+{
+	struct mtk_tx_ring *ring = &eth->tx_ring;
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_tx_dma *txd_pdma;
+
+	if (dma_map) {	/* ndo_xdp_xmit */
+		txd_info->addr = dma_map_single(eth->dma_dev, data,
+						txd_info->size, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(eth->dma_dev, txd_info->addr)))
+			return -ENOMEM;
+
+		tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
+	} else {
+		struct page *page = virt_to_head_page(data);
+
+		txd_info->addr = page_pool_get_dma_addr(page) +
+				 sizeof(struct xdp_frame) + headroom;
+		dma_sync_single_for_device(eth->dma_dev, txd_info->addr,
+					   txd_info->size, DMA_BIDIRECTIONAL);
+	}
+	mtk_tx_set_dma_desc(dev, txd, txd_info);
+
+	tx_buf->flags |= !mac->id ? MTK_TX_FLAGS_FPORT0 : MTK_TX_FLAGS_FPORT1;
+	tx_buf->type = dma_map ? MTK_TYPE_XDP_NDO : MTK_TYPE_XDP_TX;
+	tx_buf->data = (void *)MTK_DMA_DUMMY_DESC;
+
+	txd_pdma = qdma_to_pdma(ring, txd);
+	setup_tx_buf(eth, tx_buf, txd_pdma, txd_info->addr, txd_info->size,
+		     index);
+
+	return 0;
+}
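Note on mtk_xdp_frame_map() above: the two branches reflect buffer
ownership. ndo_xdp_xmit frames are foreign memory and need a fresh
dma_map_single(), while XDP_TX buffers come from the driver's own page pool
and are already mapped, so only an offset and a cache sync are needed. The
assumed page-pool buffer layout for the XDP_TX branch:

	/* page start | struct xdp_frame | headroom | frame data ... */
	dma = page_pool_get_dma_addr(page) + sizeof(struct xdp_frame) + headroom;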
sinfo->nr_frags : 0; + if (unlikely(atomic_read(&ring->free_count) <= 1 + nr_frags)) return -EBUSY; spin_lock(&eth->page_lock); txd = ring->next_free; if (txd == ring->last_free) { - err = -ENOMEM; - goto out; + spin_unlock(&eth->page_lock); + return -ENOMEM; } + htxd = txd; tx_buf = mtk_desc_to_tx_buf(ring, txd, soc->txrx.txd_size); memset(tx_buf, 0, sizeof(*tx_buf)); + htx_buf = tx_buf; - if (dma_map) { /* ndo_xdp_xmit */ - txd_info.addr = dma_map_single(eth->dma_dev, xdpf->data, - txd_info.size, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(eth->dma_dev, txd_info.addr))) { - err = -ENOMEM; - goto out; - } - tx_buf->flags |= MTK_TX_FLAGS_SINGLE0; - } else { - struct page *page = virt_to_head_page(xdpf->data); + for (;;) { + err = mtk_xdp_frame_map(eth, dev, &txd_info, txd, tx_buf, + data, xdpf->headroom, index, dma_map); + if (err < 0) + goto unmap; - txd_info.addr = page_pool_get_dma_addr(page) + - sizeof(*xdpf) + xdpf->headroom; - dma_sync_single_for_device(eth->dma_dev, txd_info.addr, - txd_info.size, - DMA_BIDIRECTIONAL); - } - mtk_tx_set_dma_desc(dev, txd, &txd_info); + if (txd_info.last) + break; - tx_buf->flags |= !mac->id ? MTK_TX_FLAGS_FPORT0 : MTK_TX_FLAGS_FPORT1; + if (MTK_HAS_CAPS(soc->caps, MTK_QDMA) || (index & 0x1)) { + txd = mtk_qdma_phys_to_virt(ring, txd->txd2); + txd_pdma = qdma_to_pdma(ring, txd); + if (txd == ring->last_free) + goto unmap; - txd_pdma = qdma_to_pdma(ring, txd); - setup_tx_buf(eth, tx_buf, txd_pdma, txd_info.addr, txd_info.size, - index++); + tx_buf = mtk_desc_to_tx_buf(ring, txd, + soc->txrx.txd_size); + memset(tx_buf, 0, sizeof(*tx_buf)); + n_desc++; + } + + memset(&txd_info, 0, sizeof(struct mtk_tx_dma_desc_info)); + txd_info.size = skb_frag_size(&sinfo->frags[index]); + txd_info.last = index + 1 == nr_frags; + data = skb_frag_address(&sinfo->frags[index]); + index++; + } /* store xdpf for cleanup */ - tx_buf->type = dma_map ? 
MTK_TYPE_XDP_NDO : MTK_TYPE_XDP_TX; - tx_buf->data = xdpf; + htx_buf->data = xdpf; if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA)) { + txd_pdma = qdma_to_pdma(ring, txd); if (index & 1) txd_pdma->txd2 |= TX_DMA_LS0; else @@ -1608,7 +1653,24 @@ static int mtk_xdp_submit_frame(struct mtk_eth *eth, struct xdp_frame *xdpf, mtk_w32(eth, NEXT_DESP_IDX(idx, ring->dma_size), MT7628_TX_CTX_IDX0); } -out: + + spin_unlock(&eth->page_lock); + + return 0; + +unmap: + while (htxd != txd) { + txd_pdma = qdma_to_pdma(ring, htxd); + tx_buf = mtk_desc_to_tx_buf(ring, htxd, soc->txrx.txd_size); + mtk_tx_unmap(eth, tx_buf, NULL, false); + + htxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU; + if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA)) + txd_pdma->txd2 = TX_DMA_DESP2_DEF; + + htxd = mtk_qdma_phys_to_virt(ring, htxd->txd2); + } + spin_unlock(&eth->page_lock); return err; @@ -1913,6 +1975,7 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget, const struct mtk_reg_map *reg_map = eth->soc->reg_map; struct mtk_tx_ring *ring = &eth->tx_ring; struct mtk_tx_buf *tx_buf; + struct xdp_frame_bulk bq; struct mtk_tx_dma *desc; u32 cpu, dma; @@ -1920,6 +1983,7 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget, dma = mtk_r32(eth, reg_map->qdma.drx_ptr); desc = mtk_qdma_phys_to_virt(ring, cpu); + xdp_frame_bulk_init(&bq); while ((cpu != dma) && budget) { u32 next_cpu = desc->txd2; @@ -1937,25 +2001,23 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget, if (!tx_buf->data) break; - if (tx_buf->type == MTK_TYPE_SKB && - tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) { - struct sk_buff *skb = tx_buf->data; + if (tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) { + if (tx_buf->type == MTK_TYPE_SKB) { + struct sk_buff *skb = tx_buf->data; - bytes[mac] += skb->len; - done[mac]++; - budget--; - } else if (tx_buf->type == MTK_TYPE_XDP_TX || - tx_buf->type == MTK_TYPE_XDP_NDO) { + bytes[mac] += skb->len; + done[mac]++; + } budget--; } - - mtk_tx_unmap(eth, tx_buf, true); + mtk_tx_unmap(eth, tx_buf, &bq, true); ring->last_free = desc; atomic_inc(&ring->free_count); cpu = next_cpu; } + xdp_flush_frame_bulk(&bq); ring->last_free_ptr = cpu; mtk_w32(eth, cpu, reg_map->qdma.crx_ptr); @@ -1968,29 +2030,29 @@ static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget, { struct mtk_tx_ring *ring = &eth->tx_ring; struct mtk_tx_buf *tx_buf; + struct xdp_frame_bulk bq; struct mtk_tx_dma *desc; u32 cpu, dma; cpu = ring->cpu_idx; dma = mtk_r32(eth, MT7628_TX_DTX_IDX0); + xdp_frame_bulk_init(&bq); while ((cpu != dma) && budget) { tx_buf = &ring->buf[cpu]; if (!tx_buf->data) break; - if (tx_buf->type == MTK_TYPE_SKB && - tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) { - struct sk_buff *skb = tx_buf->data; - bytes[0] += skb->len; - done[0]++; - budget--; - } else if (tx_buf->type == MTK_TYPE_XDP_TX || - tx_buf->type == MTK_TYPE_XDP_NDO) { + if (tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) { + if (tx_buf->type == MTK_TYPE_SKB) { + struct sk_buff *skb = tx_buf->data; + + bytes[0] += skb->len; + done[0]++; + } budget--; } - - mtk_tx_unmap(eth, tx_buf, true); + mtk_tx_unmap(eth, tx_buf, &bq, true); desc = ring->dma + cpu * eth->soc->txrx.txd_size; ring->last_free = desc; @@ -1998,6 +2060,7 @@ static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget, cpu = NEXT_DESP_IDX(cpu, ring->dma_size); } + xdp_flush_frame_bulk(&bq); ring->cpu_idx = cpu; @@ -2207,7 +2270,7 @@ static void mtk_tx_clean(struct mtk_eth *eth) if (ring->buf) { for (i = 0; i < MTK_DMA_SIZE; i++) - mtk_tx_unmap(eth, &ring->buf[i], false); + mtk_tx_unmap(eth, &ring->buf[i], NULL, false); 
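The mtk_tx_unmap()/mtk_poll_tx_*() hunks above thread a struct xdp_frame_bulk through the TX completion path so that completed XDP_TX and ndo_xdp_xmit frames are handed back to the page_pool in batches instead of one at a time. A minimal sketch of the same pattern using the generic helpers from <net/xdp.h> follows; example_poll_tx() and struct example_tx_buf are illustrative stand-ins for the driver's real completion loop and per-slot bookkeeping, not mtk structures:

	#include <net/xdp.h>

	/* Hypothetical per-slot TX bookkeeping; stands in for mtk_tx_buf. */
	struct example_tx_buf {
		struct xdp_frame *xdpf;
	};

	static void example_poll_tx(struct example_tx_buf *bufs, int n)
	{
		struct xdp_frame_bulk bq;
		int i;

		xdp_frame_bulk_init(&bq);
		for (i = 0; i < n; i++) {
			struct xdp_frame *xdpf = bufs[i].xdpf;

			if (!xdpf)
				continue;
			/* Queues the frame on a small per-allocator array and
			 * only hands pages back once the array fills up or the
			 * memory allocator changes.
			 */
			xdp_return_frame_bulk(xdpf, &bq);
			bufs[i].xdpf = NULL;
		}
		/* Release whatever is still queued once the ring scan ends. */
		xdp_flush_frame_bulk(&bq);
	}

As in the hunks above, frames freed from the RX NAPI context of the same CPU can still take the xdp_return_frame_rx_napi() shortcut; the bulk variant is the win on the ordinary completion path, where it amortizes the page_pool locking across up to a queue's worth of frames.
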
kfree(ring->buf); ring->buf = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 5b11557f1ae4..0eb7b83637d8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -204,9 +204,13 @@ out: static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) { + struct mlx4_dev *dev = persist->dev; + struct devlink *devlink; int err = 0; mlx4_enter_error_state(persist); + devlink = priv_to_devlink(mlx4_priv(dev)); + devl_lock(devlink); mutex_lock(&persist->interface_state_mutex); if (persist->interface_state & MLX4_INTERFACE_STATE_UP && !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) { @@ -215,6 +219,7 @@ static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) err); } mutex_unlock(&persist->interface_state_mutex); + devl_unlock(devlink); } static void dump_err_buf(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c index ac5468b77488..82a07a31cde7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/crdump.c +++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c @@ -226,10 +226,10 @@ int mlx4_crdump_init(struct mlx4_dev *dev) /* Create cr-space region */ crdump->region_crspace = - devlink_region_create(devlink, - &region_cr_space_ops, - MAX_NUM_OF_DUMPS_TO_STORE, - pci_resource_len(pdev, 0)); + devl_region_create(devlink, + &region_cr_space_ops, + MAX_NUM_OF_DUMPS_TO_STORE, + pci_resource_len(pdev, 0)); if (IS_ERR(crdump->region_crspace)) mlx4_warn(dev, "crdump: create devlink region %s err %ld\n", region_cr_space_str, @@ -237,10 +237,10 @@ int mlx4_crdump_init(struct mlx4_dev *dev) /* Create fw-health region */ crdump->region_fw_health = - devlink_region_create(devlink, - &region_fw_health_ops, - MAX_NUM_OF_DUMPS_TO_STORE, - HEALTH_BUFFER_SIZE); + devl_region_create(devlink, + &region_fw_health_ops, + MAX_NUM_OF_DUMPS_TO_STORE, + HEALTH_BUFFER_SIZE); if (IS_ERR(crdump->region_fw_health)) mlx4_warn(dev, "crdump: create devlink region %s err %ld\n", region_fw_health_str, @@ -253,6 +253,6 @@ void mlx4_crdump_end(struct mlx4_dev *dev) { struct mlx4_fw_crdump *crdump = &dev->persist->crdump; - devlink_region_destroy(crdump->region_fw_health); - devlink_region_destroy(crdump->region_crspace); + devl_region_destroy(crdump->region_fw_health); + devl_region_destroy(crdump->region_crspace); } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index b187c210d4d6..2c764d1d897d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3033,7 +3033,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; int err; - err = devlink_port_register(devlink, &info->devlink_port, port); + err = devl_port_register(devlink, &info->devlink_port, port); if (err) return err; @@ -3071,7 +3071,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) err = device_create_file(&dev->persist->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); - devlink_port_unregister(&info->devlink_port); + devl_port_unregister(&info->devlink_port); info->port = -1; return err; } @@ -3093,7 +3093,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) mlx4_err(dev, "Failed to create mtu file for port %d\n", port); device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); - 
devlink_port_unregister(&info->devlink_port); + devl_port_unregister(&info->devlink_port); info->port = -1; return err; } @@ -3109,7 +3109,7 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); device_remove_file(&info->dev->persist->pdev->dev, &info->port_mtu_attr); - devlink_port_unregister(&info->devlink_port); + devl_port_unregister(&info->devlink_port); #ifdef CONFIG_RFS_ACCEL free_irq_cpu_rmap(info->rmap); @@ -3333,6 +3333,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, int total_vfs, int *nvfs, struct mlx4_priv *priv, int reset_flow) { + struct devlink *devlink = priv_to_devlink(priv); struct mlx4_dev *dev; unsigned sum = 0; int err; @@ -3341,6 +3342,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, struct mlx4_dev_cap *dev_cap = NULL; int existing_vfs = 0; + devl_assert_locked(devlink); dev = &priv->dev; INIT_LIST_HEAD(&priv->ctx_list); @@ -3956,9 +3958,11 @@ static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change, NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported"); return -EOPNOTSUPP; } + devl_lock(devlink); if (persist->num_vfs) mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n"); mlx4_restart_one_down(persist->pdev); + devl_unlock(devlink); return 0; } @@ -3971,8 +3975,10 @@ static int mlx4_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a struct mlx4_dev_persistent *persist = dev->persist; int err; + devl_lock(devlink); *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); err = mlx4_restart_one_up(persist->pdev, true, devlink); + devl_unlock(devlink); if (err) mlx4_err(persist->dev, "mlx4_restart_one_up failed, ret=%d\n", err); @@ -3999,6 +4005,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv), &pdev->dev); if (!devlink) return -ENOMEM; + devl_lock(devlink); priv = devlink_priv(devlink); dev = &priv->dev; @@ -4026,6 +4033,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_save_state(pdev); devlink_set_features(devlink, DEVLINK_F_RELOAD); + devl_unlock(devlink); devlink_register(devlink); return 0; @@ -4035,6 +4043,7 @@ err_params_unregister: err_devlink_unregister: kfree(dev->persist); err_devlink_free: + devl_unlock(devlink); devlink_free(devlink); return ret; } @@ -4056,8 +4065,11 @@ static void mlx4_unload_one(struct pci_dev *pdev) struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; + struct devlink *devlink; int p, i; + devlink = priv_to_devlink(priv); + devl_assert_locked(devlink); if (priv->removed) return; @@ -4137,6 +4149,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) devlink_unregister(devlink); + devl_lock(devlink); if (mlx4_is_slave(dev)) persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; @@ -4172,6 +4185,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) devlink_params_unregister(devlink, mlx4_devlink_params, ARRAY_SIZE(mlx4_devlink_params)); kfree(dev->persist); + devl_unlock(devlink); devlink_free(devlink); } @@ -4292,15 +4306,20 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; + struct devlink *devlink; mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n"); mlx4_enter_error_state(persist); 
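The mlx4 hunks above and below all follow one convention: every entry point that can load or unload the device (probe/remove, fatal-error handling, shutdown, suspend/resume, devlink reload) now takes the devlink instance lock explicitly, and the inner registration calls switch from the devlink_*() helpers to the devl_*() variants, which expect the caller to already hold that lock. A condensed sketch of the split, where struct example_dev, example_load() and example_probe() are illustrative stand-ins for the driver specifics:

	#include <net/devlink.h>

	struct example_dev {
		struct devlink *devlink;
		struct devlink_port port;
	};

	/* Inner flow: runs with the instance lock held, so devl_*() is legal. */
	static int example_load(struct example_dev *dev)
	{
		devl_assert_locked(dev->devlink);
		return devl_port_register(dev->devlink, &dev->port, 1);
	}

	/* Outer flow: owns the lock around the whole load path. */
	static int example_probe(struct example_dev *dev)
	{
		int err;

		devl_lock(dev->devlink);
		err = example_load(dev);
		devl_unlock(dev->devlink);
		if (err)
			return err;

		devlink_register(dev->devlink);	/* publish only after setup */
		return 0;
	}

The payoff is that devlink reload and driver-initiated teardown can no longer race each other: both paths serialize on the same instance lock rather than on the driver's private mutexes alone, which is exactly why the error, shutdown and PM handlers below wrap their existing interface_state_mutex critical sections with devl_lock()/devl_unlock().
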
+ devlink = priv_to_devlink(mlx4_priv(dev)); + devl_lock(devlink); mutex_lock(&persist->interface_state_mutex); if (persist->interface_state & MLX4_INTERFACE_STATE_UP) mlx4_unload_one(pdev); mutex_unlock(&persist->interface_state_mutex); + devl_unlock(devlink); if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; @@ -4333,6 +4352,7 @@ static void mlx4_pci_resume(struct pci_dev *pdev) struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; + struct devlink *devlink; int total_vfs; int err; @@ -4340,6 +4360,8 @@ static void mlx4_pci_resume(struct pci_dev *pdev) total_vfs = dev->persist->num_vfs; memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); + devlink = priv_to_devlink(priv); + devl_lock(devlink); mutex_lock(&persist->interface_state_mutex); if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { err = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, @@ -4358,19 +4380,23 @@ static void mlx4_pci_resume(struct pci_dev *pdev) } end: mutex_unlock(&persist->interface_state_mutex); - + devl_unlock(devlink); } static void mlx4_shutdown(struct pci_dev *pdev) { struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; + struct devlink *devlink; mlx4_info(persist->dev, "mlx4_shutdown was called\n"); + devlink = priv_to_devlink(mlx4_priv(dev)); + devl_lock(devlink); mutex_lock(&persist->interface_state_mutex); if (persist->interface_state & MLX4_INTERFACE_STATE_UP) mlx4_unload_one(pdev); mutex_unlock(&persist->interface_state_mutex); + devl_unlock(devlink); mlx4_pci_disable_device(dev); } @@ -4385,12 +4411,16 @@ static int __maybe_unused mlx4_suspend(struct device *dev_d) struct pci_dev *pdev = to_pci_dev(dev_d); struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; + struct devlink *devlink; mlx4_err(dev, "suspend was called\n"); + devlink = priv_to_devlink(mlx4_priv(dev)); + devl_lock(devlink); mutex_lock(&persist->interface_state_mutex); if (persist->interface_state & MLX4_INTERFACE_STATE_UP) mlx4_unload_one(pdev); mutex_unlock(&persist->interface_state_mutex); + devl_unlock(devlink); return 0; } @@ -4402,6 +4432,7 @@ static int __maybe_unused mlx4_resume(struct device *dev_d) struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; + struct devlink *devlink; int total_vfs; int ret = 0; @@ -4409,6 +4440,8 @@ static int __maybe_unused mlx4_resume(struct device *dev_d) total_vfs = dev->persist->num_vfs; memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); + devlink = priv_to_devlink(priv); + devl_lock(devlink); mutex_lock(&persist->interface_state_mutex); if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, @@ -4422,6 +4455,7 @@ static int __maybe_unused mlx4_resume(struct device *dev_d) } } mutex_unlock(&persist->interface_state_mutex); + devl_unlock(devlink); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ccf2068d2e79..0571e40c6ee5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -335,13 +335,12 @@ static void del_adev(struct auxiliary_device *adev) int mlx5_attach_device(struct mlx5_core_dev *dev) { - struct devlink *devlink = priv_to_devlink(dev); struct mlx5_priv *priv = &dev->priv; struct auxiliary_device *adev; struct 
auxiliary_driver *adrv; int ret = 0, i; - devl_lock(devlink); + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&mlx5_intf_mutex); priv->flags &= ~MLX5_PRIV_FLAGS_DETACH; priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; @@ -394,20 +393,18 @@ int mlx5_attach_device(struct mlx5_core_dev *dev) } priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; mutex_unlock(&mlx5_intf_mutex); - devl_unlock(devlink); return ret; } void mlx5_detach_device(struct mlx5_core_dev *dev) { - struct devlink *devlink = priv_to_devlink(dev); struct mlx5_priv *priv = &dev->priv; struct auxiliary_device *adev; struct auxiliary_driver *adrv; pm_message_t pm = {}; int i; - devl_lock(devlink); + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&mlx5_intf_mutex); priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) { @@ -441,21 +438,17 @@ skip_suspend: priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; priv->flags |= MLX5_PRIV_FLAGS_DETACH; mutex_unlock(&mlx5_intf_mutex); - devl_unlock(devlink); } int mlx5_register_device(struct mlx5_core_dev *dev) { - struct devlink *devlink; int ret; - devlink = priv_to_devlink(dev); - devl_lock(devlink); + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&mlx5_intf_mutex); dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; ret = mlx5_rescan_drivers_locked(dev); mutex_unlock(&mlx5_intf_mutex); - devl_unlock(devlink); if (ret) mlx5_unregister_device(dev); @@ -464,15 +457,11 @@ int mlx5_register_device(struct mlx5_core_dev *dev) void mlx5_unregister_device(struct mlx5_core_dev *dev) { - struct devlink *devlink; - - devlink = priv_to_devlink(dev); - devl_lock(devlink); + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&mlx5_intf_mutex); dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; mlx5_rescan_drivers_locked(dev); mutex_unlock(&mlx5_intf_mutex); - devl_unlock(devlink); } static int add_drivers(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index f85166e587f2..1c05a7091698 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -104,7 +104,16 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli if (err) return err; - return mlx5_fw_reset_wait_reset_done(dev); + err = mlx5_fw_reset_wait_reset_done(dev); + if (err) + return err; + + mlx5_unload_one_devl_locked(dev); + err = mlx5_health_wait_pci_up(dev); + if (err) + NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); + + return err; } static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink, @@ -134,6 +143,7 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, struct mlx5_core_dev *dev = devlink_priv(devlink); struct pci_dev *pdev = dev->pdev; bool sf_dev_allocated; + int ret = 0; sf_dev_allocated = mlx5_sf_dev_allocated(dev); if (sf_dev_allocated) { @@ -154,19 +164,25 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable"); } + devl_lock(devlink); switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - mlx5_unload_one(dev); - return 0; + mlx5_unload_one_devl_locked(dev); + break; case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) - return mlx5_devlink_trigger_fw_live_patch(devlink, extack); - return mlx5_devlink_reload_fw_activate(devlink, extack); + ret = 
mlx5_devlink_trigger_fw_live_patch(devlink, extack); + else + ret = mlx5_devlink_reload_fw_activate(devlink, extack); + break; default: /* Unsupported action should not get to this function */ WARN_ON(1); - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; } + + devl_unlock(devlink); + return ret; } static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action, @@ -174,24 +190,29 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); + int ret = 0; + devl_lock(devlink); *actions_performed = BIT(action); switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - return mlx5_load_one(dev, false); + ret = mlx5_load_one_devl_locked(dev, false); + break; case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) break; /* On fw_activate action, also driver is reloaded and reinit performed */ *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); - return mlx5_load_one(dev, false); + ret = mlx5_load_one_devl_locked(dev, false); + break; default: /* Unsupported action should not get to this function */ WARN_ON(1); - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; } - return 0; + devl_unlock(devlink); + return ret; } static struct mlx5_devlink_trap *mlx5_find_trap_by_id(struct mlx5_core_dev *dev, int trap_id) @@ -828,28 +849,28 @@ static int mlx5_devlink_traps_register(struct devlink *devlink) struct mlx5_core_dev *core_dev = devlink_priv(devlink); int err; - err = devlink_trap_groups_register(devlink, mlx5_trap_groups_arr, - ARRAY_SIZE(mlx5_trap_groups_arr)); + err = devl_trap_groups_register(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); if (err) return err; - err = devlink_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr), - &core_dev->priv); + err = devl_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr), + &core_dev->priv); if (err) goto err_trap_group; return 0; err_trap_group: - devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr, - ARRAY_SIZE(mlx5_trap_groups_arr)); + devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); return err; } static void mlx5_devlink_traps_unregister(struct devlink *devlink) { - devlink_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr)); - devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr, - ARRAY_SIZE(mlx5_trap_groups_arr)); + devl_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr)); + devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); } int mlx5_devlink_register(struct devlink *devlink) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 04c0a5e1c89a..1839f1ab1ddd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -194,4 +194,14 @@ static inline void mlx5e_accel_cleanup_rx(struct mlx5e_priv *priv) { mlx5e_ktls_cleanup_rx(priv); } + +static inline int mlx5e_accel_init_tx(struct mlx5e_priv *priv) +{ + return mlx5e_ktls_init_tx(priv); +} + +static inline void mlx5e_accel_cleanup_tx(struct mlx5e_priv *priv) +{ + mlx5e_ktls_cleanup_tx(priv); +} #endif /* __MLX5E_EN_ACCEL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index 
d016624fbc9d..948400dee525 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -42,6 +42,8 @@ static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, } void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv); +int mlx5e_ktls_init_tx(struct mlx5e_priv *priv); +void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv); int mlx5e_ktls_init_rx(struct mlx5e_priv *priv); void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv); int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable); @@ -62,6 +64,8 @@ static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) struct mlx5e_tls_sw_stats { atomic64_t tx_tls_ctx; atomic64_t tx_tls_del; + atomic64_t tx_tls_pool_alloc; + atomic64_t tx_tls_pool_free; atomic64_t rx_tls_ctx; atomic64_t rx_tls_del; }; @@ -69,6 +73,7 @@ struct mlx5e_tls_sw_stats { struct mlx5e_tls { struct mlx5e_tls_sw_stats sw_stats; struct workqueue_struct *rx_wq; + struct mlx5e_tls_tx_pool *tx_pool; }; int mlx5e_ktls_init(struct mlx5e_priv *priv); @@ -83,6 +88,15 @@ static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) { } +static inline int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) +{ +} + static inline int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) { return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c index 2ab46c4247ff..7c1c0eb16787 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c @@ -41,6 +41,8 @@ static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_del) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_free) }, { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) }, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index fba21edf88d8..6b6c7044b64a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -35,30 +35,70 @@ u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *pa stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS); + stop_room += 1; /* fence nop */ return stop_room; } +static void mlx5e_ktls_set_tisc(struct mlx5_core_dev *mdev, void *tisc) +{ + MLX5_SET(tisc, tisc, tls_en, 1); + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); +} + static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; - void *tisc; - tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx)); - MLX5_SET(tisc, tisc, tls_en, 1); + return mlx5_core_create_tis(mdev, in, tisn); +} + +static int mlx5e_ktls_create_tis_cb(struct mlx5_core_dev *mdev, + struct mlx5_async_ctx 
*async_ctx, + u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + + mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx)); + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + + return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in), + out, outlen, callback, context); +} + +static int mlx5e_ktls_destroy_tis_cb(struct mlx5_core_dev *mdev, u32 tisn, + struct mlx5_async_ctx *async_ctx, + u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, tisn, tisn); - return mlx5e_create_tis(mdev, in, tisn); + return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in), + out, outlen, callback, context); } struct mlx5e_ktls_offload_context_tx { - struct tls_offload_context_tx *tx_ctx; - struct tls12_crypto_info_aes_gcm_128 crypto_info; - struct mlx5e_tls_sw_stats *sw_stats; + /* fast path */ u32 expected_seq; u32 tisn; - u32 key_id; bool ctx_post_pending; + /* control / resync */ + struct list_head list_node; /* member of the pool */ + struct tls12_crypto_info_aes_gcm_128 crypto_info; + struct tls_offload_context_tx *tx_ctx; + struct mlx5_core_dev *mdev; + struct mlx5e_tls_sw_stats *sw_stats; + u32 key_id; + u8 create_err : 1; }; static void @@ -82,28 +122,368 @@ mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx) return *ctx; } +/* struct for callback API management */ +struct mlx5e_async_ctx { + struct mlx5_async_work context; + struct mlx5_async_ctx async_ctx; + struct work_struct work; + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct completion complete; + int err; + union { + u32 out_create[MLX5_ST_SZ_DW(create_tis_out)]; + u32 out_destroy[MLX5_ST_SZ_DW(destroy_tis_out)]; + }; +}; + +static struct mlx5e_async_ctx *mlx5e_bulk_async_init(struct mlx5_core_dev *mdev, int n) +{ + struct mlx5e_async_ctx *bulk_async; + int i; + + bulk_async = kvcalloc(n, sizeof(struct mlx5e_async_ctx), GFP_KERNEL); + if (!bulk_async) + return NULL; + + for (i = 0; i < n; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + mlx5_cmd_init_async_ctx(mdev, &async->async_ctx); + init_completion(&async->complete); + } + + return bulk_async; +} + +static void mlx5e_bulk_async_cleanup(struct mlx5e_async_ctx *bulk_async, int n) +{ + int i; + + for (i = 0; i < n; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + mlx5_cmd_cleanup_async_ctx(&async->async_ctx); + } + kvfree(bulk_async); +} + +static void create_tis_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5e_async_ctx *async = + container_of(context, struct mlx5e_async_ctx, context); + struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx; + + if (status) { + async->err = status; + priv_tx->create_err = 1; + goto out; + } + + priv_tx->tisn = MLX5_GET(create_tis_out, async->out_create, tisn); +out: + complete(&async->complete); +} + +static void destroy_tis_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5e_async_ctx *async = + container_of(context, struct mlx5e_async_ctx, context); + struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx; + + complete(&async->complete); + kfree(priv_tx); +} + +static struct mlx5e_ktls_offload_context_tx * +mlx5e_tls_priv_tx_init(struct mlx5_core_dev *mdev, struct mlx5e_tls_sw_stats *sw_stats, + struct mlx5e_async_ctx *async) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + int err; + + 
priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL); + if (!priv_tx) + return ERR_PTR(-ENOMEM); + + priv_tx->mdev = mdev; + priv_tx->sw_stats = sw_stats; + + if (!async) { + err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn); + if (err) + goto err_out; + } else { + async->priv_tx = priv_tx; + err = mlx5e_ktls_create_tis_cb(mdev, &async->async_ctx, + async->out_create, sizeof(async->out_create), + create_tis_callback, &async->context); + if (err) + goto err_out; + } + + return priv_tx; + +err_out: + kfree(priv_tx); + return ERR_PTR(err); +} + +static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv_tx, + struct mlx5e_async_ctx *async) +{ + if (priv_tx->create_err) { + complete(&async->complete); + kfree(priv_tx); + return; + } + async->priv_tx = priv_tx; + mlx5e_ktls_destroy_tis_cb(priv_tx->mdev, priv_tx->tisn, + &async->async_ctx, + async->out_destroy, sizeof(async->out_destroy), + destroy_tis_callback, &async->context); +} + +static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev, + struct list_head *list, int size) +{ + struct mlx5e_ktls_offload_context_tx *obj; + struct mlx5e_async_ctx *bulk_async; + int i; + + bulk_async = mlx5e_bulk_async_init(mdev, size); + if (!bulk_async) + return; + + i = 0; + list_for_each_entry(obj, list, list_node) { + mlx5e_tls_priv_tx_cleanup(obj, &bulk_async[i]); + i++; + } + + for (i = 0; i < size; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + wait_for_completion(&async->complete); + } + mlx5e_bulk_async_cleanup(bulk_async, size); +} + +/* Recycling pool API */ + +#define MLX5E_TLS_TX_POOL_BULK (16) +#define MLX5E_TLS_TX_POOL_HIGH (4 * 1024) +#define MLX5E_TLS_TX_POOL_LOW (MLX5E_TLS_TX_POOL_HIGH / 4) + +struct mlx5e_tls_tx_pool { + struct mlx5_core_dev *mdev; + struct mlx5e_tls_sw_stats *sw_stats; + struct mutex lock; /* Protects access to the pool */ + struct list_head list; + size_t size; + + struct workqueue_struct *wq; + struct work_struct create_work; + struct work_struct destroy_work; +}; + +static void create_work(struct work_struct *work) +{ + struct mlx5e_tls_tx_pool *pool = + container_of(work, struct mlx5e_tls_tx_pool, create_work); + struct mlx5e_ktls_offload_context_tx *obj; + struct mlx5e_async_ctx *bulk_async; + LIST_HEAD(local_list); + int i, j, err = 0; + + bulk_async = mlx5e_bulk_async_init(pool->mdev, MLX5E_TLS_TX_POOL_BULK); + if (!bulk_async) + return; + + for (i = 0; i < MLX5E_TLS_TX_POOL_BULK; i++) { + obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, &bulk_async[i]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + list_add(&obj->list_node, &local_list); + } + + for (j = 0; j < i; j++) { + struct mlx5e_async_ctx *async = &bulk_async[j]; + + wait_for_completion(&async->complete); + if (!err && async->err) + err = async->err; + } + atomic64_add(i, &pool->sw_stats->tx_tls_pool_alloc); + mlx5e_bulk_async_cleanup(bulk_async, MLX5E_TLS_TX_POOL_BULK); + if (err) + goto err_out; + + mutex_lock(&pool->lock); + if (pool->size + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH) { + mutex_unlock(&pool->lock); + goto err_out; + } + list_splice(&local_list, &pool->list); + pool->size += MLX5E_TLS_TX_POOL_BULK; + if (pool->size <= MLX5E_TLS_TX_POOL_LOW) + queue_work(pool->wq, work); + mutex_unlock(&pool->lock); + return; + +err_out: + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, i); + atomic64_add(i, &pool->sw_stats->tx_tls_pool_free); +} + +static void destroy_work(struct work_struct *work) +{ + struct mlx5e_tls_tx_pool *pool = + container_of(work, struct 
mlx5e_tls_tx_pool, destroy_work); + struct mlx5e_ktls_offload_context_tx *obj; + LIST_HEAD(local_list); + int i = 0; + + mutex_lock(&pool->lock); + if (pool->size < MLX5E_TLS_TX_POOL_HIGH) { + mutex_unlock(&pool->lock); + return; + } + + list_for_each_entry(obj, &pool->list, list_node) + if (++i == MLX5E_TLS_TX_POOL_BULK) + break; + + list_cut_position(&local_list, &pool->list, &obj->list_node); + pool->size -= MLX5E_TLS_TX_POOL_BULK; + if (pool->size >= MLX5E_TLS_TX_POOL_HIGH) + queue_work(pool->wq, work); + mutex_unlock(&pool->lock); + + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK); + atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free); +} + +static struct mlx5e_tls_tx_pool *mlx5e_tls_tx_pool_init(struct mlx5_core_dev *mdev, + struct mlx5e_tls_sw_stats *sw_stats) +{ + struct mlx5e_tls_tx_pool *pool; + + BUILD_BUG_ON(MLX5E_TLS_TX_POOL_LOW + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH); + + pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->wq = create_singlethread_workqueue("mlx5e_tls_tx_pool"); + if (!pool->wq) + goto err_free; + + INIT_LIST_HEAD(&pool->list); + mutex_init(&pool->lock); + + INIT_WORK(&pool->create_work, create_work); + INIT_WORK(&pool->destroy_work, destroy_work); + + pool->mdev = mdev; + pool->sw_stats = sw_stats; + + return pool; + +err_free: + kvfree(pool); + return NULL; +} + +static void mlx5e_tls_tx_pool_list_cleanup(struct mlx5e_tls_tx_pool *pool) +{ + while (pool->size > MLX5E_TLS_TX_POOL_BULK) { + struct mlx5e_ktls_offload_context_tx *obj; + LIST_HEAD(local_list); + int i = 0; + + list_for_each_entry(obj, &pool->list, list_node) + if (++i == MLX5E_TLS_TX_POOL_BULK) + break; + + list_cut_position(&local_list, &pool->list, &obj->list_node); + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK); + atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free); + pool->size -= MLX5E_TLS_TX_POOL_BULK; + } + if (pool->size) { + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &pool->list, pool->size); + atomic64_add(pool->size, &pool->sw_stats->tx_tls_pool_free); + } +} + +static void mlx5e_tls_tx_pool_cleanup(struct mlx5e_tls_tx_pool *pool) +{ + mlx5e_tls_tx_pool_list_cleanup(pool); + destroy_workqueue(pool->wq); + kvfree(pool); +} + +static void pool_push(struct mlx5e_tls_tx_pool *pool, struct mlx5e_ktls_offload_context_tx *obj) +{ + mutex_lock(&pool->lock); + list_add(&obj->list_node, &pool->list); + if (++pool->size == MLX5E_TLS_TX_POOL_HIGH) + queue_work(pool->wq, &pool->destroy_work); + mutex_unlock(&pool->lock); +} + +static struct mlx5e_ktls_offload_context_tx *pool_pop(struct mlx5e_tls_tx_pool *pool) +{ + struct mlx5e_ktls_offload_context_tx *obj; + + mutex_lock(&pool->lock); + if (unlikely(pool->size == 0)) { + /* pool is empty: + * - trigger the populating work, and + * - serve the current context via the regular blocking api. 
+ */ + queue_work(pool->wq, &pool->create_work); + mutex_unlock(&pool->lock); + obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, NULL); + if (!IS_ERR(obj)) + atomic64_inc(&pool->sw_stats->tx_tls_pool_alloc); + return obj; + } + + obj = list_first_entry(&pool->list, struct mlx5e_ktls_offload_context_tx, + list_node); + list_del(&obj->list_node); + if (--pool->size == MLX5E_TLS_TX_POOL_LOW) + queue_work(pool->wq, &pool->create_work); + mutex_unlock(&pool->lock); + return obj; +} + +/* End of pool API */ + int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn) { struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_tls_tx_pool *pool; struct tls_context *tls_ctx; - struct mlx5_core_dev *mdev; struct mlx5e_priv *priv; int err; tls_ctx = tls_get_ctx(sk); priv = netdev_priv(netdev); - mdev = priv->mdev; + pool = priv->tls->tx_pool; - priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL); - if (!priv_tx) - return -ENOMEM; + priv_tx = pool_pop(pool); + if (IS_ERR(priv_tx)) + return PTR_ERR(priv_tx); - err = mlx5_ktls_create_key(mdev, crypto_info, &priv_tx->key_id); + err = mlx5_ktls_create_key(pool->mdev, crypto_info, &priv_tx->key_id); if (err) goto err_create_key; - priv_tx->sw_stats = &priv->tls->sw_stats; priv_tx->expected_seq = start_offload_tcp_sn; priv_tx->crypto_info = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; @@ -111,36 +491,29 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx); - err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn); - if (err) - goto err_create_tis; - priv_tx->ctx_post_pending = true; atomic64_inc(&priv_tx->sw_stats->tx_tls_ctx); return 0; -err_create_tis: - mlx5_ktls_destroy_key(mdev, priv_tx->key_id); err_create_key: - kfree(priv_tx); + pool_push(pool, priv_tx); return err; } void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx) { struct mlx5e_ktls_offload_context_tx *priv_tx; - struct mlx5_core_dev *mdev; + struct mlx5e_tls_tx_pool *pool; struct mlx5e_priv *priv; priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); priv = netdev_priv(netdev); - mdev = priv->mdev; + pool = priv->tls->tx_pool; atomic64_inc(&priv_tx->sw_stats->tx_tls_del); - mlx5e_destroy_tis(mdev, priv_tx->tisn); - mlx5_ktls_destroy_key(mdev, priv_tx->key_id); - kfree(priv_tx); + mlx5_ktls_destroy_key(priv_tx->mdev, priv_tx->key_id); + pool_push(pool, priv_tx); } static void tx_fill_wi(struct mlx5e_txqsq *sq, @@ -201,6 +574,16 @@ post_progress_params(struct mlx5e_txqsq *sq, sq->pc += num_wqebbs; } +static void tx_post_fence_nop(struct mlx5e_txqsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + tx_fill_wi(sq, pi, 1, 0, NULL); + + mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); +} + static void mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, struct mlx5e_ktls_offload_context_tx *priv_tx, @@ -212,6 +595,7 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, post_static_params(sq, priv_tx, fence_first_post); post_progress_params(sq, priv_tx, progress_fence); + tx_post_fence_nop(sq); } struct tx_sync_info { @@ -304,7 +688,7 @@ tx_post_resync_params(struct mlx5e_txqsq *sq, } static int -tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first) +tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn) { struct mlx5_wqe_ctrl_seg *cseg; struct mlx5_wqe_data_seg *dseg; @@ -326,7 +710,6 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 
tisn, bool fir cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); cseg->tis_tir_num = cpu_to_be32(tisn << 8); - cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, @@ -361,67 +744,39 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, stats->tls_dump_bytes += wi->num_bytes; } -static void tx_post_fence_nop(struct mlx5e_txqsq *sq) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - - tx_fill_wi(sq, pi, 1, 0, NULL); - - mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); -} - static enum mlx5e_ktls_sync_retval mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, struct mlx5e_txqsq *sq, int datalen, u32 seq) { - struct mlx5e_sq_stats *stats = sq->stats; enum mlx5e_ktls_sync_retval ret; struct tx_sync_info info = {}; - int i = 0; + int i; ret = tx_sync_info_get(priv_tx, seq, datalen, &info); - if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) { - if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) { - stats->tls_skip_no_sync_data++; - return MLX5E_KTLS_SYNC_SKIP_NO_DATA; - } - /* We might get here if a retransmission reaches the driver - * after the relevant record is acked. + if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) + /* We might get here with ret == FAIL if a retransmission + * reaches the driver after the relevant record is acked. * It should be safe to drop the packet in this case */ - stats->tls_drop_no_sync_data++; - goto err_out; - } - - stats->tls_ooo++; + return ret; tx_post_resync_params(sq, priv_tx, info.rcd_sn); - /* If no dump WQE was sent, we need to have a fence NOP WQE before the - * actual data xmit. - */ - if (!info.nr_frags) { - tx_post_fence_nop(sq); - return MLX5E_KTLS_SYNC_DONE; - } - - for (; i < info.nr_frags; i++) { + for (i = 0; i < info.nr_frags; i++) { unsigned int orig_fsz, frag_offset = 0, n = 0; skb_frag_t *f = &info.frags[i]; orig_fsz = skb_frag_size(f); do { - bool fence = !(i || frag_offset); unsigned int fsz; n++; fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset); skb_frag_size_set(f, fsz); - if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) { + if (tx_post_resync_dump(sq, f, priv_tx->tisn)) { page_ref_add(skb_frag_page(f), n - 1); goto err_out; } @@ -469,24 +824,27 @@ bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); - if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) { + if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false); - } seq = ntohl(tcp_hdr(skb)->seq); if (unlikely(priv_tx->expected_seq != seq)) { enum mlx5e_ktls_sync_retval ret = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq); + stats->tls_ooo++; + switch (ret) { case MLX5E_KTLS_SYNC_DONE: break; case MLX5E_KTLS_SYNC_SKIP_NO_DATA: + stats->tls_skip_no_sync_data++; if (likely(!skb->decrypted)) goto out; WARN_ON_ONCE(1); - fallthrough; + goto err_out; case MLX5E_KTLS_SYNC_FAIL: + stats->tls_drop_no_sync_data++; goto err_out; } } @@ -505,3 +863,24 @@ err_out: dev_kfree_skb_any(skb); return false; } + +int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) +{ + if (!mlx5e_is_ktls_tx(priv->mdev)) + return 0; + + priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats); + if (!priv->tls->tx_pool) + return -ENOMEM; + + return 0; +} + +void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) +{ + if 
(!mlx5e_is_ktls_tx(priv->mdev)) + return; + + mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool); + priv->tls->tx_pool = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 180b2f418339..24ddd438c066 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3144,6 +3144,7 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) mlx5e_mqprio_rl_free(priv->mqprio_rl); priv->mqprio_rl = NULL; } + mlx5e_accel_cleanup_tx(priv); mlx5e_destroy_tises(priv); } @@ -5147,9 +5148,17 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) return err; } + err = mlx5e_accel_init_tx(priv); + if (err) + goto err_destroy_tises; + mlx5e_set_mqprio_rl(priv); mlx5e_dcbnl_initialize(priv); return 0; + +err_destroy_tises: + mlx5e_destroy_tises(priv); + return err; } static void mlx5e_nic_enable(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 30a6c9fbf1b6..6aa58044b949 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1300,20 +1300,19 @@ abort: */ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { - struct devlink *devlink; bool toggle_lag; int ret; if (!mlx5_esw_allowed(esw)) return 0; + devl_assert_locked(priv_to_devlink(esw->dev)); + toggle_lag = !mlx5_esw_is_fdb_created(esw); if (toggle_lag) mlx5_lag_disable_change(esw->dev); - devlink = priv_to_devlink(esw->dev); - devl_lock(devlink); down_write(&esw->mode_lock); if (!mlx5_esw_is_fdb_created(esw)) { ret = mlx5_eswitch_enable_locked(esw, num_vfs); @@ -1327,7 +1326,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) esw->esw_funcs.num_vfs = num_vfs; } up_write(&esw->mode_lock); - devl_unlock(devlink); if (toggle_lag) mlx5_lag_enable_change(esw->dev); @@ -1338,13 +1336,10 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) /* When disabling sriov, free driver level resources. */ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) { - struct devlink *devlink; - if (!mlx5_esw_allowed(esw)) return; - devlink = priv_to_devlink(esw->dev); - devl_lock(devlink); + devl_assert_locked(priv_to_devlink(esw->dev)); down_write(&esw->mode_lock); /* If driver is unloaded, this function is called twice by remove_one() * and mlx5_unload(). Prevent the second call. @@ -1373,7 +1368,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) unlock: up_write(&esw->mode_lock); - devl_unlock(devlink); } /* Free resources for corresponding eswitch mode. 
It is called by devlink @@ -1407,18 +1401,14 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) void mlx5_eswitch_disable(struct mlx5_eswitch *esw) { - struct devlink *devlink; - if (!mlx5_esw_allowed(esw)) return; + devl_assert_locked(priv_to_devlink(esw->dev)); mlx5_lag_disable_change(esw->dev); - devlink = priv_to_devlink(esw->dev); - devl_lock(devlink); down_write(&esw->mode_lock); mlx5_eswitch_disable_locked(esw); up_write(&esw->mode_lock); - devl_unlock(devlink); mlx5_lag_enable_change(esw->dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 052af4901c0b..e8896f368362 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -149,6 +149,9 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { + mlx5_unload_one(dev); + if (mlx5_health_wait_pci_up(dev)) + mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); mlx5_load_one(dev, false); devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | @@ -183,15 +186,9 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work) struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, reset_reload_work); struct mlx5_core_dev *dev = fw_reset->dev; - int err; mlx5_sync_reset_clear_reset_requested(dev, false); mlx5_enter_error_state(dev, true); - mlx5_unload_one(dev); - err = mlx5_health_wait_pci_up(dev); - if (err) - mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); - fw_reset->ret = err; mlx5_fw_reset_complete_reload(dev); } @@ -395,7 +392,6 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) } mlx5_enter_error_state(dev, true); - mlx5_unload_one(dev); done: fw_reset->ret = err; mlx5_fw_reset_complete_reload(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 659021c31cbd..2cf2c9948446 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -666,16 +666,20 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) struct mlx5_fw_reporter_ctx fw_reporter_ctx; struct mlx5_core_health *health; struct mlx5_core_dev *dev; + struct devlink *devlink; struct mlx5_priv *priv; health = container_of(work, struct mlx5_core_health, fatal_report_work); priv = container_of(health, struct mlx5_priv, health); dev = container_of(priv, struct mlx5_core_dev, priv); + devlink = priv_to_devlink(dev); enter_error_state(dev, false); if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { + devl_lock(devlink); if (mlx5_health_try_recover(dev)) mlx5_core_err(dev, "health recovery failed\n"); + devl_unlock(devlink); return; } fw_reporter_ctx.err_synd = health->synd; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8b621c1ddd14..1de9b39a6359 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1304,8 +1304,10 @@ static void mlx5_unload(struct mlx5_core_dev *dev) int mlx5_init_one(struct mlx5_core_dev *dev) { + struct devlink *devlink = priv_to_devlink(dev); int err = 0; + devl_lock(devlink); mutex_lock(&dev->intf_state_mutex); dev->state = MLX5_DEVICE_STATE_UP; @@ -1334,6 +1336,7 @@ 
int mlx5_init_one(struct mlx5_core_dev *dev) goto err_register; mutex_unlock(&dev->intf_state_mutex); + devl_unlock(devlink); return 0; err_register: @@ -1348,11 +1351,15 @@ function_teardown: err_function: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mutex_unlock(&dev->intf_state_mutex); + devl_unlock(devlink); return err; } void mlx5_uninit_one(struct mlx5_core_dev *dev) { + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); mutex_lock(&dev->intf_state_mutex); mlx5_unregister_device(dev); @@ -1371,13 +1378,15 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) mlx5_function_teardown(dev, true); out: mutex_unlock(&dev->intf_state_mutex); + devl_unlock(devlink); } -int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery) +int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery) { int err = 0; u64 timeout; + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "interface is up, NOP\n"); @@ -1419,8 +1428,20 @@ out: return err; } -void mlx5_unload_one(struct mlx5_core_dev *dev) +int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery) +{ + struct devlink *devlink = priv_to_devlink(dev); + int ret; + + devl_lock(devlink); + ret = mlx5_load_one_devl_locked(dev, recovery); + devl_unlock(devlink); + return ret; +} + +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev) { + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&dev->intf_state_mutex); mlx5_detach_device(dev); @@ -1438,6 +1459,15 @@ out: mutex_unlock(&dev->intf_state_mutex); } +void mlx5_unload_one(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); + mlx5_unload_one_devl_locked(dev); + devl_unlock(devlink); +} + static const int types[] = { MLX5_CAP_GENERAL, MLX5_CAP_GENERAL_2, @@ -1902,7 +1932,7 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); void mlx5_disable_device(struct mlx5_core_dev *dev) { mlx5_error_sw_reset(dev); - mlx5_unload_one(dev); + mlx5_unload_one_devl_locked(dev); } int mlx5_recover_device(struct mlx5_core_dev *dev) @@ -1913,7 +1943,7 @@ int mlx5_recover_device(struct mlx5_core_dev *dev) return -EIO; } - return mlx5_load_one(dev, true); + return mlx5_load_one_devl_locked(dev, true); } static struct pci_driver mlx5_core_driver = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 9cc7afea2758..ad61b86d5769 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -290,7 +290,9 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev); int mlx5_init_one(struct mlx5_core_dev *dev); void mlx5_uninit_one(struct mlx5_core_dev *dev); void mlx5_unload_one(struct mlx5_core_dev *dev); +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev); int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); +int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 5757cd6e1819..ee2e1b7c1310 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -154,13 +154,16 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) { 
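mlx5 resolves the same lock-ordering problem with a small naming convention, visible in the mlx5_core.h hunk above: each flow gets a bare helper that takes the devlink lock itself plus a *_devl_locked() variant that only asserts it, so callers that already hold the lock (devlink reload, the health recovery path in health.c above) use the latter. Sketched generically below; struct example_core_dev and example_teardown() are illustrative, and the struct is assumed to be the devlink instance's priv (allocated via devlink_alloc()), mirroring mlx5:

	#include <net/devlink.h>

	static void example_unload_one_devl_locked(struct example_core_dev *dev)
	{
		devl_assert_locked(priv_to_devlink(dev));
		example_teardown(dev);		/* real work, lock already held */
	}

	static void example_unload_one(struct example_core_dev *dev)
	{
		struct devlink *devlink = priv_to_devlink(dev);

		devl_lock(devlink);
		example_unload_one_devl_locked(dev);
		devl_unlock(devlink);
	}

With this split, paths entered from the devlink core (which holds the lock) and paths entered from PCI callbacks (which must take it) share one implementation, and lockdep-style assertions catch any caller that picks the wrong variant.
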
struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); int err; + devl_lock(devlink); err = mlx5_device_enable_sriov(dev, num_vfs); + devl_unlock(devlink); if (err) { mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err); return err; } err = pci_enable_sriov(pdev, num_vfs); if (err) { @@ -173,10 +176,13 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) void mlx5_sriov_disable(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); int num_vfs = pci_num_vf(dev->pdev); pci_disable_sriov(pdev); + devl_lock(devlink); mlx5_device_disable_sriov(dev, num_vfs, true); + devl_unlock(devlink); } int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 4683312861ac..a510bf2cff2f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -7,6 +7,7 @@ config MLXSW_CORE tristate "Mellanox Technologies Switch ASICs support" select NET_DEVLINK select MLXFW + select AUXILIARY_BUS help This driver supports Mellanox Technologies Switch ASICs family. diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index c2d6d64ffe4b..3ca9fce759ea 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o mlxsw_core-objs := core.o core_acl_flex_keys.o \ core_acl_flex_actions.o core_env.o \ - core_linecards.o + core_linecards.o core_linecard_dev.o mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 1b61bc8f59a2..7c93bd04a3a1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -951,6 +951,20 @@ static struct mlxsw_driver *mlxsw_core_driver_get(const char *kind) return mlxsw_driver; } +int mlxsw_core_fw_flash(struct mlxsw_core *mlxsw_core, + struct mlxfw_dev *mlxfw_dev, + const struct firmware *firmware, + struct netlink_ext_ack *extack) +{ + int err; + + mlxsw_core->fw_flash_in_progress = true; + err = mlxfw_firmware_flash(mlxfw_dev, firmware, extack); + mlxsw_core->fw_flash_in_progress = false; + + return err; +} + struct mlxsw_core_fw_info { struct mlxfw_dev mlxfw_dev; struct mlxsw_core *mlxsw_core; @@ -1105,8 +1119,9 @@ static const struct mlxfw_dev_ops mlxsw_core_fw_mlxsw_dev_ops = { .fsm_release = mlxsw_core_fw_fsm_release, }; -static int mlxsw_core_fw_flash(struct mlxsw_core *mlxsw_core, const struct firmware *firmware, - struct netlink_ext_ack *extack) +static int mlxsw_core_dev_fw_flash(struct mlxsw_core *mlxsw_core, + const struct firmware *firmware, + struct netlink_ext_ack *extack) { struct mlxsw_core_fw_info mlxsw_core_fw_info = { .mlxfw_dev = { @@ -1117,13 +1132,9 @@ static int mlxsw_core_fw_flash(struct mlxsw_core *mlxsw_core, const struct firmw }, .mlxsw_core = mlxsw_core }; - int err; - mlxsw_core->fw_flash_in_progress = true; - err = mlxfw_firmware_flash(&mlxsw_core_fw_info.mlxfw_dev, firmware, extack); - mlxsw_core->fw_flash_in_progress = false; - - return err; + return mlxsw_core_fw_flash(mlxsw_core, &mlxsw_core_fw_info.mlxfw_dev, + firmware, extack); } static int 
mlxsw_core_fw_rev_validate(struct mlxsw_core *mlxsw_core, @@ -1169,7 +1180,7 @@ static int mlxsw_core_fw_rev_validate(struct mlxsw_core *mlxsw_core, return err; } - err = mlxsw_core_fw_flash(mlxsw_core, firmware, NULL); + err = mlxsw_core_dev_fw_flash(mlxsw_core, firmware, NULL); release_firmware(firmware); if (err) dev_err(mlxsw_bus_info->dev, "Could not upgrade firmware\n"); @@ -1187,7 +1198,7 @@ static int mlxsw_core_fw_flash_update(struct mlxsw_core *mlxsw_core, struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { - return mlxsw_core_fw_flash(mlxsw_core, params->fw, extack); + return mlxsw_core_dev_fw_flash(mlxsw_core, params->fw, extack); } static int mlxsw_core_devlink_param_fw_load_policy_validate(struct devlink *devlink, u32 id, @@ -3324,6 +3335,24 @@ u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_read_frc_l); +u32 mlxsw_core_read_utc_sec(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_utc_sec(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_utc_sec); + +u32 mlxsw_core_read_utc_nsec(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_utc_nsec(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_utc_nsec); + +bool mlxsw_core_sdq_supports_cqe_v2(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->driver->sdq_supports_cqe_v2; +} +EXPORT_SYMBOL(mlxsw_core_sdq_supports_cqe_v2); + void mlxsw_core_emad_string_tlv_enable(struct mlxsw_core *mlxsw_core) { mlxsw_core->emad.enable_string_tlv = true; @@ -3334,9 +3363,15 @@ static int __init mlxsw_core_module_init(void) { int err; + err = mlxsw_linecard_driver_register(); + if (err) + return err; + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0); - if (!mlxsw_wq) - return -ENOMEM; + if (!mlxsw_wq) { + err = -ENOMEM; + goto err_alloc_workqueue; + } mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, mlxsw_core_driver_name); if (!mlxsw_owq) { @@ -3347,6 +3382,8 @@ static int __init mlxsw_core_module_init(void) err_alloc_ordered_workqueue: destroy_workqueue(mlxsw_wq); +err_alloc_workqueue: + mlxsw_linecard_driver_unregister(); return err; } @@ -3354,6 +3391,7 @@ static void __exit mlxsw_core_module_exit(void) { destroy_workqueue(mlxsw_owq); destroy_workqueue(mlxsw_wq); + mlxsw_linecard_driver_unregister(); } module_init(mlxsw_core_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 9d2e8a8d3a75..02d9cc2ef0c8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -12,12 +12,14 @@ #include <linux/skbuff.h> #include <linux/workqueue.h> #include <linux/net_namespace.h> +#include <linux/auxiliary_bus.h> #include <net/devlink.h> #include "trap.h" #include "reg.h" #include "cmd.h" #include "resources.h" +#include "../mlxfw/mlxfw.h" enum mlxsw_core_resource_id { MLXSW_CORE_RESOURCE_PORTS = 1, @@ -47,6 +49,11 @@ mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver); +int mlxsw_core_fw_flash(struct mlxsw_core *mlxsw_core, + struct mlxfw_dev *mlxfw_dev, + const struct firmware *firmware, + struct netlink_ext_ack *extack); + int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, void *bus_priv, bool reload, @@ -420,6 +427,7 @@ struct mlxsw_driver { u8 txhdr_len; const struct mlxsw_config_profile *profile; + bool 
sdq_supports_cqe_v2; }; int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, @@ -430,6 +438,11 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core); u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core); +u32 mlxsw_core_read_utc_sec(struct mlxsw_core *mlxsw_core); +u32 mlxsw_core_read_utc_nsec(struct mlxsw_core *mlxsw_core); + +bool mlxsw_core_sdq_supports_cqe_v2(struct mlxsw_core *mlxsw_core); + void mlxsw_core_emad_string_tlv_enable(struct mlxsw_core *mlxsw_core); bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core, @@ -469,6 +482,8 @@ struct mlxsw_bus { u8 *p_status); u32 (*read_frc_h)(void *bus_priv); u32 (*read_frc_l)(void *bus_priv); + u32 (*read_utc_sec)(void *bus_priv); + u32 (*read_utc_nsec)(void *bus_priv); u8 features; }; @@ -543,11 +558,17 @@ enum mlxsw_devlink_param_id { MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, }; +struct mlxsw_cqe_ts { + u8 sec; + u32 nsec; +}; + struct mlxsw_skb_cb { union { struct mlxsw_tx_info tx_info; struct mlxsw_rx_md_info rx_md_info; }; + struct mlxsw_cqe_ts cqe_ts; }; static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb) @@ -563,6 +584,15 @@ enum mlxsw_linecard_status_event_type { MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION, }; +struct mlxsw_linecard_bdev; + +struct mlxsw_linecard_device_info { + u16 fw_major; + u16 fw_minor; + u16 fw_sub_minor; + char psid[MLXSW_REG_MGIR_FW_INFO_PSID_SIZE]; +}; + struct mlxsw_linecard { u8 slot_index; struct mlxsw_linecards *linecards; @@ -577,6 +607,11 @@ struct mlxsw_linecard { active:1; u16 hw_revision; u16 ini_version; + struct mlxsw_linecard_bdev *bdev; + struct { + struct mlxsw_linecard_device_info info; + u8 index; + } device; }; struct mlxsw_linecard_types_info; @@ -597,6 +632,14 @@ mlxsw_linecard_get(struct mlxsw_linecards *linecards, u8 slot_index) return &linecards->linecards[slot_index - 1]; } +int mlxsw_linecard_devlink_info_get(struct mlxsw_linecard *linecard, + struct devlink_info_req *req, + struct netlink_ext_ack *extack); +int mlxsw_linecard_flash_update(struct devlink *linecard_devlink, + struct mlxsw_linecard *linecard, + const struct firmware *firmware, + struct netlink_ext_ack *extack); + int mlxsw_linecards_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *bus_info); void mlxsw_linecards_fini(struct mlxsw_core *mlxsw_core); @@ -616,4 +659,10 @@ void mlxsw_linecards_event_ops_unregister(struct mlxsw_core *mlxsw_core, struct mlxsw_linecards_event_ops *ops, void *priv); +int mlxsw_linecard_bdev_add(struct mlxsw_linecard *linecard); +void mlxsw_linecard_bdev_del(struct mlxsw_linecard *linecard); + +int mlxsw_linecard_driver_register(void); +void mlxsw_linecard_driver_unregister(void); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c new file mode 100644 index 000000000000..af37e650a8ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2022 NVIDIA Corporation and Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/err.h> +#include <linux/types.h> +#include <linux/auxiliary_bus.h> +#include <linux/idr.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <net/devlink.h> +#include "core.h" + +#define MLXSW_LINECARD_DEV_ID_NAME "lc" + +struct mlxsw_linecard_dev { + struct mlxsw_linecard *linecard; +}; + +struct mlxsw_linecard_bdev { + struct auxiliary_device adev; + struct mlxsw_linecard *linecard; + struct mlxsw_linecard_dev *linecard_dev; +}; + +static DEFINE_IDA(mlxsw_linecard_bdev_ida); + +static int mlxsw_linecard_bdev_id_alloc(void) +{ + return ida_alloc(&mlxsw_linecard_bdev_ida, GFP_KERNEL); +} + +static void mlxsw_linecard_bdev_id_free(int id) +{ + ida_free(&mlxsw_linecard_bdev_ida, id); +} + +static void mlxsw_linecard_bdev_release(struct device *device) +{ + struct auxiliary_device *adev = + container_of(device, struct auxiliary_device, dev); + struct mlxsw_linecard_bdev *linecard_bdev = + container_of(adev, struct mlxsw_linecard_bdev, adev); + + mlxsw_linecard_bdev_id_free(adev->id); + kfree(linecard_bdev); +} + +int mlxsw_linecard_bdev_add(struct mlxsw_linecard *linecard) +{ + struct mlxsw_linecard_bdev *linecard_bdev; + int err; + int id; + + id = mlxsw_linecard_bdev_id_alloc(); + if (id < 0) + return id; + + linecard_bdev = kzalloc(sizeof(*linecard_bdev), GFP_KERNEL); + if (!linecard_bdev) { + mlxsw_linecard_bdev_id_free(id); + return -ENOMEM; + } + linecard_bdev->adev.id = id; + linecard_bdev->adev.name = MLXSW_LINECARD_DEV_ID_NAME; + linecard_bdev->adev.dev.release = mlxsw_linecard_bdev_release; + linecard_bdev->adev.dev.parent = linecard->linecards->bus_info->dev; + linecard_bdev->linecard = linecard; + + err = auxiliary_device_init(&linecard_bdev->adev); + if (err) { + mlxsw_linecard_bdev_id_free(id); + kfree(linecard_bdev); + return err; + } + + err = auxiliary_device_add(&linecard_bdev->adev); + if (err) { + auxiliary_device_uninit(&linecard_bdev->adev); + return err; + } + + linecard->bdev = linecard_bdev; + return 0; +} + +void mlxsw_linecard_bdev_del(struct mlxsw_linecard *linecard) +{ + struct mlxsw_linecard_bdev *linecard_bdev = linecard->bdev; + + if (!linecard_bdev) + /* Unprovisioned line cards do not have an auxiliary device. 
*/ + return; + auxiliary_device_delete(&linecard_bdev->adev); + auxiliary_device_uninit(&linecard_bdev->adev); + linecard->bdev = NULL; +} + +static int mlxsw_linecard_dev_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct mlxsw_linecard_dev *linecard_dev = devlink_priv(devlink); + struct mlxsw_linecard *linecard = linecard_dev->linecard; + + return mlxsw_linecard_devlink_info_get(linecard, req, extack); +} + +static int +mlxsw_linecard_dev_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_linecard_dev *linecard_dev = devlink_priv(devlink); + struct mlxsw_linecard *linecard = linecard_dev->linecard; + + return mlxsw_linecard_flash_update(devlink, linecard, + params->fw, extack); +} + +static const struct devlink_ops mlxsw_linecard_dev_devlink_ops = { + .info_get = mlxsw_linecard_dev_devlink_info_get, + .flash_update = mlxsw_linecard_dev_devlink_flash_update, +}; + +static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlxsw_linecard_bdev *linecard_bdev = + container_of(adev, struct mlxsw_linecard_bdev, adev); + struct mlxsw_linecard *linecard = linecard_bdev->linecard; + struct mlxsw_linecard_dev *linecard_dev; + struct devlink *devlink; + + devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops, + sizeof(*linecard_dev), &adev->dev); + if (!devlink) + return -ENOMEM; + linecard_dev = devlink_priv(devlink); + linecard_dev->linecard = linecard_bdev->linecard; + linecard_bdev->linecard_dev = linecard_dev; + + devlink_register(devlink); + devlink_linecard_nested_dl_set(linecard->devlink_linecard, devlink); + return 0; +} + +static void mlxsw_linecard_bdev_remove(struct auxiliary_device *adev) +{ + struct mlxsw_linecard_bdev *linecard_bdev = + container_of(adev, struct mlxsw_linecard_bdev, adev); + struct devlink *devlink = priv_to_devlink(linecard_bdev->linecard_dev); + struct mlxsw_linecard *linecard = linecard_bdev->linecard; + + devlink_linecard_nested_dl_set(linecard->devlink_linecard, NULL); + devlink_unregister(devlink); + devlink_free(devlink); +} + +static const struct auxiliary_device_id mlxsw_linecard_bdev_id_table[] = { + { .name = KBUILD_MODNAME "." 
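
One subtlety in the add/del pair above: once auxiliary_device_init() has succeeded, the wrapper struct must no longer be kfree()d directly, because the embedded struct device is refcounted; auxiliary_device_uninit() drops the initial reference, and the memory (plus the IDA-allocated id) is reclaimed only from the .release callback. A condensed sketch of that contract, using a hypothetical my_adev wrapper:

#include <linux/auxiliary_bus.h>
#include <linux/slab.h>

struct my_adev {
	struct auxiliary_device adev;
};

/* Runs when the last reference to the device is put. */
static void my_adev_release(struct device *dev)
{
	struct my_adev *my = container_of(dev, struct my_adev, adev.dev);

	kfree(my);	/* only now is freeing the wrapper safe */
}

static void my_adev_del(struct my_adev *my)
{
	auxiliary_device_delete(&my->adev);	/* unbind and remove from bus */
	auxiliary_device_uninit(&my->adev);	/* put_device() -> .release */
}

This is also why mlxsw_linecard_bdev_add() above may kfree() directly when auxiliary_device_init() fails, but switches to auxiliary_device_uninit() for every error after init has succeeded.
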
MLXSW_LINECARD_DEV_ID_NAME }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlxsw_linecard_bdev_id_table); + +static struct auxiliary_driver mlxsw_linecard_driver = { + .name = MLXSW_LINECARD_DEV_ID_NAME, + .probe = mlxsw_linecard_bdev_probe, + .remove = mlxsw_linecard_bdev_remove, + .id_table = mlxsw_linecard_bdev_id_table, +}; + +int mlxsw_linecard_driver_register(void) +{ + return auxiliary_driver_register(&mlxsw_linecard_driver); +} + +void mlxsw_linecard_driver_unregister(void) +{ + auxiliary_driver_unregister(&mlxsw_linecard_driver); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c index 5c9869dcf674..ca59f0b946da 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c @@ -13,6 +13,7 @@ #include <linux/vmalloc.h> #include "core.h" +#include "../mlxfw/mlxfw.h" struct mlxsw_linecard_ini_file { __le16 size; @@ -87,6 +88,351 @@ static const char *mlxsw_linecard_type_name(struct mlxsw_linecard *linecard) return linecard->name; } +struct mlxsw_linecard_device_fw_info { + struct mlxfw_dev mlxfw_dev; + struct mlxsw_core *mlxsw_core; + struct mlxsw_linecard *linecard; +}; + +static int mlxsw_linecard_device_fw_component_query(struct mlxfw_dev *mlxfw_dev, + u16 component_index, + u32 *p_max_size, + u8 *p_align_bits, + u16 *p_max_write_size) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcqi_pl; + int err; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_QUERY, + MLXSW_REG(mcqi), &mcqi_pl); + + mlxsw_reg_mcqi_pack(mcqi_pl, component_index); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddt), mddt_pl); + if (err) + return err; + mlxsw_reg_mcqi_unpack(mcqi_pl, p_max_size, p_align_bits, + p_max_write_size); + + *p_align_bits = max_t(u8, *p_align_bits, 2); + *p_max_write_size = min_t(u16, *p_max_write_size, + MLXSW_REG_MCDA_MAX_DATA_LEN); + return 0; +} + +static int mlxsw_linecard_device_fw_fsm_lock(struct mlxfw_dev *mlxfw_dev, + u32 *fwhandle) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + u8 control_state; + char *mcc_pl; + int err; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_QUERY, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, 0, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddt), mddt_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, fwhandle, NULL, &control_state); + if (control_state != MLXFW_FSM_STATE_IDLE) + return -EBUSY; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, + 0, *fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static int +mlxsw_linecard_device_fw_fsm_component_update(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, + u16 component_index, + u32 component_size) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct 
mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcc_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, + component_index, fwhandle, component_size); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static int +mlxsw_linecard_device_fw_fsm_block_download(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, u8 *data, + u16 size, u32 offset) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcda_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcda), &mcda_pl); + mlxsw_reg_mcda_pack(mcda_pl, fwhandle, offset, size, data); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static int +mlxsw_linecard_device_fw_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, u16 component_index) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcc_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, + component_index, fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static int mlxsw_linecard_device_fw_fsm_activate(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcc_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_ACTIVATE, + 0, fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static int +mlxsw_linecard_device_fw_fsm_query_state(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + u8 control_state; + u8 error_code; + char *mcc_pl; + int err; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_QUERY, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, fwhandle, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddt), mddt_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, NULL, &error_code, &control_state); + *fsm_state = control_state; + *fsm_state_err = min_t(enum mlxfw_fsm_state_err, 
error_code, + MLXFW_FSM_STATE_ERR_MAX); + return 0; +} + +static void mlxsw_linecard_device_fw_fsm_cancel(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcc_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_CANCEL, + 0, fwhandle, 0); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static void mlxsw_linecard_device_fw_fsm_release(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle) +{ + struct mlxsw_linecard_device_fw_info *info = + container_of(mlxfw_dev, struct mlxsw_linecard_device_fw_info, + mlxfw_dev); + struct mlxsw_linecard *linecard = info->linecard; + struct mlxsw_core *mlxsw_core = info->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mcc_pl; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, + linecard->device.index, + MLXSW_REG_MDDT_METHOD_WRITE, + MLXSW_REG(mcc), &mcc_pl); + mlxsw_reg_mcc_pack(mcc_pl, + MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, + 0, fwhandle, 0); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddt), mddt_pl); +} + +static const struct mlxfw_dev_ops mlxsw_linecard_device_dev_ops = { + .component_query = mlxsw_linecard_device_fw_component_query, + .fsm_lock = mlxsw_linecard_device_fw_fsm_lock, + .fsm_component_update = mlxsw_linecard_device_fw_fsm_component_update, + .fsm_block_download = mlxsw_linecard_device_fw_fsm_block_download, + .fsm_component_verify = mlxsw_linecard_device_fw_fsm_component_verify, + .fsm_activate = mlxsw_linecard_device_fw_fsm_activate, + .fsm_query_state = mlxsw_linecard_device_fw_fsm_query_state, + .fsm_cancel = mlxsw_linecard_device_fw_fsm_cancel, + .fsm_release = mlxsw_linecard_device_fw_fsm_release, +}; + +int mlxsw_linecard_flash_update(struct devlink *linecard_devlink, + struct mlxsw_linecard *linecard, + const struct firmware *firmware, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; + struct mlxsw_linecard_device_fw_info info = { + .mlxfw_dev = { + .ops = &mlxsw_linecard_device_dev_ops, + .psid = linecard->device.info.psid, + .psid_size = strlen(linecard->device.info.psid), + .devlink = linecard_devlink, + }, + .mlxsw_core = mlxsw_core, + .linecard = linecard, + }; + int err; + + mutex_lock(&linecard->lock); + if (!linecard->active) { + NL_SET_ERR_MSG_MOD(extack, "Only active line cards can be flashed"); + err = -EINVAL; + goto unlock; + } + err = mlxsw_core_fw_flash(mlxsw_core, &info.mlxfw_dev, + firmware, extack); +unlock: + mutex_unlock(&linecard->lock); + return err; +} + +static int mlxsw_linecard_device_psid_get(struct mlxsw_linecard *linecard, + u8 device_index, char *psid) +{ + struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; + char mddt_pl[MLXSW_REG_MDDT_LEN]; + char *mgir_pl; + int err; + + mlxsw_reg_mddt_pack(mddt_pl, linecard->slot_index, device_index, + MLXSW_REG_MDDT_METHOD_QUERY, + MLXSW_REG(mgir), &mgir_pl); + + mlxsw_reg_mgir_pack(mgir_pl); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddt), mddt_pl); + if (err) + return err; + + mlxsw_reg_mgir_fw_info_psid_memcpy_from(mgir_pl, psid); + return 0; +} + +static int mlxsw_linecard_device_info_update(struct mlxsw_linecard *linecard) +{ + struct mlxsw_core 
*mlxsw_core = linecard->linecards->mlxsw_core; + bool flashable_found = false; + u8 msg_seq = 0; + + do { + struct mlxsw_linecard_device_info info; + char mddq_pl[MLXSW_REG_MDDQ_LEN]; + bool flash_owner; + bool data_valid; + u8 device_index; + int err; + + mlxsw_reg_mddq_device_info_pack(mddq_pl, linecard->slot_index, + msg_seq); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddq), mddq_pl); + if (err) + return err; + mlxsw_reg_mddq_device_info_unpack(mddq_pl, &msg_seq, + &data_valid, &flash_owner, + &device_index, + &info.fw_major, + &info.fw_minor, + &info.fw_sub_minor); + if (!data_valid) + break; + if (!flash_owner) /* We care only about flashable ones. */ + continue; + if (flashable_found) { + dev_warn_once(linecard->linecards->bus_info->dev, "linecard %u: More flashable devices present, exposing only the first one\n", + linecard->slot_index); + return 0; + } + + err = mlxsw_linecard_device_psid_get(linecard, device_index, + info.psid); + if (err) + return err; + + linecard->device.info = info; + linecard->device.index = device_index; + flashable_found = true; + } while (msg_seq); + + return 0; +} + static void mlxsw_linecard_provision_fail(struct mlxsw_linecard *linecard) { linecard->provisioned = false; @@ -226,12 +572,57 @@ void mlxsw_linecards_event_ops_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_linecards_event_ops_unregister); +int mlxsw_linecard_devlink_info_get(struct mlxsw_linecard *linecard, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + char buf[32]; + int err; + + mutex_lock(&linecard->lock); + if (WARN_ON(!linecard->provisioned)) { + err = -EOPNOTSUPP; + goto unlock; + } + + sprintf(buf, "%d", linecard->hw_revision); + err = devlink_info_version_fixed_put(req, "hw.revision", buf); + if (err) + goto unlock; + + sprintf(buf, "%d", linecard->ini_version); + err = devlink_info_version_running_put(req, "ini.version", buf); + if (err) + goto unlock; + + if (linecard->active) { + struct mlxsw_linecard_device_info *info = &linecard->device.info; + + err = devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_PSID, + info->psid); + if (err) + goto unlock; + + sprintf(buf, "%u.%u.%u", info->fw_major, info->fw_minor, + info->fw_sub_minor); + err = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + buf); + if (err) + goto unlock; + } + +unlock: + mutex_unlock(&linecard->lock); + return err; +} + static int mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type, u16 hw_revision, u16 ini_version) { struct mlxsw_linecards *linecards = linecard->linecards; const char *type; + int err; type = mlxsw_linecard_types_lookup(linecards, card_type); mlxsw_linecard_status_event_done(linecard, @@ -252,6 +643,14 @@ mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type, linecard->provisioned = true; linecard->hw_revision = hw_revision; linecard->ini_version = ini_version; + + err = mlxsw_linecard_bdev_add(linecard); + if (err) { + linecard->provisioned = false; + mlxsw_linecard_provision_fail(linecard); + return err; + } + devlink_linecard_provision_set(linecard->devlink_linecard, type); return 0; } @@ -260,6 +659,7 @@ static void mlxsw_linecard_provision_clear(struct mlxsw_linecard *linecard) { mlxsw_linecard_status_event_done(linecard, MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION); + mlxsw_linecard_bdev_del(linecard); linecard->provisioned = false; devlink_linecard_provision_clear(linecard->devlink_linecard); }
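
For reference, the devlink info callback above distinguishes "fixed" versions (properties of the hardware that cannot change at runtime, such as the PSID) from "running" versions (what currently executes, such as the firmware). A minimal sketch of an .info_get implementation for a hypothetical device:

#include <linux/kernel.h>
#include <net/devlink.h>

static int my_devlink_info_get(struct devlink *devlink,
			       struct devlink_info_req *req,
			       struct netlink_ext_ack *extack)
{
	char buf[16];
	int err;

	/* Fixed: identity of the hardware, never changes at runtime. */
	err = devlink_info_version_fixed_put(req, "board.rev", "A1");
	if (err)
		return err;

	/* Running: what executes right now; changes after a flash. */
	snprintf(buf, sizeof(buf), "%u.%u.%u", 2, 3, 1904);
	return devlink_info_version_running_put(req,
						DEVLINK_INFO_VERSION_GENERIC_FW,
						buf);
}

@@ -270,6 +670,10 @@ static int mlxsw_linecard_ready_set(struct mlxsw_linecard *linecard)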
char mddc_pl[MLXSW_REG_MDDC_LEN]; int err; + err = mlxsw_linecard_device_info_update(linecard); + if (err) + return err; + mlxsw_reg_mddc_pack(mddc_pl, linecard->slot_index, false, true); err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mddc), mddc_pl); if (err) @@ -885,6 +1289,7 @@ static void mlxsw_linecard_fini(struct mlxsw_core *mlxsw_core, mlxsw_core_flush_owq(); if (linecard->active) mlxsw_linecard_active_clear(linecard); + mlxsw_linecard_bdev_del(linecard); devlink_linecard_destroy(linecard->devlink_linecard); mutex_destroy(&linecard->lock); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 83659fb0559a..50527adc5b5a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -103,6 +103,8 @@ struct mlxsw_pci { struct pci_dev *pdev; u8 __iomem *hw_addr; u64 free_running_clock_offset; + u64 utc_sec_offset; + u64 utc_nsec_offset; struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT]; u32 doorbell_offset; struct mlxsw_core *core; @@ -456,9 +458,9 @@ static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci, { q->u.cq.v = mlxsw_pci->max_cqe_ver; - /* For SDQ it is pointless to use CQEv2, so use CQEv1 instead */ if (q->u.cq.v == MLXSW_PCI_CQE_V2 && - q->num < mlxsw_pci->num_sdq_cqs) + q->num < mlxsw_pci->num_sdq_cqs && + !mlxsw_core_sdq_supports_cqe_v2(mlxsw_pci->core)) q->u.cq.v = MLXSW_PCI_CQE_V1; } @@ -511,9 +513,26 @@ static unsigned int mlxsw_pci_read32_off(struct mlxsw_pci *mlxsw_pci, return ioread32be(mlxsw_pci->hw_addr + off); } +static void mlxsw_pci_skb_cb_ts_set(struct mlxsw_pci *mlxsw_pci, + struct sk_buff *skb, + enum mlxsw_pci_cqe_v cqe_v, char *cqe) +{ + if (cqe_v != MLXSW_PCI_CQE_V2) + return; + + if (mlxsw_pci_cqe2_time_stamp_type_get(cqe) != + MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC) + return; + + mlxsw_skb_cb(skb)->cqe_ts.sec = mlxsw_pci_cqe2_time_stamp_sec_get(cqe); + mlxsw_skb_cb(skb)->cqe_ts.nsec = + mlxsw_pci_cqe2_time_stamp_nsec_get(cqe); +} + static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q, u16 consumer_counter_limit, + enum mlxsw_pci_cqe_v cqe_v, char *cqe) { struct pci_dev *pdev = mlxsw_pci->pdev; @@ -533,6 +552,7 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, if (unlikely(!tx_info.is_emad && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe); mlxsw_core_ptp_transmitted(mlxsw_pci->core, skb, tx_info.local_port); skb = NULL; @@ -653,6 +673,8 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe); } + mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe); + byte_count = mlxsw_pci_cqe_byte_count_get(cqe); if (mlxsw_pci_cqe_crc_get(cqe_v, cqe)) byte_count -= ETH_FCS_LEN; @@ -704,7 +726,7 @@ static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, - wqe_counter, ncqe); + wqe_counter, q->u.cq.v, ncqe); q->u.cq.comp_sdq_count++; } else { struct mlxsw_pci_queue *rdq; @@ -1537,6 +1559,24 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, mlxsw_pci->free_running_clock_offset = mlxsw_cmd_mbox_query_fw_free_running_clock_offset_get(mbox); + if (mlxsw_cmd_mbox_query_fw_utc_sec_bar_get(mbox) != 0) { + dev_err(&pdev->dev, "Unsupported UTC sec BAR queried from hw\n"); + err = -EINVAL; + goto err_utc_sec_bar; + } + + mlxsw_pci->utc_sec_offset = + 
mlxsw_cmd_mbox_query_fw_utc_sec_offset_get(mbox); + + if (mlxsw_cmd_mbox_query_fw_utc_nsec_bar_get(mbox) != 0) { + dev_err(&pdev->dev, "Unsupported UTC nsec BAR queried from hw\n"); + err = -EINVAL; + goto err_utc_nsec_bar; + } + + mlxsw_pci->utc_nsec_offset = + mlxsw_cmd_mbox_query_fw_utc_nsec_offset_get(mbox); + num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox); err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages); if (err) @@ -1601,6 +1641,8 @@ err_query_resources: err_boardinfo: mlxsw_pci_fw_area_fini(mlxsw_pci); err_fw_area_init: +err_utc_nsec_bar: +err_utc_sec_bar: err_fr_rn_clk_bar: err_doorbell_page_bar: err_iface_rev: @@ -1830,6 +1872,20 @@ static u32 mlxsw_pci_read_frc_l(void *bus_priv) return mlxsw_pci_read32_off(mlxsw_pci, frc_offset_l); } +static u32 mlxsw_pci_read_utc_sec(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + + return mlxsw_pci_read32_off(mlxsw_pci, mlxsw_pci->utc_sec_offset); +} + +static u32 mlxsw_pci_read_utc_nsec(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + + return mlxsw_pci_read32_off(mlxsw_pci, mlxsw_pci->utc_nsec_offset); +} + static const struct mlxsw_bus mlxsw_pci_bus = { .kind = "pci", .init = mlxsw_pci_init, @@ -1839,6 +1895,8 @@ static const struct mlxsw_bus mlxsw_pci_bus = { .cmd_exec = mlxsw_pci_cmd_exec, .read_frc_h = mlxsw_pci_read_frc_h, .read_frc_l = mlxsw_pci_read_frc_l, + .read_utc_sec = mlxsw_pci_read_utc_sec, + .read_utc_nsec = mlxsw_pci_read_utc_nsec, .features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index ddab5476c8b2..f27bdecdf952 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -11364,6 +11364,95 @@ mlxsw_reg_mbct_unpack(const char *payload, u8 *p_slot_index, *p_fsm_state = mlxsw_reg_mbct_fsm_state_get(payload); } +/* MDDT - Management DownStream Device Tunneling Register + * ------------------------------------------------------ + * This register allows to deliver query and request messages (PRM registers, + * commands) to a DownStream device. + */ +#define MLXSW_REG_MDDT_ID 0x9160 +#define MLXSW_REG_MDDT_LEN 0x110 + +MLXSW_REG_DEFINE(mddt, MLXSW_REG_MDDT_ID, MLXSW_REG_MDDT_LEN); + +/* reg_mddt_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, mddt, slot_index, 0x00, 8, 4); + +/* reg_mddt_device_index + * Device index. + * Access: Index + */ +MLXSW_ITEM32(reg, mddt, device_index, 0x00, 0, 8); + +/* reg_mddt_read_size + * Read size in D-Words. + * Access: OP + */ +MLXSW_ITEM32(reg, mddt, read_size, 0x04, 24, 8); + +/* reg_mddt_write_size + * Write size in D-Words. + * Access: OP + */ +MLXSW_ITEM32(reg, mddt, write_size, 0x04, 16, 8); + +enum mlxsw_reg_mddt_status { + MLXSW_REG_MDDT_STATUS_OK, +}; + +/* reg_mddt_status + * Return code of the Downstream Device to the register that was sent. 
+ * Access: RO + */ +MLXSW_ITEM32(reg, mddt, status, 0x0C, 24, 8); + +enum mlxsw_reg_mddt_method { + MLXSW_REG_MDDT_METHOD_QUERY, + MLXSW_REG_MDDT_METHOD_WRITE, +}; + +/* reg_mddt_method + * Access: OP + */ +MLXSW_ITEM32(reg, mddt, method, 0x0C, 22, 2); + +/* reg_mddt_register_id + * Access: Index + */ +MLXSW_ITEM32(reg, mddt, register_id, 0x0C, 0, 16); + +#define MLXSW_REG_MDDT_PAYLOAD_OFFSET 0x0C +#define MLXSW_REG_MDDT_PRM_REGISTER_HEADER_LEN 4 + +static inline char *mlxsw_reg_mddt_inner_payload(char *payload) +{ + return payload + MLXSW_REG_MDDT_PAYLOAD_OFFSET + + MLXSW_REG_MDDT_PRM_REGISTER_HEADER_LEN; +} + +static inline void mlxsw_reg_mddt_pack(char *payload, u8 slot_index, + u8 device_index, + enum mlxsw_reg_mddt_method method, + const struct mlxsw_reg_info *reg, + char **inner_payload) +{ + int len = reg->len + MLXSW_REG_MDDT_PRM_REGISTER_HEADER_LEN; + + if (WARN_ON(len + MLXSW_REG_MDDT_PAYLOAD_OFFSET > MLXSW_REG_MDDT_LEN)) + len = MLXSW_REG_MDDT_LEN - MLXSW_REG_MDDT_PAYLOAD_OFFSET; + + MLXSW_REG_ZERO(mddt, payload); + mlxsw_reg_mddt_slot_index_set(payload, slot_index); + mlxsw_reg_mddt_device_index_set(payload, device_index); + mlxsw_reg_mddt_method_set(payload, method); + mlxsw_reg_mddt_register_id_set(payload, reg->id); + mlxsw_reg_mddt_read_size_set(payload, len / 4); + mlxsw_reg_mddt_write_size_set(payload, len / 4); + *inner_payload = mlxsw_reg_mddt_inner_payload(payload); +} + /* MDDQ - Management DownStream Device Query Register * -------------------------------------------------- * This register allows to query the DownStream device properties. The desired @@ -11385,7 +11474,11 @@ MLXSW_ITEM32(reg, mddq, sie, 0x00, 31, 1); enum mlxsw_reg_mddq_query_type { MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_INFO = 1, - MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_NAME = 3, + MLXSW_REG_MDDQ_QUERY_TYPE_DEVICE_INFO, /* If there are no devices + * on the slot, data_valid + * will be '0'. + */ + MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_NAME, }; /* reg_mddq_query_type @@ -11399,6 +11492,28 @@ MLXSW_ITEM32(reg, mddq, query_type, 0x00, 16, 8); */ MLXSW_ITEM32(reg, mddq, slot_index, 0x00, 0, 4); +/* reg_mddq_response_msg_seq + * Response message sequential number. For a specific request, the response + * message sequential number is the following one. In addition, the last + * message should be 0. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, response_msg_seq, 0x04, 16, 8); + +/* reg_mddq_request_msg_seq + * Request message sequential number. + * The first message number should be 0. + * Access: Index + */ +MLXSW_ITEM32(reg, mddq, request_msg_seq, 0x04, 0, 8); + +/* reg_mddq_data_valid + * If set, the data in the data field is valid and contain the information + * for the queried index. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, data_valid, 0x08, 31, 1); + /* reg_mddq_slot_info_provisioned * If set, the INI file is applied and the card is provisioned. * Access: RO @@ -11485,6 +11600,61 @@ mlxsw_reg_mddq_slot_info_unpack(const char *payload, u8 *p_slot_index, *p_card_type = mlxsw_reg_mddq_slot_info_card_type_get(payload); } +/* reg_mddq_device_info_flash_owner + * If set, the device is the flash owner. Otherwise, a shared flash + * is used by this device (another device is the flash owner). + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, device_info_flash_owner, 0x10, 30, 1); + +/* reg_mddq_device_info_device_index + * Device index. The first device should number 0. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, device_info_device_index, 0x10, 0, 8); + +/* reg_mddq_device_info_fw_major + * Major FW version number. 
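
Taken together, tunneling a register to a downstream device is a three-step sequence: pack the outer MDDT header with the slot/device address and method, pack the tunneled register at the inner payload pointer that mlxsw_reg_mddt_pack() returns, then issue a single query or write of MDDT itself. The following isolates those steps; it restates the PSID helper from core_linecards.c above rather than adding new behavior:

#include "core.h"
#include "reg.h"

static int lc_device_psid_query(struct mlxsw_core *mlxsw_core, u8 slot_index,
				u8 device_index, char *psid)
{
	char mddt_pl[MLXSW_REG_MDDT_LEN];
	char *mgir_pl;
	int err;

	/* 1) outer header: address the downstream device, pick the method */
	mlxsw_reg_mddt_pack(mddt_pl, slot_index, device_index,
			    MLXSW_REG_MDDT_METHOD_QUERY,
			    MLXSW_REG(mgir), &mgir_pl);
	/* 2) tunneled register is packed in place, inside the MDDT payload */
	mlxsw_reg_mgir_pack(mgir_pl);
	/* 3) a single MDDT transaction carries the inner query */
	err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddt), mddt_pl);
	if (err)
		return err;

	mlxsw_reg_mgir_fw_info_psid_memcpy_from(mgir_pl, psid);
	return 0;
}
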
+ * Access: RO + */ +MLXSW_ITEM32(reg, mddq, device_info_fw_major, 0x14, 16, 16); + +/* reg_mddq_device_info_fw_minor + * Minor FW version number. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, device_info_fw_minor, 0x18, 16, 16); + +/* reg_mddq_device_info_fw_sub_minor + * Sub-minor FW version number. + * Access: RO + */ +MLXSW_ITEM32(reg, mddq, device_info_fw_sub_minor, 0x18, 0, 16); + +static inline void +mlxsw_reg_mddq_device_info_pack(char *payload, u8 slot_index, + u8 request_msg_seq) +{ + __mlxsw_reg_mddq_pack(payload, slot_index, + MLXSW_REG_MDDQ_QUERY_TYPE_DEVICE_INFO); + mlxsw_reg_mddq_request_msg_seq_set(payload, request_msg_seq); +} + +static inline void +mlxsw_reg_mddq_device_info_unpack(const char *payload, u8 *p_response_msg_seq, + bool *p_data_valid, bool *p_flash_owner, + u8 *p_device_index, u16 *p_fw_major, + u16 *p_fw_minor, u16 *p_fw_sub_minor) +{ + *p_response_msg_seq = mlxsw_reg_mddq_response_msg_seq_get(payload); + *p_data_valid = mlxsw_reg_mddq_data_valid_get(payload); + *p_flash_owner = mlxsw_reg_mddq_device_info_flash_owner_get(payload); + *p_device_index = mlxsw_reg_mddq_device_info_device_index_get(payload); + *p_fw_major = mlxsw_reg_mddq_device_info_fw_major_get(payload); + *p_fw_minor = mlxsw_reg_mddq_device_info_fw_minor_get(payload); + *p_fw_sub_minor = mlxsw_reg_mddq_device_info_fw_sub_minor_get(payload); +} + #define MLXSW_REG_MDDQ_SLOT_ASCII_NAME_LEN 20 /* reg_mddq_slot_ascii_name @@ -12862,6 +13032,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mfgd), MLXSW_REG(mgpir), MLXSW_REG(mbct), + MLXSW_REG(mddt), MLXSW_REG(mddq), MLXSW_REG(mddc), MLXSW_REG(mfde), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 641078060b02..1e240cdd9cbd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -29,6 +29,7 @@ #include <net/pkt_cls.h> #include <net/netevent.h> #include <net/addrconf.h> +#include <linux/ptp_classify.h> #include "spectrum.h" #include "pci.h" @@ -230,8 +231,8 @@ void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, counter_index); } -static void mlxsw_sp_txhdr_construct(struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) +void mlxsw_sp_txhdr_construct(struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) { char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN); @@ -246,6 +247,82 @@ static void mlxsw_sp_txhdr_construct(struct sk_buff *skb, mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL); } +int +mlxsw_sp_txhdr_ptp_data_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + char *txhdr; + u16 max_fid; + int err; + + if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) { + err = -ENOMEM; + goto err_skb_cow_head; + } + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, FID)) { + err = -EIO; + goto err_res_valid; + } + max_fid = MLXSW_CORE_RES_GET(mlxsw_core, FID); + + txhdr = skb_push(skb, MLXSW_TXHDR_LEN); + memset(txhdr, 0, MLXSW_TXHDR_LEN); + + mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1); + mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH); + mlxsw_tx_hdr_rx_is_router_set(txhdr, true); + mlxsw_tx_hdr_fid_valid_set(txhdr, true); + mlxsw_tx_hdr_fid_set(txhdr, max_fid + tx_info->local_port - 1); + mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_DATA); + return 0; + +err_res_valid: +err_skb_cow_head: + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb); + return err; +} + +static bool 
mlxsw_sp_skb_requires_ts(struct sk_buff *skb) +{ + unsigned int type; + + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + return false; + + type = ptp_classify_raw(skb); + return !!ptp_parse_header(skb, type); +} + +static int mlxsw_sp_txhdr_handle(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + /* In Spectrum-2 and Spectrum-3, PTP events that require a time stamp + * need special handling and cannot be transmitted as regular control + * packets. + */ + if (unlikely(mlxsw_sp_skb_requires_ts(skb))) + return mlxsw_sp->ptp_ops->txhdr_construct(mlxsw_core, + mlxsw_sp_port, skb, + tx_info); + + if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb); + return -ENOMEM; + } + + mlxsw_sp_txhdr_construct(skb, tx_info); + return 0; +} + enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 state) { switch (state) { @@ -648,12 +725,6 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, u64 len; int err; - if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) { - this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info)) @@ -664,7 +735,11 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - mlxsw_sp_txhdr_construct(skb, &tx_info); + err = mlxsw_sp_txhdr_handle(mlxsw_sp->core, mlxsw_sp_port, skb, + &tx_info); + if (err) + return NETDEV_TX_OK; + /* TX header is consumed by HW on the way so we shouldn't count its * bytes as being sent. */ @@ -2666,6 +2741,7 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { .get_stats_count = mlxsw_sp1_get_stats_count, .get_stats_strings = mlxsw_sp1_get_stats_strings, .get_stats = mlxsw_sp1_get_stats, + .txhdr_construct = mlxsw_sp_ptp_txhdr_construct, }; static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { @@ -2682,6 +2758,24 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { .get_stats_count = mlxsw_sp2_get_stats_count, .get_stats_strings = mlxsw_sp2_get_stats_strings, .get_stats = mlxsw_sp2_get_stats, + .txhdr_construct = mlxsw_sp2_ptp_txhdr_construct, +}; + +static const struct mlxsw_sp_ptp_ops mlxsw_sp4_ptp_ops = { + .clock_init = mlxsw_sp2_ptp_clock_init, + .clock_fini = mlxsw_sp2_ptp_clock_fini, + .init = mlxsw_sp2_ptp_init, + .fini = mlxsw_sp2_ptp_fini, + .receive = mlxsw_sp2_ptp_receive, + .transmitted = mlxsw_sp2_ptp_transmitted, + .hwtstamp_get = mlxsw_sp2_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set, + .shaper_work = mlxsw_sp2_ptp_shaper_work, + .get_ts_info = mlxsw_sp2_ptp_get_ts_info, + .get_stats_count = mlxsw_sp2_get_stats_count, + .get_stats_strings = mlxsw_sp2_get_stats_strings, + .get_stats = mlxsw_sp2_get_stats, + .txhdr_construct = mlxsw_sp_ptp_txhdr_construct, }; struct mlxsw_sp_sample_trigger_node { @@ -3327,7 +3421,7 @@ static int mlxsw_sp4_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; mlxsw_sp->sb_ops = &mlxsw_sp3_sb_ops; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; - mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp4_ptp_ops; mlxsw_sp->span_ops = &mlxsw_sp3_span_ops; mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops; mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops; @@ -3831,6 +3925,7 @@ static struct mlxsw_driver 
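
mlxsw_sp_skb_requires_ts() above leans on the generic PTP classifier instead of open-coding header matching: ptp_classify_raw() runs the in-kernel classifier to produce a PTP_CLASS_* value, and ptp_parse_header() then confirms a parsable PTP header at the classified offset. The same test in isolation, as a sketch:

#include <linux/ptp_classify.h>
#include <linux/skbuff.h>

static bool skb_is_stampable_ptp(struct sk_buff *skb)
{
	unsigned int type;

	/* user space asked for a hardware Tx timestamp on this skb */
	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		return false;

	type = ptp_classify_raw(skb);	/* PTP_CLASS_NONE if not PTP */
	return ptp_parse_header(skb, type) != NULL;
}
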
mlxsw_sp1_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp1_config_profile, + .sdq_supports_cqe_v2 = false, }; static struct mlxsw_driver mlxsw_sp2_driver = { @@ -3869,6 +3964,7 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, + .sdq_supports_cqe_v2 = true, }; static struct mlxsw_driver mlxsw_sp3_driver = { @@ -3907,6 +4003,7 @@ static struct mlxsw_driver mlxsw_sp3_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, + .sdq_supports_cqe_v2 = true, }; static struct mlxsw_driver mlxsw_sp4_driver = { @@ -3943,6 +4040,7 @@ static struct mlxsw_driver mlxsw_sp4_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, + .sdq_supports_cqe_v2 = true, }; bool mlxsw_sp_port_dev_check(const struct net_device *dev) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 50a9380b76e9..c8ff2a6d7e90 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -243,6 +243,10 @@ struct mlxsw_sp_ptp_ops { void (*get_stats_strings)(u8 **p); void (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port, u64 *data, int data_index); + int (*txhdr_construct)(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); }; static inline struct mlxsw_sp_upper * @@ -700,6 +704,12 @@ int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int *p_counter_index); void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index); +void mlxsw_sp_txhdr_construct(struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); +int mlxsw_sp_txhdr_ptp_data_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); bool mlxsw_sp_port_dev_check(const struct net_device *dev); struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev); struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 5116d7ebe258..2e0b704b8a31 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -11,6 +11,7 @@ #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/net_tstamp.h> +#include <linux/refcount.h> #include "spectrum.h" #include "spectrum_ptp.h" @@ -39,6 +40,14 @@ struct mlxsw_sp1_ptp_state { u32 gc_cycle; }; +struct mlxsw_sp2_ptp_state { + struct mlxsw_sp_ptp_state common; + refcount_t ptp_port_enabled_ref; /* Number of ports with time stamping + * enabled. 
+ */ + struct hwtstamp_config config; +}; + struct mlxsw_sp1_ptp_key { u16 local_port; u8 message_type; @@ -85,6 +94,13 @@ mlxsw_sp1_ptp_state(struct mlxsw_sp *mlxsw_sp) common); } +static struct mlxsw_sp2_ptp_state * +mlxsw_sp2_ptp_state(struct mlxsw_sp *mlxsw_sp) +{ + return container_of(mlxsw_sp->ptp_state, struct mlxsw_sp2_ptp_state, + common); +} + static struct mlxsw_sp1_ptp_clock * mlxsw_sp1_ptp_clock(struct ptp_clock_info *ptp) { @@ -328,6 +344,153 @@ void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock_common) kfree(clock); } +static u64 mlxsw_sp2_ptp_read_utc(struct mlxsw_sp_ptp_clock *clock, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_core *mlxsw_core = clock->core; + u32 utc_sec1, utc_sec2, utc_nsec; + + utc_sec1 = mlxsw_core_read_utc_sec(mlxsw_core); + ptp_read_system_prets(sts); + utc_nsec = mlxsw_core_read_utc_nsec(mlxsw_core); + ptp_read_system_postts(sts); + utc_sec2 = mlxsw_core_read_utc_sec(mlxsw_core); + + if (utc_sec1 != utc_sec2) { + /* Wrap around. */ + ptp_read_system_prets(sts); + utc_nsec = mlxsw_core_read_utc_nsec(mlxsw_core); + ptp_read_system_postts(sts); + } + + return (u64)utc_sec2 * NSEC_PER_SEC + utc_nsec; +} + +static int +mlxsw_sp2_ptp_phc_settime(struct mlxsw_sp_ptp_clock *clock, u64 nsec) +{ + struct mlxsw_core *mlxsw_core = clock->core; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + u32 sec, nsec_rem; + + sec = div_u64_rem(nsec, NSEC_PER_SEC, &nsec_rem); + mlxsw_reg_mtutc_pack(mtutc_pl, + MLXSW_REG_MTUTC_OPERATION_SET_TIME_IMMEDIATE, + 0, sec, nsec_rem, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static int mlxsw_sp2_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + s32 ppb = scaled_ppm_to_ppb(scaled_ppm); + + /* In Spectrum-2 and newer ASICs, the frequency adjustment in MTUTC is + * reversed, positive values mean to decrease the frequency. Adjust the + * sign of PPB to this behavior. + */ + return mlxsw_sp_ptp_phc_adjfreq(clock, -ppb); +} + +static int mlxsw_sp2_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + struct mlxsw_core *mlxsw_core = clock->core; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + + /* HW time adjustment range is s16. If out of range, set time instead. 
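
mlxsw_sp2_ptp_read_utc() above handles the classic split-counter race: seconds and nanoseconds live in two 32-bit registers, so the seconds register is sampled before and after the nanoseconds read, and the nanoseconds are read again once if a second boundary slipped in between. The distilled pattern, with function pointers standing in for the bus read_utc_* callbacks:

#include <linux/time64.h>
#include <linux/types.h>

/* rd_sec()/rd_nsec() stand in for the bus read_utc_* callbacks. */
static u64 read_split_utc(u32 (*rd_sec)(void *priv),
			  u32 (*rd_nsec)(void *priv), void *priv)
{
	u32 sec1, sec2, nsec;

	sec1 = rd_sec(priv);
	nsec = rd_nsec(priv);
	sec2 = rd_sec(priv);
	if (sec1 != sec2)	/* wrapped into a new second, resample */
		nsec = rd_nsec(priv);

	return (u64)sec2 * NSEC_PER_SEC + nsec;
}
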
*/ + if (delta < S16_MIN || delta > S16_MAX) { + u64 nsec; + + nsec = mlxsw_sp2_ptp_read_utc(clock, NULL); + nsec += delta; + + return mlxsw_sp2_ptp_phc_settime(clock, nsec); + } + + mlxsw_reg_mtutc_pack(mtutc_pl, + MLXSW_REG_MTUTC_OPERATION_ADJUST_TIME, + 0, 0, 0, delta); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static int mlxsw_sp2_ptp_gettimex(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 nsec; + + nsec = mlxsw_sp2_ptp_read_utc(clock, sts); + *ts = ns_to_timespec64(nsec); + + return 0; +} + +static int mlxsw_sp2_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 nsec = timespec64_to_ns(ts); + + return mlxsw_sp2_ptp_phc_settime(clock, nsec); +} + +static const struct ptp_clock_info mlxsw_sp2_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlxsw_sp_clock", + .max_adj = MLXSW_REG_MTUTC_MAX_FREQ_ADJ, + .adjfine = mlxsw_sp2_ptp_adjfine, + .adjtime = mlxsw_sp2_ptp_adjtime, + .gettimex64 = mlxsw_sp2_ptp_gettimex, + .settime64 = mlxsw_sp2_ptp_settime, +}; + +struct mlxsw_sp_ptp_clock * +mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + struct mlxsw_sp_ptp_clock *clock; + int err; + + clock = kzalloc(sizeof(*clock), GFP_KERNEL); + if (!clock) + return ERR_PTR(-ENOMEM); + + clock->core = mlxsw_sp->core; + + clock->ptp_info = mlxsw_sp2_ptp_clock_info; + + err = mlxsw_sp2_ptp_phc_settime(clock, 0); + if (err) { + dev_err(dev, "setting UTC time failed %d\n", err); + goto err_ptp_phc_settime; + } + + clock->ptp = ptp_clock_register(&clock->ptp_info, dev); + if (IS_ERR(clock->ptp)) { + err = PTR_ERR(clock->ptp); + dev_err(dev, "ptp_clock_register failed %d\n", err); + goto err_ptp_clock_register; + } + + return clock; + +err_ptp_clock_register: +err_ptp_phc_settime: + kfree(clock); + return ERR_PTR(err); +} + +void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ + ptp_clock_unregister(clock->ptp); + kfree(clock); +} + static int mlxsw_sp_ptp_parse(struct sk_buff *skb, u8 *p_domain_number, u8 *p_message_type, @@ -835,10 +998,44 @@ static int mlxsw_sp1_ptp_shaper_params_set(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_ptp_traps_set(struct mlxsw_sp *mlxsw_sp) +{ + u16 event_message_type; + int err; + + /* Deliver these message types as PTP0. */ + event_message_type = BIT(PTP_MSGTYPE_SYNC) | + BIT(PTP_MSGTYPE_DELAY_REQ) | + BIT(PTP_MSGTYPE_PDELAY_REQ) | + BIT(PTP_MSGTYPE_PDELAY_RESP); + + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, + event_message_type); + if (err) + return err; + + /* Everything else is PTP1. 
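
The gettimex64 implementation above also shows what the ptp_system_timestamp argument is for: the pre/post snapshots taken immediately around the device read let user space (phc2sys, for example) bound the uncertainty of the returned device time. The shape in isolation, with my_read_device_ns() as a hypothetical device accessor:

#include <linux/ptp_clock_kernel.h>
#include <linux/time64.h>

static u64 my_read_device_ns(struct ptp_clock_info *ptp);	/* hypothetical */

static int my_phc_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
			   struct ptp_system_timestamp *sts)
{
	u64 ns;

	ptp_read_system_prets(sts);	/* system clock just before ... */
	ns = my_read_device_ns(ptp);	/* ... the device register read ... */
	ptp_read_system_postts(sts);	/* ... and just after it */

	*ts = ns_to_timespec64(ns);
	return 0;
}
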
*/ + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, + ~event_message_type); + if (err) + goto err_mtptpt1_set; + + return 0; + +err_mtptpt1_set: + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); + return err; +} + +static void mlxsw_sp_ptp_traps_unset(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); +} + struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp1_ptp_state *ptp_state; - u16 message_type; int err; err = mlxsw_sp1_ptp_shaper_params_set(mlxsw_sp); @@ -857,22 +1054,9 @@ struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_hashtable_init; - /* Delive these message types as PTP0. */ - message_type = BIT(PTP_MSGTYPE_SYNC) | - BIT(PTP_MSGTYPE_DELAY_REQ) | - BIT(PTP_MSGTYPE_PDELAY_REQ) | - BIT(PTP_MSGTYPE_PDELAY_RESP); - err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, - message_type); + err = mlxsw_sp_ptp_traps_set(mlxsw_sp); if (err) - goto err_mtptpt_set; - - /* Everything else is PTP1. */ - message_type = ~message_type; - err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, - message_type); - if (err) - goto err_mtptpt1_set; + goto err_ptp_traps_set; err = mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, true); if (err) @@ -884,10 +1068,8 @@ struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) return &ptp_state->common; err_fifo_clr: - mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); -err_mtptpt1_set: - mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); -err_mtptpt_set: + mlxsw_sp_ptp_traps_unset(mlxsw_sp); +err_ptp_traps_set: rhltable_destroy(&ptp_state->unmatched_ht); err_hashtable_init: kfree(ptp_state); @@ -904,8 +1086,7 @@ void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state_common) cancel_delayed_work_sync(&ptp_state->ht_gc_dw); mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp, 0, 0); mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false); - mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); - mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); + mlxsw_sp_ptp_traps_unset(mlxsw_sp); rhltable_free_and_destroy(&ptp_state->unmatched_ht, &mlxsw_sp1_ptp_unmatched_free_fn, NULL); kfree(ptp_state); @@ -1176,3 +1357,354 @@ void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, *data++ = *(u64 *)(stats + offset); } } + +struct mlxsw_sp_ptp_state *mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp2_ptp_state *ptp_state; + int err; + + ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL); + if (!ptp_state) + return ERR_PTR(-ENOMEM); + + ptp_state->common.mlxsw_sp = mlxsw_sp; + + err = mlxsw_sp_ptp_traps_set(mlxsw_sp); + if (err) + goto err_ptp_traps_set; + + refcount_set(&ptp_state->ptp_port_enabled_ref, 0); + return &ptp_state->common; + +err_ptp_traps_set: + kfree(ptp_state); + return ERR_PTR(err); +} + +void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state_common) +{ + struct mlxsw_sp *mlxsw_sp = ptp_state_common->mlxsw_sp; + struct mlxsw_sp2_ptp_state *ptp_state; + + ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); + + mlxsw_sp_ptp_traps_unset(mlxsw_sp); + kfree(ptp_state); +} + +static u32 mlxsw_ptp_utc_time_stamp_sec_get(struct mlxsw_core *mlxsw_core, + u8 cqe_ts_sec) +{ + u32 utc_sec = mlxsw_core_read_utc_sec(mlxsw_core); + + if (cqe_ts_sec > (utc_sec & 0xff)) + /* Time stamp above the last bits of UTC 
(UTC & 0xff) means the + * latter has wrapped after the time stamp was collected. + */ + utc_sec -= 256; + + utc_sec &= ~0xff; + utc_sec |= cqe_ts_sec; + + return utc_sec; +} + +static void mlxsw_sp2_ptp_hwtstamp_fill(struct mlxsw_core *mlxsw_core, + const struct mlxsw_skb_cb *cb, + struct skb_shared_hwtstamps *hwtstamps) +{ + u64 ts_sec, ts_nsec, nsec; + + WARN_ON_ONCE(!cb->cqe_ts.sec && !cb->cqe_ts.nsec); + + /* The time stamp in the CQE is represented by 38 bits, which is a short + * representation of UTC time. Software should create the full time + * stamp using the global UTC clock. The seconds have only 8 bits in the + * CQE, to create the full time stamp, use the current UTC time and fix + * the seconds according to the relation between UTC seconds and CQE + * seconds. + */ + ts_sec = mlxsw_ptp_utc_time_stamp_sec_get(mlxsw_core, cb->cqe_ts.sec); + ts_nsec = cb->cqe_ts.nsec; + + nsec = ts_sec * NSEC_PER_SEC + ts_nsec; + + hwtstamps->hwtstamp = ns_to_ktime(nsec); +} + +void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u16 local_port) +{ + struct skb_shared_hwtstamps hwtstamps; + + mlxsw_sp2_ptp_hwtstamp_fill(mlxsw_sp->core, mlxsw_skb_cb(skb), + &hwtstamps); + *skb_hwtstamps(skb) = hwtstamps; + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u16 local_port) +{ + struct skb_shared_hwtstamps hwtstamps; + + mlxsw_sp2_ptp_hwtstamp_fill(mlxsw_sp->core, mlxsw_skb_cb(skb), + &hwtstamps); + skb_tstamp_tx(skb, &hwtstamps); + dev_kfree_skb_any(skb); +} + +int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + struct mlxsw_sp2_ptp_state *ptp_state; + + ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp_port->mlxsw_sp); + + *config = ptp_state->config; + return 0; +} + +static int +mlxsw_sp2_ptp_get_message_types(const struct hwtstamp_config *config, + u16 *p_ing_types, u16 *p_egr_types, + enum hwtstamp_rx_filters *p_rx_filter) +{ + enum hwtstamp_rx_filters rx_filter = config->rx_filter; + enum hwtstamp_tx_types tx_type = config->tx_type; + u16 ing_types = 0x00; + u16 egr_types = 0x00; + + *p_rx_filter = rx_filter; + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + ing_types = 0x00; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + /* In Spectrum-2 and above, all packets get time stamp by + * default and the driver fill the time stamp only for event + * packets. Return all event types even if only specific types + * were required. 
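
The 8-bit seconds reconstruction above deserves a worked example: the CQE carries only the low byte of the UTC seconds, so the full value is rebuilt from the current counter, stepping back one 256-second window when the counter has already wrapped past the stamped value. With the current seconds at 0x1234 and a CQE byte of 0xff, 0xff > 0x34 implies a wrap, so the result is (0x1234 - 0x100) with its low byte replaced, i.e. 0x11ff. As a pure function:

#include <linux/types.h>

static u32 rebuild_utc_sec(u32 utc_sec_now, u8 cqe_sec)
{
	/* stamp's low byte is "ahead" of the counter: counter wrapped */
	if (cqe_sec > (utc_sec_now & 0xff))
		utc_sec_now -= 256;

	return (utc_sec_now & ~0xffU) | cqe_sec;	/* splice stamp back in */
}
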
+ */ + ing_types = 0x0f; + *p_rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_NTP_ALL: + return -ERANGE; + default: + return -EINVAL; + } + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + egr_types = 0x00; + break; + case HWTSTAMP_TX_ON: + egr_types = 0x0f; + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: + return -ERANGE; + default: + return -EINVAL; + } + + *p_ing_types = ing_types; + *p_egr_types = egr_types; + return 0; +} + +static int mlxsw_sp2_ptp_mtpcpc_set(struct mlxsw_sp *mlxsw_sp, bool ptp_trap_en, + u16 ing_types, u16 egr_types) +{ + char mtpcpc_pl[MLXSW_REG_MTPCPC_LEN]; + + mlxsw_reg_mtpcpc_pack(mtpcpc_pl, false, 0, ptp_trap_en, ing_types, + egr_types); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtpcpc), mtpcpc_pl); +} + +static int mlxsw_sp2_ptp_enable(struct mlxsw_sp *mlxsw_sp, u16 ing_types, + u16 egr_types, + struct hwtstamp_config new_config) +{ + struct mlxsw_sp2_ptp_state *ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); + int err; + + err = mlxsw_sp2_ptp_mtpcpc_set(mlxsw_sp, true, ing_types, egr_types); + if (err) + return err; + + ptp_state->config = new_config; + return 0; +} + +static int mlxsw_sp2_ptp_disable(struct mlxsw_sp *mlxsw_sp, + struct hwtstamp_config new_config) +{ + struct mlxsw_sp2_ptp_state *ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); + int err; + + err = mlxsw_sp2_ptp_mtpcpc_set(mlxsw_sp, false, 0, 0); + if (err) + return err; + + ptp_state->config = new_config; + return 0; +} + +static int mlxsw_sp2_ptp_configure_port(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ing_types, u16 egr_types, + struct hwtstamp_config new_config) +{ + struct mlxsw_sp2_ptp_state *ptp_state; + int err; + + ASSERT_RTNL(); + + ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp_port->mlxsw_sp); + + if (refcount_inc_not_zero(&ptp_state->ptp_port_enabled_ref)) + return 0; + + err = mlxsw_sp2_ptp_enable(mlxsw_sp_port->mlxsw_sp, ing_types, + egr_types, new_config); + if (err) + return err; + + refcount_set(&ptp_state->ptp_port_enabled_ref, 1); + + return 0; +} + +static int mlxsw_sp2_ptp_deconfigure_port(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config new_config) +{ + struct mlxsw_sp2_ptp_state *ptp_state; + int err; + + ASSERT_RTNL(); + + ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp_port->mlxsw_sp); + + if (!refcount_dec_and_test(&ptp_state->ptp_port_enabled_ref)) + return 0; + + err = mlxsw_sp2_ptp_disable(mlxsw_sp_port->mlxsw_sp, new_config); + if (err) + goto err_ptp_disable; + + return 0; + +err_ptp_disable: + refcount_set(&ptp_state->ptp_port_enabled_ref, 1); + return err; +} + +int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + enum hwtstamp_rx_filters rx_filter; + struct hwtstamp_config new_config; + u16 new_ing_types, new_egr_types; + bool ptp_enabled; + int err; + + err = mlxsw_sp2_ptp_get_message_types(config, &new_ing_types, + &new_egr_types, &rx_filter); + if (err) + return err; + + new_config.flags = config->flags; + new_config.tx_type = config->tx_type; + new_config.rx_filter = rx_filter; + + ptp_enabled = mlxsw_sp_port->ptp.ing_types || + mlxsw_sp_port->ptp.egr_types; + + if ((new_ing_types || new_egr_types) && !ptp_enabled) { + err = mlxsw_sp2_ptp_configure_port(mlxsw_sp_port, new_ing_types, + new_egr_types, new_config); + if (err) + return err; + } else if (!new_ing_types && !new_egr_types && ptp_enabled) { + err = mlxsw_sp2_ptp_deconfigure_port(mlxsw_sp_port, new_config); + if (err) + return err; + } + + 
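[Aside: the CQE carries only the 8 low bits of the UTC seconds counter, reconstructed by mlxsw_ptp_utc_time_stamp_sec_get() earlier in this hunk. A minimal stand-alone sketch of the same wrap-around logic — hypothetical helper names, not part of the patch, assuming the sample is less than 256 seconds old when "now" is read:

	#include <assert.h>
	#include <stdint.h>

	/* Rebuild a full seconds counter from its 8 sampled low bits. */
	static uint32_t rebuild_sec(uint32_t now_sec, uint8_t ts_sec8)
	{
		if (ts_sec8 > (now_sec & 0xff))
			now_sec -= 256;	/* low byte of "now" wrapped since the sample */

		return (now_sec & ~0xffu) | ts_sec8;
	}

	int main(void)
	{
		assert(rebuild_sec(0x1240, 0x34) == 0x1234);	/* no wrap */
		assert(rebuild_sec(0x1302, 0xfe) == 0x12fe);	/* wrapped */
		return 0;
	}
]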
mlxsw_sp_port->ptp.ing_types = new_ing_types; + mlxsw_sp_port->ptp.egr_types = new_egr_types; + + /* Notify the ioctl caller what we are actually timestamping. */ + config->rx_filter = rx_filter; + + return 0; +} + +int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp); + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT); + + return 0; +} + +int mlxsw_sp_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + mlxsw_sp_txhdr_construct(skb, tx_info); + return 0; +} + +int mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + /* In Spectrum-2 and Spectrum-3, in order for PTP event packets to have + * their correction field correctly set on the egress port they must be + * transmitted as data packets. Such packets ingress the ASIC via the + * CPU port and must have a VLAN tag, as the CPU port is not configured + * with a PVID. Push the default VLAN (4095), which is configured as + * egress untagged on all the ports. + */ + if (!skb_vlan_tagged(skb)) { + skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q), + MLXSW_SP_DEFAULT_VID); + if (!skb) { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + return -ENOMEM; + } + } + + return mlxsw_sp_txhdr_ptp_data_construct(mlxsw_core, mlxsw_sp_port, skb, + tx_info); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h index c06cd1384bca..2d1628fdefc1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -57,6 +57,40 @@ void mlxsw_sp1_get_stats_strings(u8 **p); void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, u64 *data, int data_index); +int mlxsw_sp_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); + +struct mlxsw_sp_ptp_clock * +mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev); + +void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock); + +struct mlxsw_sp_ptp_state *mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp); + +void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state); + +void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u16 local_port); + +void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u16 local_port); + +int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); + +int mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); + #else static inline struct mlxsw_sp_ptp_clock * @@ -136,7 +170,14 @@ static inline void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, u64 *data, int 
data_index) { } -#endif + +static inline int mlxsw_sp_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + return -EOPNOTSUPP; +} static inline struct mlxsw_sp_ptp_clock * mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) @@ -184,16 +225,25 @@ mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } -static inline void mlxsw_sp2_ptp_shaper_work(struct work_struct *work) -{ -} - static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, struct ethtool_ts_info *info) { return mlxsw_sp_ptp_get_ts_info_noptp(info); } +static inline int mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + return -EOPNOTSUPP; +} +#endif + +static inline void mlxsw_sp2_ptp_shaper_work(struct work_struct *work) +{ +} + static inline int mlxsw_sp2_get_stats_count(void) { return 0; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index e31f8fbbc696..df2ab5cbd49b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -4233,7 +4233,7 @@ static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog) } /* If the chain is ended by an load/store pair then this - * could serve as the new head of the the next chain. + * could serve as the new head of the next chain. */ if (curr_pair_is_memcpy(meta1, meta2)) { head_ld_meta = meta1; diff --git a/drivers/net/ethernet/sfc/falcon/net_driver.h b/drivers/net/ethernet/sfc/falcon/net_driver.h index a381cf9ec4f3..a2c7139f2b32 100644 --- a/drivers/net/ethernet/sfc/falcon/net_driver.h +++ b/drivers/net/ethernet/sfc/falcon/net_driver.h @@ -679,7 +679,7 @@ union ef4_multicast_hash { * @n_rx_channels: Number of channels used for RX (= number of RX queues) * @n_tx_channels: Number of channels used for TX * @rx_ip_align: RX DMA address offset to have IP header aligned in - * in accordance with NET_IP_ALIGN + * accordance with NET_IP_ALIGN * @rx_dma_len: Current maximum RX DMA length * @rx_buffer_order: Order (log2) of number of pages for each RX buffer * @rx_buffer_truesize: Amortised allocation size of an RX buffer, diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 4625f85acab2..10ad0b93d283 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type); if (tx_queue && tx_queue->timestamping) { + /* This code invokes normal driver TX code which is always + * protected from softirqs when called from generic TX code, + * which in turn disables preemption. Look at __dev_queue_xmit + * which uses rcu_read_lock_bh disabling preemption for RCU + * plus disabling softirqs. We do not need RCU reader + * protection here. + * + * Although it is theoretically safe for the current PTP TX/RX + * code to run without disabling softirqs, there are three good + * reasons for doing so: + * + * 1) The code invoked is mainly implemented for non-PTP + * packets and it is always executed with softirqs + * disabled. + * 2) This being a single PTP packet, better to not + * interrupt its processing by softirqs which can lead + * to high latencies. + * 3) netdev_xmit_more checks preemption is disabled and + * triggers a BUG_ON if not. 
+ */ + local_bh_disable(); efx_enqueue_skb(tx_queue, skb); + local_bh_enable(); } else { WARN_ONCE(1, "PTP channel has no timestamped tx queue\n"); dev_kfree_skb_any(skb); diff --git a/drivers/net/ethernet/sfc/siena/net_driver.h b/drivers/net/ethernet/sfc/siena/net_driver.h index c4a97fbf4672..ff7bbc325952 100644 --- a/drivers/net/ethernet/sfc/siena/net_driver.h +++ b/drivers/net/ethernet/sfc/siena/net_driver.h @@ -838,7 +838,7 @@ enum efx_xdp_tx_queues_mode { * @xdp_channel_offset: Offset of zeroth channel used for XPD TX. * @xdp_tx_per_channel: Max number of TX queues on an XDP TX channel. * @rx_ip_align: RX DMA address offset to have IP header aligned in - * in accordance with NET_IP_ALIGN + * accordance with NET_IP_ALIGN * @rx_dma_len: Current maximum RX DMA length * @rx_buffer_order: Order (log2) of number of pages for each RX buffer * @rx_buffer_truesize: Amortised allocation size of an RX buffer, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index ca8ab290013c..d42e1afb6521 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -688,18 +688,19 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) ret = mediatek_dwmac_clks_config(priv_plat, true); if (ret) - return ret; + goto err_remove_config_dt; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) { - stmmac_remove_config_dt(pdev, plat_dat); + if (ret) goto err_drv_probe; - } return 0; err_drv_probe: mediatek_dwmac_clks_config(priv_plat, false); +err_remove_config_dt: + stmmac_remove_config_dt(pdev, plat_dat); + return ret; } diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 2495a5719e1c..018d365f9deb 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -815,6 +815,7 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, fl4->saddr = info->key.u.ipv4.src; fl4->fl4_dport = dport; fl4->fl4_sport = sport; + fl4->flowi4_flags = info->key.flow_flags; tos = info->key.tos; if ((tos == 1) && !geneve->cfg.collect_md) { diff --git a/drivers/net/ipa/ipa_qmi_msg.h b/drivers/net/ipa/ipa_qmi_msg.h index 3233d145fd87..495e85abe50b 100644 --- a/drivers/net/ipa/ipa_qmi_msg.h +++ b/drivers/net/ipa/ipa_qmi_msg.h @@ -214,7 +214,7 @@ struct ipa_init_modem_driver_req { /* The response to a IPA_QMI_INIT_DRIVER request begins with a standard * QMI response, but contains other information as well. Currently we - * simply wait for the the INIT_DRIVER transaction to complete and + * simply wait for the INIT_DRIVER transaction to complete and * ignore any other data that might be returned. */ struct ipa_init_modem_driver_rsp { diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index c881e1bf6f6e..f1683ce6b561 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -243,6 +243,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) #define DEFAULT_SEND_SCI true #define DEFAULT_ENCRYPT false #define DEFAULT_ENCODING_SA 0 +#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1)) static bool send_sci(const struct macsec_secy *secy) { @@ -1697,7 +1698,7 @@ static bool validate_add_rxsa(struct nlattr **attrs) return false; if (attrs[MACSEC_SA_ATTR_PN] && - *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0) + nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { @@ -1753,7 +1754,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) } pn_len = secy->xpn ? 
MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; - if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { + if (tb_sa[MACSEC_SA_ATTR_PN] && + nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); @@ -1769,7 +1771,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), - MACSEC_SA_ATTR_SALT); + MACSEC_SALT_LEN); rtnl_unlock(); return -EINVAL; } @@ -1842,7 +1844,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) return 0; cleanup: - kfree(rx_sa); + macsec_rxsa_put(rx_sa); rtnl_unlock(); return err; } @@ -1939,7 +1941,7 @@ static bool validate_add_txsa(struct nlattr **attrs) if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; - if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) + if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { @@ -2011,7 +2013,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), - MACSEC_SA_ATTR_SALT); + MACSEC_SALT_LEN); rtnl_unlock(); return -EINVAL; } @@ -2085,7 +2087,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) cleanup: secy->operational = was_operational; - kfree(tx_sa); + macsec_txsa_put(tx_sa); rtnl_unlock(); return err; } @@ -2293,7 +2295,7 @@ static bool validate_upd_sa(struct nlattr **attrs) if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; - if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) + if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { @@ -3745,9 +3747,6 @@ static int macsec_changelink_common(struct net_device *dev, secy->operational = tx_sa && tx_sa->active; } - if (data[IFLA_MACSEC_WINDOW]) - secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); - if (data[IFLA_MACSEC_ENCRYPT]) tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]); @@ -3793,6 +3792,16 @@ static int macsec_changelink_common(struct net_device *dev, } } + if (data[IFLA_MACSEC_WINDOW]) { + secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); + + /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window + * for XPN cipher suites */ + if (secy->xpn && + secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW) + return -EINVAL; + } + return 0; } @@ -3822,7 +3831,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], ret = macsec_changelink_common(dev, data); if (ret) - return ret; + goto cleanup; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index a43820212932..50854265864d 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -351,10 +351,12 @@ nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; - nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_USER); + nmap->entry[idx].key = kmalloc(offmap->map.key_size, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].key) return -ENOMEM; - nmap->entry[idx].value = 
kmalloc(offmap->map.value_size, GFP_USER); + nmap->entry[idx].value = kmalloc(offmap->map.value_size, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].value) { kfree(nmap->entry[idx].key); nmap->entry[idx].key = NULL; @@ -496,7 +498,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) if (offmap->map.map_flags) return -EINVAL; - nmap = kzalloc(sizeof(*nmap), GFP_USER); + nmap = kzalloc(sizeof(*nmap), GFP_KERNEL_ACCOUNT); if (!nmap) return -ENOMEM; diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index ab0af1d2531f..70f88eae2a9e 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -986,7 +986,7 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs, */ ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS); if (ret < 0) - return false; + return ret; if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) { int speed_value; diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c index ff22b6b1c686..36803d932dff 100644 --- a/drivers/net/sungem_phy.c +++ b/drivers/net/sungem_phy.c @@ -450,6 +450,7 @@ static int bcm5421_init(struct mii_phy* phy) int can_low_power = 1; if (np == NULL || of_get_property(np, "no-autolowpower", NULL)) can_low_power = 0; + of_node_put(np); if (can_low_power) { /* Enable automatic low-power */ sungem_phy_write(phy, 0x1c, 0x9002); diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 1e5c15363a2e..843893482abd 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -8,13 +8,13 @@ * * Based on the work of * Donald Becker - * + * * Old chipset support added by Simon Evans <spse@secret.org.uk> 2002 * - adds support for Belkin F5U011 */ /* - * + * * Should you need to contact me, the author, you can do so either by * e-mail - mail your message to <vojtech@suse.cz>, or by paper mail: * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic @@ -54,7 +54,7 @@ static const char driver_name[] = "catc"; /* * Some defines. - */ + */ #define STATS_UPDATE (HZ) /* Time between stats updates */ #define TX_TIMEOUT (5*HZ) /* Max time the queue can be stopped */ @@ -332,7 +332,7 @@ static void catc_irq_done(struct urb *urb) dev_err(&catc->usbdev->dev, "submit(rx_urb) status %d\n", res); } - } + } } resubmit: res = usb_submit_urb (urb, GFP_ATOMIC); @@ -538,7 +538,7 @@ static int catc_ctrl_async(struct catc *catc, u8 dir, u8 request, u16 value, unsigned long flags; spin_lock_irqsave(&catc->ctrl_lock, flags); - + q = catc->ctrl_queue + catc->ctrl_head; q->dir = dir; @@ -639,7 +639,7 @@ static void catc_set_multicast_list(struct net_device *netdev) if (netdev->flags & IFF_PROMISC) { memset(catc->multicast, 0xff, 64); rx |= (!catc->is_f5u011) ? 
RxPromisc : AltRxPromisc; - } + } if (netdev->flags & IFF_ALLMULTI) { memset(catc->multicast, 0xff, 64); @@ -806,7 +806,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id catc->tx_urb = usb_alloc_urb(0, GFP_KERNEL); catc->rx_urb = usb_alloc_urb(0, GFP_KERNEL); catc->irq_urb = usb_alloc_urb(0, GFP_KERNEL); - if ((!catc->ctrl_urb) || (!catc->tx_urb) || + if ((!catc->ctrl_urb) || (!catc->tx_urb) || (!catc->rx_urb) || (!catc->irq_urb)) { dev_err(&intf->dev, "No free urbs available.\n"); ret = -ENOMEM; @@ -814,17 +814,17 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id } /* The F5U011 has the same vendor/product as the netmate but a device version of 0x130 */ - if (le16_to_cpu(usbdev->descriptor.idVendor) == 0x0423 && + if (le16_to_cpu(usbdev->descriptor.idVendor) == 0x0423 && le16_to_cpu(usbdev->descriptor.idProduct) == 0xa && le16_to_cpu(catc->usbdev->descriptor.bcdDevice) == 0x0130) { dev_dbg(dev, "Testing for f5u011\n"); - catc->is_f5u011 = 1; + catc->is_f5u011 = 1; atomic_set(&catc->recq_sz, 0); pktsz = RX_PKT_SZ; } else { pktsz = RX_MAX_BURST * (PKT_SZ + 2); } - + usb_fill_control_urb(catc->ctrl_urb, usbdev, usb_sndctrlpipe(usbdev, 0), NULL, NULL, 0, catc_ctrl_done, catc); @@ -854,7 +854,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id *buf = 0x87654321; catc_write_mem(catc, 0xfa80, buf, 4); catc_read_mem(catc, 0x7a80, buf, 4); - + switch (*buf) { case 0x12345678: catc_set_reg(catc, TxBufCount, 8); @@ -873,32 +873,32 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id } kfree(buf); - + dev_dbg(dev, "Getting MAC from SEEROM.\n"); - + catc_get_mac(catc, macbuf); eth_hw_addr_set(netdev, macbuf); - + dev_dbg(dev, "Setting MAC into registers.\n"); - + for (i = 0; i < 6; i++) catc_set_reg(catc, StationAddr0 - i, netdev->dev_addr[i]); - + dev_dbg(dev, "Filling the multicast list.\n"); - + eth_broadcast_addr(broadcast); catc_multicast(broadcast, catc->multicast); catc_multicast(netdev->dev_addr, catc->multicast); catc_write_mem(catc, 0xfa80, catc->multicast, 64); - + dev_dbg(dev, "Clearing error counters.\n"); - + for (i = 0; i < 8; i++) catc_set_reg(catc, EthStats + i, 0); catc->last_stats = jiffies; - + dev_dbg(dev, "Enabling.\n"); - + catc_set_reg(catc, MaxBurst, RX_MAX_BURST); catc_set_reg(catc, OpModes, OpTxMerge | OpRxMerge | OpLenInclude | Op3MemWaits); catc_set_reg(catc, LEDCtrl, LEDLink); @@ -908,7 +908,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id catc_reset(catc); catc_get_mac(catc, macbuf); eth_hw_addr_set(netdev, macbuf); - + dev_dbg(dev, "Setting RX Mode\n"); catc->rxmode[0] = RxEnable | RxPolarity | RxMultiCast; catc->rxmode[1] = 0; diff --git a/drivers/net/usb/cdc_subset.c b/drivers/net/usb/cdc_subset.c index 32637df0f4cc..f4a44f05c6ab 100644 --- a/drivers/net/usb/cdc_subset.c +++ b/drivers/net/usb/cdc_subset.c @@ -120,7 +120,7 @@ static const struct driver_info an2720_info = { #endif /* CONFIG_USB_AN2720 */ - + #ifdef CONFIG_USB_BELKIN #define HAVE_HARDWARE @@ -140,7 +140,7 @@ static const struct driver_info belkin_info = { #endif /* CONFIG_USB_BELKIN */ - + #ifdef CONFIG_USB_EPSON2888 #define HAVE_HARDWARE @@ -167,7 +167,7 @@ static const struct driver_info epson2888_info = { #endif /* CONFIG_USB_EPSON2888 */ - + /*------------------------------------------------------------------------- * * info from Jonathan McDowell <noodles@earth.li> @@ -181,7 +181,7 @@ static const struct driver_info kc2190_info = 
{ }; #endif /* CONFIG_USB_KC2190 */ - + #ifdef CONFIG_USB_ARMLINUX #define HAVE_HARDWARE @@ -222,7 +222,7 @@ static const struct driver_info blob_info = { #endif /* CONFIG_USB_ARMLINUX */ - + /*-------------------------------------------------------------------------*/ #ifndef HAVE_HARDWARE diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 9b2bc1993ece..c9efb7df892e 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -221,7 +221,7 @@ struct kaweth_device dma_addr_t rxbufferhandle; __u8 *rx_buf; - + struct sk_buff *tx_skb; __u8 *firmware_buf; diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 17c9c63b8eeb..2c82fbcaab22 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -18,7 +18,7 @@ /* - * Prolific PL-2301/PL-2302 driver ... http://www.prolific.com.tw/ + * Prolific PL-2301/PL-2302 driver ... http://www.prolific.com.tw/ * * The protocol and handshaking used here should be bug-compatible * with the Linux 2.2 "plusb" driver, by Deti Fliegl. diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index e415465068ca..aaa89b4cfd50 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -381,7 +381,7 @@ insanity: } EXPORT_SYMBOL_GPL(usbnet_update_max_qlen); - + /*------------------------------------------------------------------------- * * Network Device Driver (peer link to "Host Device", from USB host) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 356cf8dd4164..ec8e1b3108c3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -242,9 +242,15 @@ struct virtnet_info { /* Packet virtio header size */ u8 hdr_len; - /* Work struct for refilling if we run low on memory. */ + /* Work struct for delayed refilling if we run low on memory. */ struct delayed_work refill; + /* Is delayed refill enabled? */ + bool refill_enabled; + + /* The lock to synchronize the access to refill_enabled */ + spinlock_t refill_lock; + /* Work struct for config space updates */ struct work_struct config_work; @@ -348,6 +354,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) return p; } +static void enable_delayed_refill(struct virtnet_info *vi) +{ + spin_lock_bh(&vi->refill_lock); + vi->refill_enabled = true; + spin_unlock_bh(&vi->refill_lock); +} + +static void disable_delayed_refill(struct virtnet_info *vi) +{ + spin_lock_bh(&vi->refill_lock); + vi->refill_enabled = false; + spin_unlock_bh(&vi->refill_lock); +} + static void virtqueue_napi_schedule(struct napi_struct *napi, struct virtqueue *vq) { @@ -1527,8 +1547,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget, } if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { - if (!try_fill_recv(vi, rq, GFP_ATOMIC)) - schedule_delayed_work(&vi->refill, 0); + if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { + spin_lock(&vi->refill_lock); + if (vi->refill_enabled) + schedule_delayed_work(&vi->refill, 0); + spin_unlock(&vi->refill_lock); + } } u64_stats_update_begin(&rq->stats.syncp); @@ -1651,6 +1675,8 @@ static int virtnet_open(struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int i, err; + enable_delayed_refill(vi); + for (i = 0; i < vi->max_queue_pairs; i++) { if (i < vi->curr_queue_pairs) /* Make sure we have some buffers: if oom use wq. 
*/ @@ -2033,6 +2059,8 @@ static int virtnet_close(struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int i; + /* Make sure NAPI doesn't schedule refill work */ + disable_delayed_refill(vi); /* Make sure refill_work doesn't re-enable napi! */ cancel_delayed_work_sync(&vi->refill); @@ -2792,6 +2820,8 @@ static int virtnet_restore_up(struct virtio_device *vdev) virtio_device_ready(vdev); + enable_delayed_refill(vi); + if (netif_running(vi->dev)) { err = virtnet_open(vi->dev); if (err) @@ -3535,6 +3565,7 @@ static int virtnet_probe(struct virtio_device *vdev) vdev->priv = vi; INIT_WORK(&vi->config_work, virtnet_config_changed_work); + spin_lock_init(&vi->refill_lock); /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index dd831adbc1d1..53b3b241e027 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2075,17 +2075,8 @@ vmxnet3_poll_rx_only(struct napi_struct *napi, int budget) rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget); if (rxd_done < budget) { - struct Vmxnet3_RxCompDesc *rcd; -#ifdef __BIG_ENDIAN_BITFIELD - struct Vmxnet3_RxCompDesc rxComp; -#endif napi_complete_done(napi, rxd_done); vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx); - /* after unmasking the interrupt, check if any descriptors were completed */ - vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, - &rxComp); - if (rcd->gen == rq->comp_ring.gen && napi_reschedule(napi)) - vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx); } return rxd_done; } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 8b0710b576c2..90811ab851fd 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2243,7 +2243,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device struct vxlan_sock *sock4, struct sk_buff *skb, int oif, u8 tos, __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport, - struct dst_cache *dst_cache, + __u8 flow_flags, struct dst_cache *dst_cache, const struct ip_tunnel_info *info) { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); @@ -2270,6 +2270,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device fl4.saddr = *saddr; fl4.fl4_dport = dport; fl4.fl4_sport = sport; + fl4.flowi4_flags = flow_flags; rt = ip_route_output_key(vxlan->net, &fl4); if (!IS_ERR(rt)) { @@ -2459,7 +2460,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int pkt_len = skb->len; __be16 src_port = 0, dst_port; struct dst_entry *ndst = NULL; - __u8 tos, ttl; + __u8 tos, ttl, flow_flags = 0; int ifindex; int err; u32 flags = vxlan->cfg.flags; @@ -2525,6 +2526,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } dst = &remote_ip; dst_port = info->key.tp_dst ? 
: vxlan->cfg.dst_port; + flow_flags = info->key.flow_flags; vni = tunnel_id_to_key32(info->key.tun_id); ifindex = 0; dst_cache = &info->dst_cache; @@ -2555,7 +2557,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos, dst->sin.sin_addr.s_addr, &local_ip.sin.sin_addr.s_addr, - dst_port, src_port, + dst_port, src_port, flow_flags, dst_cache, info); if (IS_ERR(rt)) { err = PTR_ERR(rt); @@ -3061,7 +3063,8 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos, info->key.u.ipv4.dst, &info->key.u.ipv4.src, dport, sport, - &info->dst_cache, info); + info->key.flow_flags, &info->dst_cache, + info); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index f52960d2dfbe..bff144c97e66 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -32,7 +32,7 @@ config DEBUG_PINCTRL Say Y here to add some extra checks and diagnostics to PINCTRL calls. config PINCTRL_AMD - tristate "AMD GPIO pin control" + bool "AMD GPIO pin control" depends on HAS_IOMEM depends on ACPI || COMPILE_TEST select GPIOLIB diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index a140b6bfbfaa..bcde042d29dc 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -102,7 +102,7 @@ struct armada_37xx_pinctrl { struct device *dev; struct gpio_chip gpio_chip; struct irq_chip irq_chip; - spinlock_t irq_lock; + raw_spinlock_t irq_lock; struct pinctrl_desc pctl; struct pinctrl_dev *pctl_dev; struct armada_37xx_pin_group *groups; @@ -523,9 +523,9 @@ static void armada_37xx_irq_ack(struct irq_data *d) unsigned long flags; armada_37xx_irq_update_reg(®, d); - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); writel(d->mask, info->base + reg); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); } static void armada_37xx_irq_mask(struct irq_data *d) @@ -536,10 +536,10 @@ static void armada_37xx_irq_mask(struct irq_data *d) unsigned long flags; armada_37xx_irq_update_reg(®, d); - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); val = readl(info->base + reg); writel(val & ~d->mask, info->base + reg); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); } static void armada_37xx_irq_unmask(struct irq_data *d) @@ -550,10 +550,10 @@ static void armada_37xx_irq_unmask(struct irq_data *d) unsigned long flags; armada_37xx_irq_update_reg(®, d); - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); val = readl(info->base + reg); writel(val | d->mask, info->base + reg); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); } static int armada_37xx_irq_set_wake(struct irq_data *d, unsigned int on) @@ -564,14 +564,14 @@ static int armada_37xx_irq_set_wake(struct irq_data *d, unsigned int on) unsigned long flags; armada_37xx_irq_update_reg(®, d); - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); val = readl(info->base + reg); if (on) val |= (BIT(d->hwirq % GPIO_PER_REG)); else val &= ~(BIT(d->hwirq % GPIO_PER_REG)); writel(val, info->base + reg); - spin_unlock_irqrestore(&info->irq_lock, flags); + 
raw_spin_unlock_irqrestore(&info->irq_lock, flags); return 0; } @@ -583,7 +583,7 @@ static int armada_37xx_irq_set_type(struct irq_data *d, unsigned int type) u32 val, reg = IRQ_POL; unsigned long flags; - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); armada_37xx_irq_update_reg(®, d); val = readl(info->base + reg); switch (type) { @@ -607,11 +607,11 @@ static int armada_37xx_irq_set_type(struct irq_data *d, unsigned int type) break; } default: - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); return -EINVAL; } writel(val, info->base + reg); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); return 0; } @@ -626,7 +626,7 @@ static int armada_37xx_edge_both_irq_swap_pol(struct armada_37xx_pinctrl *info, regmap_read(info->regmap, INPUT_VAL + 4*reg_idx, &l); - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); p = readl(info->base + IRQ_POL + 4 * reg_idx); if ((p ^ l) & (1 << bit_num)) { /* @@ -647,7 +647,7 @@ static int armada_37xx_edge_both_irq_swap_pol(struct armada_37xx_pinctrl *info, ret = -1; } - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); return ret; } @@ -664,11 +664,11 @@ static void armada_37xx_irq_handler(struct irq_desc *desc) u32 status; unsigned long flags; - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); status = readl_relaxed(info->base + IRQ_STATUS + 4 * i); /* Manage only the interrupt that was enabled */ status &= readl_relaxed(info->base + IRQ_EN + 4 * i); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); while (status) { u32 hwirq = ffs(status) - 1; u32 virq = irq_find_mapping(d, hwirq + @@ -695,12 +695,12 @@ static void armada_37xx_irq_handler(struct irq_desc *desc) update_status: /* Update status in case a new IRQ appears */ - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); status = readl_relaxed(info->base + IRQ_STATUS + 4 * i); /* Manage only the interrupt that was enabled */ status &= readl_relaxed(info->base + IRQ_EN + 4 * i); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); } } chained_irq_exit(chip, desc); @@ -731,7 +731,7 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev, struct device *dev = &pdev->dev; unsigned int i, nr_irq_parent; - spin_lock_init(&info->irq_lock); + raw_spin_lock_init(&info->irq_lock); nr_irq_parent = of_irq_count(np); if (!nr_irq_parent) { @@ -1107,25 +1107,40 @@ static const struct of_device_id armada_37xx_pinctrl_of_match[] = { { }, }; +static const struct regmap_config armada_37xx_pinctrl_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .use_raw_spinlock = true, +}; + static int __init armada_37xx_pinctrl_probe(struct platform_device *pdev) { struct armada_37xx_pinctrl *info; struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; struct regmap *regmap; + void __iomem *base; int ret; + base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); + if (IS_ERR(base)) { + dev_err(dev, "failed to ioremap base address: %pe\n", base); + return PTR_ERR(base); + } + + regmap = devm_regmap_init_mmio(dev, base, + &armada_37xx_pinctrl_regmap_config); + if (IS_ERR(regmap)) { + dev_err(dev, "failed to create regmap: %pe\n", regmap); + return 
PTR_ERR(regmap); + } + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; info->dev = dev; - - regmap = syscon_node_to_regmap(np); - if (IS_ERR(regmap)) - return dev_err_probe(dev, PTR_ERR(regmap), "cannot get regmap\n"); info->regmap = regmap; - info->data = of_device_get_match_data(dev); ret = armada_37xx_pinctrl_register(pdev, info); diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index 5f4a8c5c6650..dfc8ea9f3843 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -29,19 +29,12 @@ #define ocelot_clrsetbits(addr, clear, set) \ writel((readl(addr) & ~(clear)) | (set), (addr)) -/* PINCONFIG bits (sparx5 only) */ enum { PINCONF_BIAS, PINCONF_SCHMITT, PINCONF_DRIVE_STRENGTH, }; -#define BIAS_PD_BIT BIT(4) -#define BIAS_PU_BIT BIT(3) -#define BIAS_BITS (BIAS_PD_BIT|BIAS_PU_BIT) -#define SCHMITT_BIT BIT(2) -#define DRIVE_BITS GENMASK(1, 0) - /* GPIO standard registers */ #define OCELOT_GPIO_OUT_SET 0x0 #define OCELOT_GPIO_OUT_CLR 0x4 @@ -321,6 +314,13 @@ struct ocelot_pin_caps { unsigned char a_functions[OCELOT_FUNC_PER_PIN]; /* Additional functions */ }; +struct ocelot_pincfg_data { + u8 pd_bit; + u8 pu_bit; + u8 drive_bits; + u8 schmitt_bit; +}; + struct ocelot_pinctrl { struct device *dev; struct pinctrl_dev *pctl; @@ -328,10 +328,16 @@ struct ocelot_pinctrl { struct regmap *map; struct regmap *pincfg; struct pinctrl_desc *desc; + const struct ocelot_pincfg_data *pincfg_data; struct ocelot_pmx_func func[FUNC_MAX]; u8 stride; }; +struct ocelot_match_data { + struct pinctrl_desc desc; + struct ocelot_pincfg_data pincfg_data; +}; + #define LUTON_P(p, f0, f1) \ static struct ocelot_pin_caps luton_pin_##p = { \ .pin = p, \ @@ -1325,24 +1331,27 @@ static int ocelot_hw_get_value(struct ocelot_pinctrl *info, int ret = -EOPNOTSUPP; if (info->pincfg) { + const struct ocelot_pincfg_data *opd = info->pincfg_data; u32 regcfg; - ret = regmap_read(info->pincfg, pin, ®cfg); + ret = regmap_read(info->pincfg, + pin * regmap_get_reg_stride(info->pincfg), + ®cfg); if (ret) return ret; ret = 0; switch (reg) { case PINCONF_BIAS: - *val = regcfg & BIAS_BITS; + *val = regcfg & (opd->pd_bit | opd->pu_bit); break; case PINCONF_SCHMITT: - *val = regcfg & SCHMITT_BIT; + *val = regcfg & opd->schmitt_bit; break; case PINCONF_DRIVE_STRENGTH: - *val = regcfg & DRIVE_BITS; + *val = regcfg & opd->drive_bits; break; default: @@ -1359,14 +1368,18 @@ static int ocelot_pincfg_clrsetbits(struct ocelot_pinctrl *info, u32 regaddr, u32 val; int ret; - ret = regmap_read(info->pincfg, regaddr, &val); + ret = regmap_read(info->pincfg, + regaddr * regmap_get_reg_stride(info->pincfg), + &val); if (ret) return ret; val &= ~clrbits; val |= setbits; - ret = regmap_write(info->pincfg, regaddr, val); + ret = regmap_write(info->pincfg, + regaddr * regmap_get_reg_stride(info->pincfg), + val); return ret; } @@ -1379,23 +1392,27 @@ static int ocelot_hw_set_value(struct ocelot_pinctrl *info, int ret = -EOPNOTSUPP; if (info->pincfg) { + const struct ocelot_pincfg_data *opd = info->pincfg_data; ret = 0; switch (reg) { case PINCONF_BIAS: - ret = ocelot_pincfg_clrsetbits(info, pin, BIAS_BITS, + ret = ocelot_pincfg_clrsetbits(info, pin, + opd->pd_bit | opd->pu_bit, val); break; case PINCONF_SCHMITT: - ret = ocelot_pincfg_clrsetbits(info, pin, SCHMITT_BIT, + ret = ocelot_pincfg_clrsetbits(info, pin, + opd->schmitt_bit, val); break; case PINCONF_DRIVE_STRENGTH: if (val <= 3) ret = ocelot_pincfg_clrsetbits(info, pin, - DRIVE_BITS, val); + opd->drive_bits, + 
val); else ret = -EINVAL; break; @@ -1425,17 +1442,20 @@ static int ocelot_pinconf_get(struct pinctrl_dev *pctldev, if (param == PIN_CONFIG_BIAS_DISABLE) val = (val == 0); else if (param == PIN_CONFIG_BIAS_PULL_DOWN) - val = (val & BIAS_PD_BIT ? true : false); + val = !!(val & info->pincfg_data->pd_bit); else /* PIN_CONFIG_BIAS_PULL_UP */ - val = (val & BIAS_PU_BIT ? true : false); + val = !!(val & info->pincfg_data->pu_bit); break; case PIN_CONFIG_INPUT_SCHMITT_ENABLE: + if (!info->pincfg_data->schmitt_bit) + return -EOPNOTSUPP; + err = ocelot_hw_get_value(info, pin, PINCONF_SCHMITT, &val); if (err) return err; - val = (val & SCHMITT_BIT ? true : false); + val = !!(val & info->pincfg_data->schmitt_bit); break; case PIN_CONFIG_DRIVE_STRENGTH: @@ -1479,6 +1499,7 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, unsigned long *configs, unsigned int num_configs) { struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); + const struct ocelot_pincfg_data *opd = info->pincfg_data; u32 param, arg, p; int cfg, err = 0; @@ -1491,8 +1512,8 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, case PIN_CONFIG_BIAS_PULL_UP: case PIN_CONFIG_BIAS_PULL_DOWN: arg = (param == PIN_CONFIG_BIAS_DISABLE) ? 0 : - (param == PIN_CONFIG_BIAS_PULL_UP) ? BIAS_PU_BIT : - BIAS_PD_BIT; + (param == PIN_CONFIG_BIAS_PULL_UP) ? + opd->pu_bit : opd->pd_bit; err = ocelot_hw_set_value(info, pin, PINCONF_BIAS, arg); if (err) @@ -1501,7 +1522,10 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, break; case PIN_CONFIG_INPUT_SCHMITT_ENABLE: - arg = arg ? SCHMITT_BIT : 0; + if (!opd->schmitt_bit) + return -EOPNOTSUPP; + + arg = arg ? opd->schmitt_bit : 0; err = ocelot_hw_set_value(info, pin, PINCONF_SCHMITT, arg); if (err) @@ -1562,69 +1586,94 @@ static const struct pinctrl_ops ocelot_pctl_ops = { .dt_free_map = pinconf_generic_dt_free_map, }; -static struct pinctrl_desc luton_desc = { - .name = "luton-pinctrl", - .pins = luton_pins, - .npins = ARRAY_SIZE(luton_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .owner = THIS_MODULE, +static struct ocelot_match_data luton_desc = { + .desc = { + .name = "luton-pinctrl", + .pins = luton_pins, + .npins = ARRAY_SIZE(luton_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .owner = THIS_MODULE, + }, }; -static struct pinctrl_desc serval_desc = { - .name = "serval-pinctrl", - .pins = serval_pins, - .npins = ARRAY_SIZE(serval_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .owner = THIS_MODULE, +static struct ocelot_match_data serval_desc = { + .desc = { + .name = "serval-pinctrl", + .pins = serval_pins, + .npins = ARRAY_SIZE(serval_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .owner = THIS_MODULE, + }, }; -static struct pinctrl_desc ocelot_desc = { - .name = "ocelot-pinctrl", - .pins = ocelot_pins, - .npins = ARRAY_SIZE(ocelot_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .owner = THIS_MODULE, +static struct ocelot_match_data ocelot_desc = { + .desc = { + .name = "ocelot-pinctrl", + .pins = ocelot_pins, + .npins = ARRAY_SIZE(ocelot_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .owner = THIS_MODULE, + }, }; -static struct pinctrl_desc jaguar2_desc = { - .name = "jaguar2-pinctrl", - .pins = jaguar2_pins, - .npins = ARRAY_SIZE(jaguar2_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .owner = THIS_MODULE, +static struct ocelot_match_data jaguar2_desc = { 
+ .desc = { + .name = "jaguar2-pinctrl", + .pins = jaguar2_pins, + .npins = ARRAY_SIZE(jaguar2_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .owner = THIS_MODULE, + }, }; -static struct pinctrl_desc servalt_desc = { - .name = "servalt-pinctrl", - .pins = servalt_pins, - .npins = ARRAY_SIZE(servalt_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .owner = THIS_MODULE, +static struct ocelot_match_data servalt_desc = { + .desc = { + .name = "servalt-pinctrl", + .pins = servalt_pins, + .npins = ARRAY_SIZE(servalt_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .owner = THIS_MODULE, + }, }; -static struct pinctrl_desc sparx5_desc = { - .name = "sparx5-pinctrl", - .pins = sparx5_pins, - .npins = ARRAY_SIZE(sparx5_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .confops = &ocelot_confops, - .owner = THIS_MODULE, +static struct ocelot_match_data sparx5_desc = { + .desc = { + .name = "sparx5-pinctrl", + .pins = sparx5_pins, + .npins = ARRAY_SIZE(sparx5_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .confops = &ocelot_confops, + .owner = THIS_MODULE, + }, + .pincfg_data = { + .pd_bit = BIT(4), + .pu_bit = BIT(3), + .drive_bits = GENMASK(1, 0), + .schmitt_bit = BIT(2), + }, }; -static struct pinctrl_desc lan966x_desc = { - .name = "lan966x-pinctrl", - .pins = lan966x_pins, - .npins = ARRAY_SIZE(lan966x_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &lan966x_pmx_ops, - .confops = &ocelot_confops, - .owner = THIS_MODULE, +static struct ocelot_match_data lan966x_desc = { + .desc = { + .name = "lan966x-pinctrl", + .pins = lan966x_pins, + .npins = ARRAY_SIZE(lan966x_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &lan966x_pmx_ops, + .confops = &ocelot_confops, + .owner = THIS_MODULE, + }, + .pincfg_data = { + .pd_bit = BIT(3), + .pu_bit = BIT(2), + .drive_bits = GENMASK(1, 0), + }, }; static int ocelot_create_group_func_map(struct device *dev, @@ -1890,7 +1939,8 @@ static const struct of_device_id ocelot_pinctrl_of_match[] = { {}, }; -static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev) +static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev, + const struct ocelot_pinctrl *info) { void __iomem *base; @@ -1898,7 +1948,7 @@ static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev) .reg_bits = 32, .val_bits = 32, .reg_stride = 4, - .max_register = 32, + .max_register = info->desc->npins * 4, .name = "pincfg", }; @@ -1913,6 +1963,7 @@ static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev) static int ocelot_pinctrl_probe(struct platform_device *pdev) { + const struct ocelot_match_data *data; struct device *dev = &pdev->dev; struct ocelot_pinctrl *info; struct reset_control *reset; @@ -1929,7 +1980,16 @@ static int ocelot_pinctrl_probe(struct platform_device *pdev) if (!info) return -ENOMEM; - info->desc = (struct pinctrl_desc *)device_get_match_data(dev); + data = device_get_match_data(dev); + if (!data) + return -EINVAL; + + info->desc = devm_kmemdup(dev, &data->desc, sizeof(*info->desc), + GFP_KERNEL); + if (!info->desc) + return -ENOMEM; + + info->pincfg_data = &data->pincfg_data; reset = devm_reset_control_get_optional_shared(dev, "switch"); if (IS_ERR(reset)) @@ -1956,7 +2016,7 @@ static int ocelot_pinctrl_probe(struct platform_device *pdev) /* Pinconf registers */ if (info->desc->confops) { - pincfg = ocelot_pinctrl_create_pincfg(pdev); + pincfg = ocelot_pinctrl_create_pincfg(pdev, info); if 
(IS_ERR(pincfg)) dev_dbg(dev, "Failed to create pincfg regmap\n"); else diff --git a/drivers/pinctrl/ralink/pinctrl-ralink.c b/drivers/pinctrl/ralink/pinctrl-ralink.c index 63429a287434..770862f45b3f 100644 --- a/drivers/pinctrl/ralink/pinctrl-ralink.c +++ b/drivers/pinctrl/ralink/pinctrl-ralink.c @@ -266,6 +266,8 @@ static int ralink_pinctrl_pins(struct ralink_priv *p) p->func[i]->pin_count, sizeof(int), GFP_KERNEL); + if (!p->func[i]->pins) + return -ENOMEM; for (j = 0; j < p->func[i]->pin_count; j++) p->func[i]->pins[j] = p->func[i]->pin_first + j; diff --git a/drivers/pinctrl/sunplus/sppctl.c b/drivers/pinctrl/sunplus/sppctl.c index 3ba47040ac42..2b3335ab56c6 100644 --- a/drivers/pinctrl/sunplus/sppctl.c +++ b/drivers/pinctrl/sunplus/sppctl.c @@ -871,6 +871,9 @@ static int sppctl_dt_node_to_map(struct pinctrl_dev *pctldev, struct device_node } *map = kcalloc(*num_maps + nmG, sizeof(**map), GFP_KERNEL); + if (*map == NULL) + return -ENOMEM; + for (i = 0; i < (*num_maps); i++) { dt_pin = be32_to_cpu(list[i]); pin_num = FIELD_GET(GENMASK(31, 24), dt_pin); diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 458218f88c5e..fe4971b65c64 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -176,6 +176,7 @@ config PTP_1588_CLOCK_OCP depends on !S390 depends on COMMON_CLK select NET_DEVLINK + select CRC16 help This driver adds support for an OpenCompute time card. diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 5f7e28de8b15..d34bb6ec1490 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -409,20 +409,19 @@ static void ism_create_system_eid(void) memcpy(&SYSTEM_EID.type, tmp, 4); } -static void ism_get_system_eid(struct smcd_dev *smcd, u8 **eid) +static u8 *ism_get_system_eid(void) { - *eid = &SYSTEM_EID.seid_string[0]; + return SYSTEM_EID.seid_string; } static u16 ism_get_chid(struct smcd_dev *smcd) { - struct ism_dev *ismdev; + struct ism_dev *ism = (struct ism_dev *)smcd->priv; - ismdev = (struct ism_dev *)smcd->priv; - if (!ismdev || !ismdev->pdev) + if (!ism || !ism->pdev) return 0; - return to_zpci(ismdev->pdev)->pchid; + return to_zpci(ism->pdev)->pchid; } static void ism_handle_event(struct ism_dev *ism) @@ -444,6 +443,7 @@ static irqreturn_t ism_handle_irq(int irq, void *data) struct ism_dev *ism = data; unsigned long bit, end; unsigned long *bv; + u16 dmbemask; bv = (void *) &ism->sba->dmb_bits[ISM_DMB_WORD_OFFSET]; end = sizeof(ism->sba->dmb_bits) * BITS_PER_BYTE - ISM_DMB_BIT_OFFSET; @@ -457,9 +457,10 @@ static irqreturn_t ism_handle_irq(int irq, void *data) break; clear_bit_inv(bit, bv); + dmbemask = ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET]; ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0; barrier(); - smcd_handle_irq(ism->smcd, bit + ISM_DMB_BIT_OFFSET); + smcd_handle_irq(ism->smcd, bit + ISM_DMB_BIT_OFFSET, dmbemask); } if (ism->sba->e) { diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9e54fe76a9b2..35d4b398c197 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3565,7 +3565,7 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, if (!atomic_read(&queue->set_pci_flags_count)) { /* * there's no outstanding PCI any more, so we - * have to request a PCI to be sure the the PCI + * have to request a PCI to be sure the PCI * will wake at some time in the future then we * can flush packed buffers that might still be * hanging around, which can happen if no diff --git a/drivers/spi/spi-bcm2835.c 
b/drivers/spi/spi-bcm2835.c index 775c0bf2f923..0933948d7df3 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -1138,10 +1138,14 @@ static void bcm2835_spi_handle_err(struct spi_controller *ctlr, struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); /* if an error occurred and we have an active dma, then terminate */ - dmaengine_terminate_sync(ctlr->dma_tx); - bs->tx_dma_active = false; - dmaengine_terminate_sync(ctlr->dma_rx); - bs->rx_dma_active = false; + if (ctlr->dma_tx) { + dmaengine_terminate_sync(ctlr->dma_tx); + bs->tx_dma_active = false; + } + if (ctlr->dma_rx) { + dmaengine_terminate_sync(ctlr->dma_rx); + bs->rx_dma_active = false; + } bcm2835_spi_undo_prologue(bs); /* and reset */ diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 31d778e9d255..6a7f7df1e776 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -69,7 +69,7 @@ #define CDNS_SPI_BAUD_DIV_SHIFT 3 /* Baud rate divisor shift in CR */ #define CDNS_SPI_SS_SHIFT 10 /* Slave Select field shift in CR */ #define CDNS_SPI_SS0 0x1 /* Slave Select zero */ -#define CDNS_SPI_NOSS 0x3C /* No Slave select */ +#define CDNS_SPI_NOSS 0xF /* No Slave select */ /* * SPI Interrupt Registers bit Masks diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index 7a014eeec2d0..411b1307b7fd 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -613,6 +613,10 @@ static int rspi_dma_transfer(struct rspi_data *rspi, struct sg_table *tx, rspi->dma_callbacked, HZ); if (ret > 0 && rspi->dma_callbacked) { ret = 0; + if (tx) + dmaengine_synchronize(rspi->ctlr->dma_tx); + if (rx) + dmaengine_synchronize(rspi->ctlr->dma_rx); } else { if (!ret) { dev_err(&rspi->ctlr->dev, "DMA timeout\n"); diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 90ce16b6e05f..f422f9c58ba7 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -632,16 +632,19 @@ static int __init sev_guest_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct snp_guest_dev *snp_dev; struct miscdevice *misc; + void __iomem *mapping; int ret; if (!dev->platform_data) return -ENODEV; data = (struct sev_guest_platform_data *)dev->platform_data; - layout = (__force void *)ioremap_encrypted(data->secrets_gpa, PAGE_SIZE); - if (!layout) + mapping = ioremap_encrypted(data->secrets_gpa, PAGE_SIZE); + if (!mapping) return -ENODEV; + layout = (__force void *)mapping; + ret = -ENOMEM; snp_dev = devm_kzalloc(&pdev->dev, sizeof(struct snp_guest_dev), GFP_KERNEL); if (!snp_dev) @@ -706,7 +709,7 @@ e_free_response: e_free_request: free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg)); e_unmap: - iounmap(layout); + iounmap(mapping); return ret; } diff --git a/fs/io_uring.c b/fs/io_uring.c index a01ea49f3017..e8e769be9ed0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1738,6 +1738,14 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) return; /* + * READV uses fields in `struct io_rw` (len/addr) to stash the selected + * buffer data. However if that buffer is recycled the original request + * data stored in addr is lost. Therefore forbid recycling for now. + */ + if (req->opcode == IORING_OP_READV) + return; + + /* * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear * the flag and hence ensure that bl->head doesn't get incremented. * If the tail has already been incremented, hang on to it. 
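[Aside: the io_uring hunk below replaces a bare kfree() with a separate free_bl pointer, so the error path frees only the buffer list this call allocated and never a caller-owned one. A minimal sketch of that idiom in isolation — hypothetical names, plain C, not the io_uring API:

	#include <stdlib.h>

	struct buf_list { int nr_pages; };

	/* Reuse an existing list if given, otherwise allocate one; on
	 * failure free only what this function itself allocated.
	 */
	static int setup_list(struct buf_list *existing, int simulate_err)
	{
		struct buf_list *bl, *free_bl = NULL;

		if (existing) {
			bl = existing;		/* caller-owned, must survive */
		} else {
			free_bl = bl = calloc(1, sizeof(*bl));
			if (!bl)
				return -1;
		}

		if (simulate_err) {
			free(free_bl);	/* NULL-safe; never frees "existing" */
			return -1;
		}

		bl->nr_pages = 1;
		return 0;
	}
]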
@@ -12931,7 +12939,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_buf_ring *br; struct io_uring_buf_reg reg; - struct io_buffer_list *bl; + struct io_buffer_list *bl, *free_bl = NULL; struct page **pages; int nr_pages; @@ -12963,7 +12971,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (bl->buf_nr_pages || !list_empty(&bl->buf_list)) return -EEXIST; } else { - bl = kzalloc(sizeof(*bl), GFP_KERNEL); + free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL); if (!bl) return -ENOMEM; } @@ -12972,7 +12980,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) struct_size(br, bufs, reg.ring_entries), &nr_pages); if (IS_ERR(pages)) { - kfree(bl); + kfree(free_bl); return PTR_ERR(pages); } diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 4de597a83b88..52615e6090e1 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, a = (ATTR_RECORD*)((u8*)ctx->attr + le32_to_cpu(ctx->attr->length)); for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { - if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + - le32_to_cpu(ctx->mrec->bytes_allocated)) + u8 *mrec_end = (u8 *)ctx->mrec + + le32_to_cpu(ctx->mrec->bytes_allocated); + u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) + + a->name_length * sizeof(ntfschar); + if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end || + name_end > mrec_end) break; ctx->attr = a; if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 337527571461..740b64238312 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -277,7 +277,6 @@ enum ocfs2_mount_options OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */ OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */ - OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */ }; #define OCFS2_OSB_SOFT_RO 0x0001 @@ -673,8 +672,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) static inline int ocfs2_mount_local(struct ocfs2_super *osb) { - return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT) - || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER)); + return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); } static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 0b0ae3ebb0cf..da7718cef735 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -252,16 +252,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, int i, ret = -ENOSPC; if ((preferred >= 0) && (preferred < si->si_num_slots)) { - if (!si->si_slots[preferred].sl_valid || - !si->si_slots[preferred].sl_node_num) { + if (!si->si_slots[preferred].sl_valid) { ret = preferred; goto out; } } for(i = 0; i < si->si_num_slots; i++) { - if (!si->si_slots[i].sl_valid || - !si->si_slots[i].sl_node_num) { + if (!si->si_slots[i].sl_valid) { ret = i; break; } @@ -456,30 +454,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb) spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); - if (ocfs2_mount_local(osb)) - /* use slot 0 directly in local mode */ - slot = 0; - else { - /* search for ourselves first and take the slot if it already - * exists. Perhaps we need to mark this in a variable for our - * own journal recovery? 
Possibly not, though we certainly - * need to warn to the user */ - slot = __ocfs2_node_num_to_slot(si, osb->node_num); + /* search for ourselves first and take the slot if it already + * exists. Perhaps we need to mark this in a variable for our + * own journal recovery? Possibly not, though we certainly + * need to warn the user */ + slot = __ocfs2_node_num_to_slot(si, osb->node_num); + if (slot < 0) { + /* if no slot yet, then just take 1st available + * one. */ + slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); if (slot < 0) { - /* if no slot yet, then just take 1st available - * one. */ - slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); - if (slot < 0) { - spin_unlock(&osb->osb_lock); - mlog(ML_ERROR, "no free slots available!\n"); - status = -EINVAL; - goto bail; - } - } else - printk(KERN_INFO "ocfs2: Slot %d on device (%s) was " - "already allocated to this node!\n", - slot, osb->dev_str); - } + spin_unlock(&osb->osb_lock); + mlog(ML_ERROR, "no free slots available!\n"); + status = -EINVAL; + goto bail; + } + } else + printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " + "allocated to this node!\n", slot, osb->dev_str); ocfs2_set_slot(si, slot, osb->node_num); osb->slot_num = slot; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index f7298816d8d9..438be028935d 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -172,7 +172,6 @@ enum { Opt_dir_resv_level, Opt_journal_async_commit, Opt_err_cont, - Opt_nocluster, Opt_err, }; @@ -206,7 +205,6 @@ static const match_table_t tokens = { {Opt_dir_resv_level, "dir_resv_level=%u"}, {Opt_journal_async_commit, "journal_async_commit"}, {Opt_err_cont, "errors=continue"}, - {Opt_nocluster, "nocluster"}, {Opt_err, NULL} }; @@ -618,13 +616,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) goto out; } - tmp = OCFS2_MOUNT_NOCLUSTER; - if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { - ret = -EINVAL; - mlog(ML_ERROR, "Cannot change nocluster option on remount\n"); - goto out; - } - tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | OCFS2_MOUNT_HB_NONE; if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { @@ -865,7 +856,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, } if (ocfs2_userspace_stack(osb) && - !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && strncmp(osb->osb_cluster_stack, mopt->cluster_stack, OCFS2_STACK_LABEL_LEN)) { mlog(ML_ERROR, @@ -1137,11 +1127,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ?
"writeback" : "ordered"); - if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && - !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)) - printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted " - "without cluster aware mode.\n", osb->dev_str); - atomic_set(&osb->vol_state, VOLUME_MOUNTED); wake_up(&osb->osb_mount_event); @@ -1452,9 +1437,6 @@ static int ocfs2_parse_options(struct super_block *sb, case Opt_journal_async_commit: mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; break; - case Opt_nocluster: - mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER; - break; default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -1566,9 +1548,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) seq_printf(s, ",journal_async_commit"); - if (opts & OCFS2_MOUNT_NOCLUSTER) - seq_printf(s, ",nocluster"); - return 0; } diff --git a/fs/read_write.c b/fs/read_write.c index e0777eefd846..397da0236607 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1263,6 +1263,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, count, fl); file_end_write(out.file); } else { + if (out.file->f_flags & O_NONBLOCK) + fl |= SPLICE_F_NONBLOCK; + retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl); } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index e943370107d0..de86f5b2859f 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -192,17 +192,19 @@ static inline void msg_init(struct uffd_msg *msg) } static inline struct uffd_msg userfault_msg(unsigned long address, + unsigned long real_address, unsigned int flags, unsigned long reason, unsigned int features) { struct uffd_msg msg; + msg_init(&msg); msg.event = UFFD_EVENT_PAGEFAULT; - if (!(features & UFFD_FEATURE_EXACT_ADDRESS)) - address &= PAGE_MASK; - msg.arg.pagefault.address = address; + msg.arg.pagefault.address = (features & UFFD_FEATURE_EXACT_ADDRESS) ? + real_address : address; + /* * These flags indicate why the userfault occurred: * - UFFD_PAGEFAULT_FLAG_WP indicates a write protect fault. 
@@ -488,8 +490,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function); uwq.wq.private = current; - uwq.msg = userfault_msg(vmf->real_address, vmf->flags, reason, - ctx->features); + uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags, + reason, ctx->features); uwq.ctx = ctx; uwq.waken = false; diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 7ce93aaf69f8..98954dda5734 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -1125,9 +1125,7 @@ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer, } #endif -#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED extern int devmem_is_allowed(unsigned long pfn); -#endif #endif /* __KERNEL__ */ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index cb2167c89eee..492dce43236e 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -368,9 +368,6 @@ static inline void tlb_flush(struct mmu_gather *tlb) flush_tlb_mm(tlb->mm); } -static inline void -tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } - #else /* CONFIG_MMU_GATHER_NO_RANGE */ #ifndef tlb_flush diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 0fca8f38bee4..addb135eeea6 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -28,7 +28,7 @@ #include <linux/dma-fence.h> #include <linux/completion.h> #include <linux/xarray.h> -#include <linux/irq_work.h> +#include <linux/workqueue.h> #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000) @@ -295,7 +295,7 @@ struct drm_sched_job { */ union { struct dma_fence_cb finish_cb; - struct irq_work work; + struct work_struct work; }; uint64_t id; diff --git a/include/linux/atm_tcp.h b/include/linux/atm_tcp.h index c8ecf6f68fb5..2558439d849b 100644 --- a/include/linux/atm_tcp.h +++ b/include/linux/atm_tcp.h @@ -9,6 +9,8 @@ #include <uapi/linux/atm_tcp.h> +struct atm_vcc; +struct module; struct atm_tcp_ops { int (*attach)(struct atm_vcc *vcc,int itf); diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index 4359fb0221cf..50be7cbd93a5 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -3,6 +3,11 @@ #ifndef __TAG_QCA_H #define __TAG_QCA_H +#include <linux/types.h> + +struct dsa_switch; +struct sk_buff; + #define QCA_HDR_LEN 2 #define QCA_HDR_VERSION 0x2 diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 9dc01f7ab5b4..07414c241e65 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -23,6 +23,10 @@ #ifdef __KERNEL__ +struct neigh_parms; +struct net_device; +struct sk_buff; + struct hippi_cb { __u32 ifield; }; diff --git a/include/linux/if_eql.h b/include/linux/if_eql.h index d75601d613cc..07f9b660b741 100644 --- a/include/linux/if_eql.h +++ b/include/linux/if_eql.h @@ -21,6 +21,7 @@ #include <linux/timer.h> #include <linux/spinlock.h> +#include <net/net_trackers.h> #include <uapi/linux/if_eql.h> typedef struct slave { diff --git a/include/linux/if_hsr.h b/include/linux/if_hsr.h index 408539d5ea5f..0404f5bf4f30 100644 --- a/include/linux/if_hsr.h +++ b/include/linux/if_hsr.h @@ -2,6 +2,10 @@ #ifndef _LINUX_IF_HSR_H_ #define _LINUX_IF_HSR_H_ +#include <linux/types.h> + +struct net_device; + /* used to differentiate various protocols */ enum hsr_version { HSR_V0 = 0, diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index 10e7521ecb6c..839d1e48b85e 100644 --- a/include/linux/if_rmnet.h 
+++ b/include/linux/if_rmnet.h @@ -5,6 +5,8 @@ #ifndef _LINUX_IF_RMNET_H_ #define _LINUX_IF_RMNET_H_ +#include <linux/types.h> + struct rmnet_map_header { u8 flags; /* MAP_CMD_FLAG, MAP_PAD_LEN_MASK */ u8 mux_id; diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 915a187cfabd..553552fa635c 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -2,14 +2,18 @@ #ifndef _LINUX_IF_TAP_H_ #define _LINUX_IF_TAP_H_ +#include <net/sock.h> +#include <linux/skb_array.h> + +struct file; +struct socket; + #if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); struct ptr_ring *tap_get_ptr_ring(struct file *file); #else #include <linux/err.h> #include <linux/errno.h> -struct file; -struct socket; static inline struct socket *tap_get_socket(struct file *f) { return ERR_PTR(-EINVAL); @@ -20,9 +24,6 @@ static inline struct ptr_ring *tap_get_ptr_ring(struct file *f) } #endif /* CONFIG_TAP */ -#include <net/sock.h> -#include <linux/skb_array.h> - /* * Maximum times a tap device can be opened. This can be used to * configure the number of receive queue, e.g. for multiqueue virtio. diff --git a/include/linux/mdio/mdio-xgene.h b/include/linux/mdio/mdio-xgene.h index 8af93ada8b64..9e588965dc83 100644 --- a/include/linux/mdio/mdio-xgene.h +++ b/include/linux/mdio/mdio-xgene.h @@ -8,6 +8,10 @@ #ifndef __MDIO_XGENE_H__ #define __MDIO_XGENE_H__ +#include <linux/bits.h> +#include <linux/spinlock.h> +#include <linux/types.h> + #define BLOCK_XG_MDIO_CSR_OFFSET 0x5000 #define BLOCK_DIAG_CSR_OFFSET 0xd000 #define XGENET_CONFIG_REG_ADDR 0x20 diff --git a/include/linux/mm.h b/include/linux/mm.h index cf3d0d673f6b..7898e29bcfb5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1130,23 +1130,27 @@ static inline bool is_zone_movable_page(const struct page *page) #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) DECLARE_STATIC_KEY_FALSE(devmap_managed_key); -bool __put_devmap_managed_page(struct page *page); -static inline bool put_devmap_managed_page(struct page *page) +bool __put_devmap_managed_page_refs(struct page *page, int refs); +static inline bool put_devmap_managed_page_refs(struct page *page, int refs) { if (!static_branch_unlikely(&devmap_managed_key)) return false; if (!is_zone_device_page(page)) return false; - return __put_devmap_managed_page(page); + return __put_devmap_managed_page_refs(page, refs); } - #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ -static inline bool put_devmap_managed_page(struct page *page) +static inline bool put_devmap_managed_page_refs(struct page *page, int refs) { return false; } #endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ +static inline bool put_devmap_managed_page(struct page *page) +{ + return put_devmap_managed_page_refs(page, 1); +} + /* 127: arbitrary random number, small enough to assemble well */ #define folio_ref_zero_or_close_to_overflow(folio) \ ((unsigned int) folio_ref_count(folio) + 127u <= 127u) diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index b22782225f27..cbe5fd1dd2e7 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -8,6 +8,8 @@ #ifndef NL802154_H #define NL802154_H +#include <net/netlink.h> + #define IEEE802154_NL_NAME "802.15.4 MAC" #define IEEE802154_MCAST_COORD_NAME "coordinator" #define IEEE802154_MCAST_BEACON_NAME "beacon" diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 52bc8e487ef7..1acafd86ab13 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -2,6 +2,8 @@ #ifndef __PHY_FIXED_H #define __PHY_FIXED_H 
+#include <linux/types.h> + struct fixed_phy_status { int link; int speed; @@ -12,6 +14,7 @@ struct fixed_phy_status { struct device_node; struct gpio_desc; +struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); diff --git a/include/linux/ppp-comp.h b/include/linux/ppp-comp.h index 9d3ffc8f5ea6..fb847e47f148 100644 --- a/include/linux/ppp-comp.h +++ b/include/linux/ppp-comp.h @@ -9,7 +9,7 @@ #include <uapi/linux/ppp-comp.h> - +struct compstat; struct module; /* diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h index 91f9a928344e..45e6e427ceb8 100644 --- a/include/linux/ppp_channel.h +++ b/include/linux/ppp_channel.h @@ -20,6 +20,8 @@ #include <linux/poll.h> #include <net/net_namespace.h> +struct net_device_path; +struct net_device_path_ctx; struct ppp_channel; struct ppp_channel_ops { diff --git a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h index 9d2b388fae1a..b7e57fdbd413 100644 --- a/include/linux/ppp_defs.h +++ b/include/linux/ppp_defs.h @@ -11,4 +11,18 @@ #include <uapi/linux/ppp_defs.h> #define PPP_FCS(fcs, c) crc_ccitt_byte(fcs, c) + +/** + * ppp_proto_is_valid - checks if PPP protocol is valid + * @proto: PPP protocol + * + * Assumes proto is not compressed. + * Protocol is valid if the value is odd and the least significant bit of the + * most significant octet is 0 (see RFC 1661, section 2). + */ +static inline bool ppp_proto_is_valid(u16 proto) +{ + return !!((proto & 0x0101) == 0x0001); +} + #endif /* _PPP_DEFS_H_ */ diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h index f960a719f0d5..c2e28deef33a 100644 --- a/include/linux/ptp_kvm.h +++ b/include/linux/ptp_kvm.h @@ -8,6 +8,8 @@ #ifndef _PTP_KVM_H_ #define _PTP_KVM_H_ +#include <linux/types.h> + struct timespec64; struct clocksource; diff --git a/include/linux/ptp_pch.h b/include/linux/ptp_pch.h index 51818198c292..7ba643b62c15 100644 --- a/include/linux/ptp_pch.h +++ b/include/linux/ptp_pch.h @@ -10,6 +10,10 @@ #ifndef _PTP_PCH_H_ #define _PTP_PCH_H_ +#include <linux/types.h> + +struct pci_dev; + void pch_ch_control_write(struct pci_dev *pdev, u32 val); u32 pch_ch_event_read(struct pci_dev *pdev); void pch_ch_event_write(struct pci_dev *pdev, u32 val); diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h index b97912fdbae7..79638395bc32 100644 --- a/include/linux/seq_file_net.h +++ b/include/linux/seq_file_net.h @@ -3,6 +3,7 @@ #define __SEQ_FILE_NET_H__ #include <linux/seq_file.h> +#include <net/net_trackers.h> struct net; extern struct net init_net; diff --git a/include/linux/sungem_phy.h b/include/linux/sungem_phy.h index 3a11fa41a131..c505f30e8b68 100644 --- a/include/linux/sungem_phy.h +++ b/include/linux/sungem_phy.h @@ -2,6 +2,8 @@ #ifndef __SUNGEM_PHY_H__ #define __SUNGEM_PHY_H__ +#include <linux/types.h> + struct mii_phy; /* Operations supported by any kind of PHY */ diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 1b4d72d5e891..b42b72391a8d 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -23,6 +23,12 @@ #ifndef __LINUX_USB_USBNET_H #define __LINUX_USB_USBNET_H +#include <linux/mii.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <linux/usb.h> + /* interface from usbnet core to each USB networking link we handle */ struct usbnet { /* housekeeping */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f7506f08e505..c04f359655b8 100644 --- a/include/net/addrconf.h +++ 
b/include/net/addrconf.h @@ -405,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) { const struct inet6_dev *idev = __in6_dev_get(dev); + if (unlikely(!idev)) + return true; + return !!idev->cnf.ignore_routes_with_linkdown; } diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 3c4f550e5a8b..2f766e3437ce 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -847,6 +847,7 @@ enum { }; void l2cap_chan_hold(struct l2cap_chan *c); +struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c); void l2cap_chan_put(struct l2cap_chan *c); static inline void l2cap_chan_lock(struct l2cap_chan *chan) diff --git a/include/net/devlink.h b/include/net/devlink.h index 780744b550b8..119ed1ffb988 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1509,6 +1509,27 @@ struct devlink_ops { struct devlink_rate *parent, void *priv_child, void *priv_parent, struct netlink_ext_ack *extack); + /** + * selftests_check() - queries if selftest is supported + * @devlink: devlink instance + * @id: test index + * @extack: extack for reporting error messages + * + * Return: true if test is supported by the driver + */ + bool (*selftest_check)(struct devlink *devlink, unsigned int id, + struct netlink_ext_ack *extack); + /** + * selftest_run() - Runs a selftest + * @devlink: devlink instance + * @id: test index + * @extack: extack for reporting error messages + * + * Return: status of the test + */ + enum devlink_selftest_status + (*selftest_run)(struct devlink *devlink, unsigned int id, + struct netlink_ext_ack *extack); }; void *devlink_priv(struct devlink *devlink); @@ -1580,6 +1601,8 @@ void devlink_linecard_provision_clear(struct devlink_linecard *linecard); void devlink_linecard_provision_fail(struct devlink_linecard *linecard); void devlink_linecard_activate(struct devlink_linecard *linecard); void devlink_linecard_deactivate(struct devlink_linecard *linecard); +void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink); int devl_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/net/firewire.h b/include/net/firewire.h index 2442d645e412..8fbff8d77865 100644 --- a/include/net/firewire.h +++ b/include/net/firewire.h @@ -13,8 +13,7 @@ union fwnet_hwaddr { __be64 uniq_id; /* EUI-64 */ u8 max_rec; /* max packet size */ u8 sspd; /* max speed */ - __be16 fifo_hi; /* hi 16bits of FIFO addr */ - __be32 fifo_lo; /* lo 32bits of FIFO addr */ + u8 fifo[6]; /* FIFO addr */ } __packed uc; }; diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 0f9544a9bb9e..6c74812d64b2 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -277,6 +277,18 @@ struct flow_dissector_key_num_of_vlans { u8 num_of_vlans; }; +/** + * struct flow_dissector_key_pppoe: + * @session_id: pppoe session id + * @ppp_proto: ppp protocol + * @type: pppoe eth type + */ +struct flow_dissector_key_pppoe { + __be16 session_id; + __be16 ppp_proto; + __be16 type; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -307,6 +319,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct 
flow_dissector_key_num_of_vlans */ + FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index a8d8512b7059..2a9a9e42e7fd 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -76,6 +76,10 @@ struct flow_match_ct { struct flow_dissector_key_ct *key, *mask; }; +struct flow_match_pppoe { + struct flow_dissector_key_pppoe *key, *mask; +}; + struct flow_rule; void flow_rule_match_meta(const struct flow_rule *rule, @@ -122,6 +126,8 @@ void flow_rule_match_enc_opts(const struct flow_rule *rule, struct flow_match_enc_opts *out); void flow_rule_match_ct(const struct flow_rule *rule, struct flow_match_ct *out); +void flow_rule_match_pppoe(const struct flow_rule *rule, + struct flow_match_pppoe *out); enum flow_action_id { FLOW_ACTION_ACCEPT = 0, diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 85cd695e7fd1..ee88f0f1350f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -321,7 +321,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); -#define TCP_PINGPONG_THRESH 3 +#define TCP_PINGPONG_THRESH 1 static inline void inet_csk_enter_pingpong_mode(struct sock *sk) { @@ -338,14 +338,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk) return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; } -static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (icsk->icsk_ack.pingpong < U8_MAX) - icsk->icsk_ack.pingpong++; -} - static inline bool inet_csk_has_ulp(struct sock *sk) { return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 20db95055db3..63fac94f9ace 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -54,6 +54,7 @@ struct ip_tunnel_key { __be32 label; /* Flow Label for IPv6 */ __be16 tp_src; __be16 tp_dst; + __u8 flow_flags; }; /* Flags for ip_tunnel_info mode. */ diff --git a/include/net/llc_s_st.h b/include/net/llc_s_st.h index c4359e203013..ed5b2fa40d32 100644 --- a/include/net/llc_s_st.h +++ b/include/net/llc_s_st.h @@ -12,6 +12,12 @@ * See the GNU General Public License for more details. 
*/ +#include <linux/types.h> +#include <net/llc_s_ac.h> +#include <net/llc_s_ev.h> + +struct llc_sap_state_trans; + #define LLC_NR_SAP_STATES 2 /* size of state table */ /* structures and types */ diff --git a/include/net/smc.h b/include/net/smc.h index 37f829d9c6e5..c926d3313e05 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -72,7 +72,7 @@ struct smcd_ops { int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); - void (*get_system_eid)(struct smcd_dev *dev, u8 **eid); + u8* (*get_system_eid)(void); u16 (*get_chid)(struct smcd_dev *dev); }; @@ -101,5 +101,5 @@ int smcd_register_dev(struct smcd_dev *smcd); void smcd_unregister_dev(struct smcd_dev *smcd); void smcd_free_dev(struct smcd_dev *smcd); void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event); -void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit); +void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit, u16 dmbemask); #endif /* _SMC_H */ diff --git a/include/net/sock.h b/include/net/sock.h index f7ad1a7705e9..a7273b289188 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2823,18 +2823,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) { /* Does this proto have per netns sysctl_wmem ? */ if (proto->sysctl_wmem_offset) - return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset); + return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset)); - return *proto->sysctl_wmem; + return READ_ONCE(*proto->sysctl_wmem); } static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) { /* Does this proto have per netns sysctl_rmem ? */ if (proto->sysctl_rmem_offset) - return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset); + return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset)); - return *proto->sysctl_rmem; + return READ_ONCE(*proto->sysctl_rmem); } /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) diff --git a/include/net/tcp.h b/include/net/tcp.h index f9e7c85ea829..d10962b9f0d0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -673,6 +673,8 @@ void tcp_get_info(struct sock *, struct tcp_info *); int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); +struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off); +void tcp_read_done(struct sock *sk, size_t len); void tcp_initialize_rcv_mss(struct sock *sk); @@ -1423,7 +1425,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, static inline int tcp_win_from_space(const struct sock *sk, int space) { - int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; + int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale); return tcp_adv_win_scale <= 0 ? 
(space>>(-tcp_adv_win_scale)) : diff --git a/include/net/tls.h b/include/net/tls.h index 181c496b01b8..b75b5727abdb 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -108,18 +108,33 @@ struct tls_sw_context_tx { unsigned long tx_bitmask; }; +struct tls_strparser { + struct sock *sk; + + u32 mark : 8; + u32 stopped : 1; + u32 copy_mode : 1; + u32 msg_ready : 1; + + struct strp_msg stm; + + struct sk_buff *anchor; + struct work_struct work; +}; + struct tls_sw_context_rx { struct crypto_aead *aead_recv; struct crypto_wait async_wait; - struct strparser strp; struct sk_buff_head rx_list; /* list of decrypted 'data' records */ void (*saved_data_ready)(struct sock *sk); - struct sk_buff *recv_pkt; u8 reader_present; u8 async_capable:1; u8 zc_capable:1; u8 reader_contended:1; + + struct tls_strparser strp; + atomic_t decrypt_pending; /* protect crypto_wait with decrypt_pending*/ spinlock_t decrypt_compl_lock; @@ -146,6 +161,8 @@ struct tls_offload_context_tx { struct scatterlist sg_tx_data[MAX_SKB_FRAGS]; void (*sk_destruct)(struct sock *sk); + struct work_struct destruct_work; + struct tls_context *ctx; u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index f13d37b60775..1ecdb911add8 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -192,6 +192,7 @@ struct f_owner_ex { #define F_LINUX_SPECIFIC_BASE 1024 +#ifndef HAVE_ARCH_STRUCT_FLOCK struct flock { short l_type; short l_whence; @@ -216,5 +217,6 @@ struct flock64 { __ARCH_FLOCK64_PAD #endif }; +#endif /* HAVE_ARCH_STRUCT_FLOCK */ #endif /* _ASM_GENERIC_FCNTL_H */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b3d40a5d72ff..2f24b53a87a5 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -136,6 +136,9 @@ enum devlink_command { DEVLINK_CMD_LINECARD_NEW, DEVLINK_CMD_LINECARD_DEL, + DEVLINK_CMD_SELFTESTS_GET, /* can dump */ + DEVLINK_CMD_SELFTESTS_RUN, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -276,6 +279,30 @@ enum { #define DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS \ (_BITUL(__DEVLINK_FLASH_OVERWRITE_MAX_BIT) - 1) +enum devlink_attr_selftest_id { + DEVLINK_ATTR_SELFTEST_ID_UNSPEC, + DEVLINK_ATTR_SELFTEST_ID_FLASH, /* flag */ + + __DEVLINK_ATTR_SELFTEST_ID_MAX, + DEVLINK_ATTR_SELFTEST_ID_MAX = __DEVLINK_ATTR_SELFTEST_ID_MAX - 1 +}; + +enum devlink_selftest_status { + DEVLINK_SELFTEST_STATUS_SKIP, + DEVLINK_SELFTEST_STATUS_PASS, + DEVLINK_SELFTEST_STATUS_FAIL +}; + +enum devlink_attr_selftest_result { + DEVLINK_ATTR_SELFTEST_RESULT_UNSPEC, + DEVLINK_ATTR_SELFTEST_RESULT, /* nested */ + DEVLINK_ATTR_SELFTEST_RESULT_ID, /* u32, enum devlink_attr_selftest_id */ + DEVLINK_ATTR_SELFTEST_RESULT_STATUS, /* u8, enum devlink_selftest_status */ + + __DEVLINK_ATTR_SELFTEST_RESULT_MAX, + DEVLINK_ATTR_SELFTEST_RESULT_MAX = __DEVLINK_ATTR_SELFTEST_RESULT_MAX - 1 +}; + /** * enum devlink_trap_action - Packet trap action. 
* @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not @@ -576,6 +603,10 @@ enum devlink_attr { DEVLINK_ATTR_LINECARD_TYPE, /* string */ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES, /* nested */ + DEVLINK_ATTR_NESTED_DEVLINK, /* nested */ + + DEVLINK_ATTR_SELFTESTS, /* nested */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 811897dadcae..860f867c50c0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -2084,7 +2084,7 @@ struct kvm_stats_header { #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) -#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN #define KVM_STATS_BASE_SHIFT 8 #define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 9a2ee1e39fad..c142c0f8ed8a 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -589,6 +589,9 @@ enum { TCA_FLOWER_KEY_NUM_OF_VLANS, /* u8 */ + TCA_FLOWER_KEY_PPPOE_SID, /* be16 */ + TCA_FLOWER_KEY_PPP_PROTO, /* be16 */ + __TCA_FLOWER_MAX, }; diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index eb815e0d0ac3..a9fa777f16de 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -35,6 +35,8 @@ enum { SEG6_IPTUN_MODE_INLINE, SEG6_IPTUN_MODE_ENCAP, SEG6_IPTUN_MODE_L2ENCAP, + SEG6_IPTUN_MODE_ENCAP_RED, + SEG6_IPTUN_MODE_L2ENCAP_RED, }; #endif diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7ac971ea98d1..7e64447659f3 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6643,7 +6643,7 @@ static void btf_snprintf_show(struct btf_show *show, const char *fmt, if (len < 0) { ssnprintf->len_left = 0; ssnprintf->len = len; - } else if (len > ssnprintf->len_left) { + } else if (len >= ssnprintf->len_left) { /* no space, drive on to get length we would have written */ ssnprintf->len_left = 0; ssnprintf->len += len; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 1400561efb15..a0e02b009487 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -477,7 +477,7 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, if (!dev->netdev_ops->ndo_xdp_xmit) return -EOPNOTSUPP; - err = xdp_ok_fwd_dev(dev, xdpf->len); + err = xdp_ok_fwd_dev(dev, xdp_get_frame_len(xdpf)); if (unlikely(err)) return err; @@ -536,7 +536,7 @@ static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf) !obj->dev->netdev_ops->ndo_xdp_xmit) return false; - if (xdp_ok_fwd_dev(obj->dev, xdpf->len)) + if (xdp_ok_fwd_dev(obj->dev, xdp_get_frame_len(xdpf))) return false; return true; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 42e387a12694..0f532e6a717f 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -98,7 +98,7 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd default: ret = -EINVAL; break; - }; + } mutex_unlock(&tr->mutex); return ret; @@ -248,14 +248,17 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) int ret; faddr = ftrace_location((unsigned long)ip); - if (faddr) + if (faddr) { + if (!tr->fops) + return -ENOTSUPP; tr->func.ftrace_managed = true; + } if (bpf_trampoline_module_get(tr)) return -ENOENT; if (tr->func.ftrace_managed) { - 
ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 0); + ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1); ret = register_ftrace_direct_multi(tr->fops, (long)new_addr); } else { ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 50ba70f019de..1c304fec89c0 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -511,10 +511,52 @@ static bool srcu_readers_active(struct srcu_struct *ssp) return sum; } -#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending. -#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers. -#define SRCU_MAX_NODELAY_PHASE 1 // Maximum per-GP-phase consecutive no-delay instances. -#define SRCU_MAX_NODELAY 100 // Maximum consecutive no-delay instances. +/* + * We use an adaptive strategy for synchronize_srcu() and especially for + * synchronize_srcu_expedited(). We spin for a fixed time period + * (defined below, boot-time configurable) to allow SRCU readers to exit + * their read-side critical sections. If there are still some readers + * after one jiffy, we repeatedly block for one-jiffy time periods. + * The blocking time is increased as the grace-period age increases, + * with max blocking time capped at 10 jiffies. + */ +#define SRCU_DEFAULT_RETRY_CHECK_DELAY 5 + +static ulong srcu_retry_check_delay = SRCU_DEFAULT_RETRY_CHECK_DELAY; +module_param(srcu_retry_check_delay, ulong, 0444); + +#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending. +#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers. + +#define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO 3UL // Lowmark on default per-GP-phase + // no-delay instances. +#define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI 1000UL // Highmark on default per-GP-phase + // no-delay instances. + +#define SRCU_UL_CLAMP_LO(val, low) ((val) > (low) ? (val) : (low)) +#define SRCU_UL_CLAMP_HI(val, high) ((val) < (high) ? (val) : (high)) +#define SRCU_UL_CLAMP(val, low, high) SRCU_UL_CLAMP_HI(SRCU_UL_CLAMP_LO((val), (low)), (high)) +// Per-GP-phase no-delay instances adjusted to allow a non-sleeping poll of up to +// one jiffy's duration. The multiplication by 2 factors in the srcu_get_delay() +// call from process_srcu(). +#define SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED \ + (2UL * USEC_PER_SEC / HZ / SRCU_DEFAULT_RETRY_CHECK_DELAY) + +// Maximum per-GP-phase consecutive no-delay instances. +#define SRCU_DEFAULT_MAX_NODELAY_PHASE \ + SRCU_UL_CLAMP(SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED, \ + SRCU_DEFAULT_MAX_NODELAY_PHASE_LO, \ + SRCU_DEFAULT_MAX_NODELAY_PHASE_HI) + +static ulong srcu_max_nodelay_phase = SRCU_DEFAULT_MAX_NODELAY_PHASE; +module_param(srcu_max_nodelay_phase, ulong, 0444); + +// Maximum consecutive no-delay instances. +#define SRCU_DEFAULT_MAX_NODELAY (SRCU_DEFAULT_MAX_NODELAY_PHASE > 100 ?
\ + SRCU_DEFAULT_MAX_NODELAY_PHASE : 100) + +static ulong srcu_max_nodelay = SRCU_DEFAULT_MAX_NODELAY; +module_param(srcu_max_nodelay, ulong, 0444); /* * Return grace-period delay, zero if there are expedited grace @@ -522,16 +564,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp) */ static unsigned long srcu_get_delay(struct srcu_struct *ssp) { + unsigned long gpstart; + unsigned long j; unsigned long jbase = SRCU_INTERVAL; if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp))) jbase = 0; - if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) - jbase += jiffies - READ_ONCE(ssp->srcu_gp_start); - if (!jbase) { - WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1); - if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE) - jbase = 1; + if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) { + j = jiffies - 1; + gpstart = READ_ONCE(ssp->srcu_gp_start); + if (time_after(j, gpstart)) + jbase += j - gpstart; + if (!jbase) { + WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1); + if (READ_ONCE(ssp->srcu_n_exp_nodelay) > srcu_max_nodelay_phase) + jbase = 1; + } } return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase; } @@ -607,15 +655,6 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) EXPORT_SYMBOL_GPL(__srcu_read_unlock); /* - * We use an adaptive strategy for synchronize_srcu() and especially for - * synchronize_srcu_expedited(). We spin for a fixed time period - * (defined below) to allow SRCU readers to exit their read-side critical - * sections. If there are still some readers after a few microseconds, - * we repeatedly block for 1-millisecond time periods. - */ -#define SRCU_RETRY_CHECK_DELAY 5 - -/* * Start an SRCU grace period. */ static void srcu_gp_start(struct srcu_struct *ssp) @@ -700,7 +739,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp */ static void srcu_gp_end(struct srcu_struct *ssp) { - unsigned long cbdelay; + unsigned long cbdelay = 1; bool cbs; bool last_lvl; int cpu; @@ -720,7 +759,9 @@ static void srcu_gp_end(struct srcu_struct *ssp) spin_lock_irq_rcu_node(ssp); idx = rcu_seq_state(ssp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); - cbdelay = !!srcu_get_delay(ssp); + if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp))) + cbdelay = 0; + WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns()); rcu_seq_end(&ssp->srcu_gp_seq); gpseq = rcu_seq_current(&ssp->srcu_gp_seq); @@ -921,12 +962,16 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, */ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) { + unsigned long curdelay; + + curdelay = !srcu_get_delay(ssp); + for (;;) { if (srcu_readers_active_idx_check(ssp, idx)) return true; - if (--trycount + !srcu_get_delay(ssp) <= 0) + if ((--trycount + curdelay) <= 0) return false; - udelay(SRCU_RETRY_CHECK_DELAY); + udelay(srcu_retry_check_delay); } } @@ -1582,7 +1627,7 @@ static void process_srcu(struct work_struct *work) j = jiffies; if (READ_ONCE(ssp->reschedule_jiffies) == j) { WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1); - if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY) + if (READ_ONCE(ssp->reschedule_count) > srcu_max_nodelay) curdelay = 1; } else { WRITE_ONCE(ssp->reschedule_count, 1); @@ -1674,6 +1719,11 @@ static int __init srcu_bootup_announce(void) pr_info("Hierarchical SRCU implementation.\n"); if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF) 
pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff); + if (srcu_retry_check_delay != SRCU_DEFAULT_RETRY_CHECK_DELAY) + pr_info("\tNon-default retry check delay of %lu us.\n", srcu_retry_check_delay); + if (srcu_max_nodelay != SRCU_DEFAULT_MAX_NODELAY) + pr_info("\tNon-default max no-delay of %lu.\n", srcu_max_nodelay); + pr_info("\tMax phase no-delay instances is %lu.\n", srcu_max_nodelay_phase); return 0; } early_initcall(srcu_bootup_announce); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index b5152961b743..7bf561262cb8 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1701,7 +1701,10 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) * the throttle. */ p->dl.dl_throttled = 0; - BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH); + if (!(flags & ENQUEUE_REPLENISH)) + printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n", + task_pid_nr(p)); + return; } diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index bb9962b33f95..59ddb00d6944 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -454,6 +454,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue) rcu_assign_pointer(watch->queue, wqueue); } +static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue) +{ + const struct cred *cred; + struct watch *w; + + hlist_for_each_entry(w, &wlist->watchers, list_node) { + struct watch_queue *wq = rcu_access_pointer(w->queue); + if (wqueue == wq && watch->id == w->id) + return -EBUSY; + } + + cred = current_cred(); + if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) { + atomic_dec(&cred->user->nr_watches); + return -EAGAIN; + } + + watch->cred = get_cred(cred); + rcu_assign_pointer(watch->watch_list, wlist); + + kref_get(&wqueue->usage); + kref_get(&watch->usage); + hlist_add_head(&watch->queue_node, &wqueue->watches); + hlist_add_head_rcu(&watch->list_node, &wlist->watchers); + return 0; +} + /** * add_watch_to_object - Add a watch on an object to a watch list * @watch: The watch to add @@ -468,34 +495,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue) */ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) { - struct watch_queue *wqueue = rcu_access_pointer(watch->queue); - struct watch *w; - - hlist_for_each_entry(w, &wlist->watchers, list_node) { - struct watch_queue *wq = rcu_access_pointer(w->queue); - if (wqueue == wq && watch->id == w->id) - return -EBUSY; - } - - watch->cred = get_current_cred(); - rcu_assign_pointer(watch->watch_list, wlist); + struct watch_queue *wqueue; + int ret = -ENOENT; - if (atomic_inc_return(&watch->cred->user->nr_watches) > - task_rlimit(current, RLIMIT_NOFILE)) { - atomic_dec(&watch->cred->user->nr_watches); - put_cred(watch->cred); - return -EAGAIN; - } + rcu_read_lock(); + wqueue = rcu_access_pointer(watch->queue); if (lock_wqueue(wqueue)) { - kref_get(&wqueue->usage); - kref_get(&watch->usage); - hlist_add_head(&watch->queue_node, &wqueue->watches); + spin_lock(&wlist->lock); + ret = add_one_watch(watch, wlist, wqueue); + spin_unlock(&wlist->lock); unlock_wqueue(wqueue); } - hlist_add_head(&watch->list_node, &wlist->watchers); - return 0; + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(add_watch_to_object); @@ -87,7 +87,8 @@ retry: * belongs to this folio. 
*/ if (unlikely(page_folio(page) != folio)) { - folio_put_refs(folio, refs); + if (!put_devmap_managed_page_refs(&folio->page, refs)) + folio_put_refs(folio, refs); goto retry; } @@ -176,7 +177,8 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) refs *= GUP_PIN_COUNTING_BIAS; } - folio_put_refs(folio, refs); + if (!put_devmap_managed_page_refs(&folio->page, refs)) + folio_put_refs(folio, refs); } /** diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a57e1be41401..a18c071c294e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4788,8 +4788,13 @@ again: * sharing with another vma. */ ; - } else if (unlikely(is_hugetlb_entry_migration(entry) || - is_hugetlb_entry_hwpoisoned(entry))) { + } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { + bool uffd_wp = huge_pte_uffd_wp(entry); + + if (!userfaultfd_wp(dst_vma) && uffd_wp) + entry = huge_pte_clear_uffd_wp(entry); + set_huge_pte_at(dst, addr, dst_pte, entry); + } else if (unlikely(is_hugetlb_entry_migration(entry))) { swp_entry_t swp_entry = pte_to_swp_entry(entry); bool uffd_wp = huge_pte_uffd_wp(entry); @@ -5947,6 +5952,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, page = alloc_huge_page(dst_vma, dst_addr, 0); if (IS_ERR(page)) { + put_page(*pagep); ret = -ENOMEM; *pagep = NULL; goto out; diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 4b5e5a3d3a63..6aff49f6b79e 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -603,14 +603,6 @@ static unsigned long kfence_init_pool(void) addr += 2 * PAGE_SIZE; } - /* - * The pool is live and will never be deallocated from this point on. - * Remove the pool object from the kmemleak object tree, as it would - * otherwise overlap with allocations returned by kfence_alloc(), which - * are registered with kmemleak through the slab post-alloc hook. - */ - kmemleak_free(__kfence_pool); - return 0; } @@ -623,8 +615,16 @@ static bool __init kfence_init_pool_early(void) addr = kfence_init_pool(); - if (!addr) + if (!addr) { + /* + * The pool is live and will never be deallocated from this point on. + * Ignore the pool object from the kmemleak phys object tree, as it would + * otherwise overlap with allocations returned by kfence_alloc(), which + * are registered with kmemleak through the slab post-alloc hook. + */ + kmemleak_ignore_phys(__pa(__kfence_pool)); return true; + } /* * Only release unprotected pages, and do not try to go back and change diff --git a/mm/memory.c b/mm/memory.c index 4cf7d4b6c950..1c6027adc542 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3043,7 +3043,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf) pte_t entry; VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE)); - VM_BUG_ON(PageAnon(page) && !PageAnonExclusive(page)); + VM_BUG_ON(page && PageAnon(page) && !PageAnonExclusive(page)); /* * Clear the pages cpupid information as the existing @@ -4369,9 +4369,12 @@ vm_fault_t finish_fault(struct vm_fault *vmf) return VM_FAULT_OOM; } - /* See comment in handle_pte_fault() */ + /* + * See comment in handle_pte_fault() for how this scenario happens, we + * need to return NOPAGE so that we drop this page. 
+ */ if (pmd_devmap_trans_unstable(vmf->pmd)) - return 0; + return VM_FAULT_NOPAGE; vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); diff --git a/mm/memremap.c b/mm/memremap.c index b870a659eee6..745eea0f99c3 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -499,7 +499,7 @@ void free_zone_device_page(struct page *page) } #ifdef CONFIG_FS_DAX -bool __put_devmap_managed_page(struct page *page) +bool __put_devmap_managed_page_refs(struct page *page, int refs) { if (page->pgmap->type != MEMORY_DEVICE_FS_DAX) return false; @@ -509,9 +509,9 @@ bool __put_devmap_managed_page(struct page *page) * refcount is 1, then the page is free and the refcount is * stable because nobody holds a reference on the page. */ - if (page_ref_dec_return(page) == 1) + if (page_ref_sub_return(page, refs) == 1) wake_up_var(&page->_refcount); return true; } -EXPORT_SYMBOL(__put_devmap_managed_page); +EXPORT_SYMBOL(__put_devmap_managed_page_refs); #endif /* CONFIG_FS_DAX */ diff --git a/mm/secretmem.c b/mm/secretmem.c index 206ed6b40c1d..f06279d6190a 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -55,22 +55,28 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf) gfp_t gfp = vmf->gfp_mask; unsigned long addr; struct page *page; + vm_fault_t ret; int err; if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) return vmf_error(-EINVAL); + filemap_invalidate_lock_shared(mapping); + retry: page = find_lock_page(mapping, offset); if (!page) { page = alloc_page(gfp | __GFP_ZERO); - if (!page) - return VM_FAULT_OOM; + if (!page) { + ret = VM_FAULT_OOM; + goto out; + } err = set_direct_map_invalid_noflush(page); if (err) { put_page(page); - return vmf_error(err); + ret = vmf_error(err); + goto out; } __SetPageUptodate(page); @@ -86,7 +92,8 @@ retry: if (err == -EEXIST) goto retry; - return vmf_error(err); + ret = vmf_error(err); + goto out; } addr = (unsigned long)page_address(page); @@ -94,7 +101,11 @@ retry: } vmf->page = page; - return VM_FAULT_LOCKED; + ret = VM_FAULT_LOCKED; + +out: + filemap_invalidate_unlock_shared(mapping); + return ret; } static const struct vm_operations_struct secretmem_vm_ops = { @@ -162,12 +173,20 @@ static int secretmem_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); + struct address_space *mapping = inode->i_mapping; unsigned int ia_valid = iattr->ia_valid; + int ret; + + filemap_invalidate_lock(mapping); if ((ia_valid & ATTR_SIZE) && inode->i_size) - return -EINVAL; + ret = -EINVAL; + else + ret = simple_setattr(mnt_userns, dentry, iattr); - return simple_setattr(mnt_userns, dentry, iattr); + filemap_invalidate_unlock(mapping); + + return ret; } static const struct inode_operations secretmem_iops = { diff --git a/mm/shmem.c b/mm/shmem.c index a6f565308133..b7f2d4a56867 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3392,7 +3392,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) break; case Opt_nr_blocks: ctx->blocks = memparse(param->string, &rest); - if (*rest) + if (*rest || ctx->blocks > S64_MAX) goto bad_value; ctx->seen |= SHMEM_SEEN_BLOCKS; break; @@ -3514,10 +3514,7 @@ static int shmem_reconfigure(struct fs_context *fc) raw_spin_lock(&sbinfo->stat_lock); inodes = sbinfo->max_inodes - sbinfo->free_inodes; - if (ctx->blocks > S64_MAX) { - err = "Number of blocks too large"; - goto out; - } + if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { if (!sbinfo->max_blocks) { err = "Cannot retroactively limit size"; diff --git 
a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 148ce629a59f..e6d804b82b67 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5297,6 +5297,9 @@ int hci_suspend_sync(struct hci_dev *hdev) return err; } + /* Update event mask so only the allowed event can wakeup the host */ + hci_set_event_mask_sync(hdev); + /* Only configure accept list if disconnect succeeded and wake * isn't being prevented. */ @@ -5308,9 +5311,6 @@ int hci_suspend_sync(struct hci_dev *hdev) /* Unpause to take care of updating scanning params */ hdev->scanning_paused = false; - /* Update event mask so only the allowed event can wakeup the host */ - hci_set_event_mask_sync(hdev); - /* Enable event filter for paired devices */ hci_update_event_filter_sync(hdev); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 09ecaf556de5..77c0aac14539 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn, } /* Find channel with given SCID. - * Returns locked channel. */ + * Returns a reference locked channel. + */ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid) { @@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_scid(conn, cid); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; } /* Find channel with given DCID. - * Returns locked channel. + * Returns a reference locked channel. */ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, u16 cid) @@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_dcid(conn, cid); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; @@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_ident(conn, ident); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; @@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *c) kref_get(&c->kref); } +struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) +{ + BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); + + if (!kref_get_unless_zero(&c->kref)) + return NULL; + + return c; +} + void l2cap_chan_put(struct l2cap_chan *c) { BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); @@ -1969,7 +1992,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, src_match = !bacmp(&c->src, src); dst_match = !bacmp(&c->dst, dst); if (src_match && dst_match) { - l2cap_chan_hold(c); + c = l2cap_chan_hold_unless_zero(c); + if (!c) + continue; + read_unlock(&chan_list_lock); return c; } @@ -1984,7 +2010,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, } if (c1) - l2cap_chan_hold(c1); + c1 = l2cap_chan_hold_unless_zero(c1); read_unlock(&chan_list_lock); @@ -4464,6 +4490,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, unlock: 
l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return err; } @@ -4578,6 +4605,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, done: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return err; } @@ -5305,6 +5333,7 @@ send_move_response: l2cap_send_move_chan_rsp(chan, result); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5397,6 +5426,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result) } l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, @@ -5426,6 +5456,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static int l2cap_move_channel_rsp(struct l2cap_conn *conn, @@ -5489,6 +5520,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn, l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5524,6 +5556,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn, } l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5896,12 +5929,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, if (credits > max_credits) { BT_ERR("LE credits overflow"); l2cap_send_disconn_req(chan, ECONNRESET); - l2cap_chan_unlock(chan); /* Return 0 so that we don't trigger an unnecessary * command reject packet. */ - return 0; + goto unlock; } chan->tx_credits += credits; @@ -5912,7 +5944,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, if (chan->tx_credits) chan->ops->resume(chan); +unlock: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -7598,6 +7632,7 @@ drop: done: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, @@ -8086,7 +8121,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, if (src_type != c->src_type) continue; - l2cap_chan_hold(c); + c = l2cap_chan_hold_unless_zero(c); read_unlock(&chan_list_lock); return c; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8cfafd7a0576..646d10401b80 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4844,7 +4844,6 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, else status = MGMT_STATUS_FAILED; - mgmt_pending_remove(cmd); goto unlock; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 1ef14a099c6b..5aeb3646e74c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -589,9 +589,13 @@ static int br_fill_ifinfo(struct sk_buff *skb, } done: + if (af) { + if (nlmsg_get_pos(skb) - (void *)af > nla_attr_size(0)) + nla_nest_end(skb, af); + else + nla_nest_cancel(skb, af); + } - if (af) - nla_nest_end(skb, af); nlmsg_end(skb, nlh); return 0; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 251e666ba9a2..748be7253248 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -47,7 +47,7 @@ enum caif_states { struct caifsock { struct sock sk; /* must be first member */ struct cflayer layer; - u32 flow_state; + unsigned long flow_state; struct caif_connect_request conn_req; struct mutex readlock; struct dentry *debugfs_socket_dir; @@ -56,38 +56,32 @@ struct caifsock { static int rx_flow_is_on(struct caifsock *cf_sk) { - return test_bit(RX_FLOW_ON_BIT, - (void *) &cf_sk->flow_state); + return test_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state); } static int 
tx_flow_is_on(struct caifsock *cf_sk) { - return test_bit(TX_FLOW_ON_BIT, - (void *) &cf_sk->flow_state); + return test_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state); } static void set_rx_flow_off(struct caifsock *cf_sk) { - clear_bit(RX_FLOW_ON_BIT, - (void *) &cf_sk->flow_state); + clear_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state); } static void set_rx_flow_on(struct caifsock *cf_sk) { - set_bit(RX_FLOW_ON_BIT, - (void *) &cf_sk->flow_state); + set_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state); } static void set_tx_flow_off(struct caifsock *cf_sk) { - clear_bit(TX_FLOW_ON_BIT, - (void *) &cf_sk->flow_state); + clear_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state); } static void set_tx_flow_on(struct caifsock *cf_sk) { - set_bit(TX_FLOW_ON_BIT, - (void *) &cf_sk->flow_state); + set_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state); } static void caif_read_lock(struct sock *sk) diff --git a/net/core/devlink.c b/net/core/devlink.c index 98d79feeb3dc..c43c96554a3e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -70,6 +70,7 @@ struct devlink { u8 reload_failed:1; refcount_t refcount; struct completion comp; + struct rcu_head rcu; char priv[] __aligned(NETDEV_ALIGN); }; @@ -88,6 +89,7 @@ struct devlink_linecard { const char *type; struct devlink_linecard_type *types; unsigned int types_count; + struct devlink *nested_devlink; }; /** @@ -199,6 +201,10 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ DEVLINK_PORT_FN_STATE_ACTIVE), }; +static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = { + [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG }, +}; + static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); #define DEVLINK_REGISTERED XA_MARK_1 @@ -221,8 +227,6 @@ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); /* devlink_mutex * * An overall lock guarding every operation coming from userspace. - * It also guards devlink devices list and it is taken when - * driver registers/unregisters it. */ static DEFINE_MUTEX(devlink_mutex); @@ -232,10 +236,21 @@ struct net *devlink_net(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_net); +static void __devlink_put_rcu(struct rcu_head *head) +{ + struct devlink *devlink = container_of(head, struct devlink, rcu); + + complete(&devlink->comp); +} + void devlink_put(struct devlink *devlink) { if (refcount_dec_and_test(&devlink->refcount)) - complete(&devlink->comp); + /* Make sure unregister operation that may await the completion + * is unblocked only after all users are after the end of + * RCU grace period. 
+ */ + call_rcu(&devlink->rcu, __devlink_put_rcu); } struct devlink *__must_check devlink_try_get(struct devlink *devlink) @@ -278,12 +293,62 @@ void devl_unlock(struct devlink *devlink) } EXPORT_SYMBOL_GPL(devl_unlock); +static struct devlink * +devlinks_xa_find_get(struct net *net, unsigned long *indexp, xa_mark_t filter, + void * (*xa_find_fn)(struct xarray *, unsigned long *, + unsigned long, xa_mark_t)) +{ + struct devlink *devlink; + + rcu_read_lock(); +retry: + devlink = xa_find_fn(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED); + if (!devlink) + goto unlock; + /* For a possible retry, the xa_find_after() should always be used */ + xa_find_fn = xa_find_after; + if (!devlink_try_get(devlink)) + goto retry; + if (!net_eq(devlink_net(devlink), net)) { + devlink_put(devlink); + goto retry; + } +unlock: + rcu_read_unlock(); + return devlink; +} + +static struct devlink *devlinks_xa_find_get_first(struct net *net, + unsigned long *indexp, + xa_mark_t filter) +{ + return devlinks_xa_find_get(net, indexp, filter, xa_find); +} + +static struct devlink *devlinks_xa_find_get_next(struct net *net, + unsigned long *indexp, + xa_mark_t filter) +{ + return devlinks_xa_find_get(net, indexp, filter, xa_find_after); +} + +/* Iterate over devlink pointers which we managed to get a reference to. + * devlink_put() needs to be called for each iterated devlink pointer + * in loop body in order to release the reference. + */ +#define devlinks_xa_for_each_get(net, index, devlink, filter) \ + for (index = 0, \ + devlink = devlinks_xa_find_get_first(net, &index, filter); \ + devlink; devlink = devlinks_xa_find_get_next(net, &index, filter)) + +#define devlinks_xa_for_each_registered_get(net, index, devlink) \ + devlinks_xa_for_each_get(net, index, devlink, DEVLINK_REGISTERED) + static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { struct devlink *devlink; unsigned long index; - bool found = false; char *busname; char *devname; @@ -293,21 +358,14 @@ static struct devlink *devlink_get_from_attrs(struct net *net, busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); - lockdep_assert_held(&devlink_mutex); - - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + devlinks_xa_for_each_registered_get(net, index, devlink) { if (strcmp(devlink->dev->bus->name, busname) == 0 && - strcmp(dev_name(devlink->dev), devname) == 0 && - net_eq(devlink_net(devlink), net)) { - found = true; - break; - } + strcmp(dev_name(devlink->dev), devname) == 0) + return devlink; + devlink_put(devlink); } - if (!found || !devlink_try_get(devlink)) - devlink = ERR_PTR(-ENODEV); - - return devlink; + return ERR_PTR(-ENODEV); } static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, @@ -641,6 +699,10 @@ struct devlink_region { const struct devlink_region_ops *ops; const struct devlink_port_region_ops *port_ops; }; + struct mutex snapshot_lock; /* protects snapshot_list, + * max_snapshots and cur_snapshots + * consistency.
+ */ struct list_head snapshot_list; u32 max_snapshots; u32 cur_snapshots; @@ -803,6 +865,24 @@ static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) return 0; } +static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink) +{ + struct nlattr *nested_attr; + + nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK); + if (!nested_attr) + return -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + nla_nest_end(msg, nested_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(msg, nested_attr); + return -EMSGSIZE; +} + struct devlink_reload_combination { enum devlink_reload_action action; enum devlink_reload_limit limit; @@ -1329,13 +1409,7 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_rate, &devlink->rate_list, list) { enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; @@ -1356,7 +1430,6 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -1432,15 +1505,7 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) { - devlink_put(devlink); - continue; - } - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (idx < start) { idx++; devlink_put(devlink); @@ -1495,13 +1560,7 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_port, &devlink->port_list, list) { if (idx < start) { @@ -1521,7 +1580,6 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -2104,6 +2162,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, nla_nest_end(msg, attr); } + if (linecard->nested_devlink && + devlink_nl_put_nested_handle(msg, linecard->nested_devlink)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -2177,13 +2239,7 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { mutex_lock(&devlink->linecards_lock); list_for_each_entry(linecard, &devlink->linecard_list, list) { if (idx < start) { @@ -2206,7 +2262,6 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, idx++; } mutex_unlock(&devlink->linecards_lock); -retry: devlink_put(devlink); } out: @@ -2449,13 +2504,7 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, int err; 
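
The devlink_nl_put_nested_handle() helper added above, like the earlier br_fill_ifinfo() fix, follows netlink's commit-or-cancel discipline: open a nest, try to emit the inner attributes, then either close the nest or roll the message back so a truncated container never reaches userspace. A minimal sketch of that discipline, assuming a hypothetical demo_put_nest() wrapper and emit_inner() callback; only the nla_nest_start()/nla_nest_end()/nla_nest_cancel() calls are the real API:

        /* Sketch: commit-or-cancel handling for one nested netlink attribute.
         * demo_put_nest() and emit_inner() are illustrative stand-ins.
         */
        static int demo_put_nest(struct sk_buff *msg, int attrtype,
                                 int (*emit_inner)(struct sk_buff *msg))
        {
                struct nlattr *nest;

                nest = nla_nest_start(msg, attrtype);
                if (!nest)
                        return -EMSGSIZE;       /* no room for the nest header */

                if (emit_inner(msg)) {
                        /* Trim the message back to before the nest header. */
                        nla_nest_cancel(msg, nest);
                        return -EMSGSIZE;
                }

                /* Patch the nest header with the final payload length. */
                nla_nest_end(msg, nest);
                return 0;
        }

The br_fill_ifinfo() hunk adds one refinement on top of this: it compares nlmsg_get_pos() against the bare attribute-header size and cancels the nest when nothing was added, rather than emitting an empty container.
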
mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { if (idx < start) { @@ -2475,7 +2524,6 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -2601,12 +2649,8 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops->sb_pool_get) + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + if (!devlink->ops->sb_pool_get) goto retry; devl_lock(devlink); @@ -2822,12 +2866,8 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops->sb_port_pool_get) + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + if (!devlink->ops->sb_port_pool_get) goto retry; devl_lock(devlink); @@ -3071,12 +3111,8 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops->sb_tc_pool_bind_get) + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + if (!devlink->ops->sb_tc_pool_bind_get) goto retry; devl_lock(devlink); @@ -4798,6 +4834,206 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb, return ret; } +static int +devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink, + u32 portid, u32 seq, int flags, + struct netlink_ext_ack *extack) +{ + struct nlattr *selftests; + void *hdr; + int err; + int i; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, + DEVLINK_CMD_SELFTESTS_GET); + if (!hdr) + return -EMSGSIZE; + + err = -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto err_cancel_msg; + + selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); + if (!selftests) + goto err_cancel_msg; + + for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; + i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { + if (devlink->ops->selftest_check(devlink, i, extack)) { + err = nla_put_flag(msg, i); + if (err) + goto err_cancel_msg; + } + } + + nla_nest_end(msg, selftests); + genlmsg_end(msg, hdr); + return 0; + +err_cancel_msg: + genlmsg_cancel(msg, hdr); + return err; +} + +static int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct sk_buff *msg; + int err; + + if (!devlink->ops->selftest_check) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid, + info->snd_seq, 0, info->extack); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int 
devlink_nl_cmd_selftests_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + int start = cb->args[0]; + unsigned long index; + int idx = 0; + int err = 0; + + mutex_lock(&devlink_mutex); + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + if (idx < start || !devlink->ops->selftest_check) + goto inc; + + devl_lock(devlink); + err = devlink_nl_selftests_fill(msg, devlink, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->extack); + devl_unlock(devlink); + if (err) { + devlink_put(devlink); + break; + } +inc: + idx++; + devlink_put(devlink); + } + mutex_unlock(&devlink_mutex); + + if (err != -EMSGSIZE) + return err; + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id, + enum devlink_selftest_status test_status) +{ + struct nlattr *result_attr; + + result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT); + if (!result_attr) + return -EMSGSIZE; + + if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) || + nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS, + test_status)) + goto nla_put_failure; + + nla_nest_end(skb, result_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, result_attr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_selftests_run(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1]; + struct devlink *devlink = info->user_ptr[0]; + struct nlattr *attrs, *selftests; + struct sk_buff *msg; + void *hdr; + int err; + int i; + + if (!devlink->ops->selftest_run || !devlink->ops->selftest_check) + return -EOPNOTSUPP; + + if (!info->attrs[DEVLINK_ATTR_SELFTESTS]) { + NL_SET_ERR_MSG_MOD(info->extack, "selftest required"); + return -EINVAL; + } + + attrs = info->attrs[DEVLINK_ATTR_SELFTESTS]; + + err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs, + devlink_selftest_nl_policy, info->extack); + if (err < 0) + return err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = -EMSGSIZE; + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN); + if (!hdr) + goto free_msg; + + if (devlink_nl_put_handle(msg, devlink)) + goto genlmsg_cancel; + + selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); + if (!selftests) + goto genlmsg_cancel; + + for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; + i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { + enum devlink_selftest_status test_status; + + if (nla_get_flag(tb[i])) { + if (!devlink->ops->selftest_check(devlink, i, + info->extack)) { + if (devlink_selftest_result_put(msg, i, + DEVLINK_SELFTEST_STATUS_SKIP)) + goto selftests_nest_cancel; + continue; + } + + test_status = devlink->ops->selftest_run(devlink, i, + info->extack); + if (devlink_selftest_result_put(msg, i, test_status)) + goto selftests_nest_cancel; + } + } + + nla_nest_end(msg, selftests); + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + +selftests_nest_cancel: + nla_nest_cancel(msg, selftests); +genlmsg_cancel: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return err; +} + static const struct devlink_param devlink_param_generic[] = { { .id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, @@ -5158,13 +5394,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if 
(!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(param_item, &devlink->param_list, list) { if (idx < start) { @@ -5186,7 +5416,6 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -5393,13 +5622,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_port, &devlink->port_list, list) { list_for_each_entry(param_item, @@ -5426,7 +5649,6 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, } } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -5676,21 +5898,28 @@ static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id) { unsigned long count; void *p; + int err; - devl_assert_locked(devlink); - + xa_lock(&devlink->snapshot_ids); p = xa_load(&devlink->snapshot_ids, id); - if (WARN_ON(!p)) - return -EINVAL; + if (WARN_ON(!p)) { + err = -EINVAL; + goto unlock; + } - if (WARN_ON(!xa_is_value(p))) - return -EINVAL; + if (WARN_ON(!xa_is_value(p))) { + err = -EINVAL; + goto unlock; + } count = xa_to_value(p); count++; - return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), - GFP_KERNEL)); + err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), + GFP_ATOMIC)); +unlock: + xa_unlock(&devlink->snapshot_ids); + return err; } /** @@ -5713,25 +5942,26 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id) unsigned long count; void *p; - devl_assert_locked(devlink); - + xa_lock(&devlink->snapshot_ids); p = xa_load(&devlink->snapshot_ids, id); if (WARN_ON(!p)) - return; + goto unlock; if (WARN_ON(!xa_is_value(p))) - return; + goto unlock; count = xa_to_value(p); if (count > 1) { count--; - xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), - GFP_KERNEL); + __xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), + GFP_ATOMIC); } else { /* If this was the last user, we can erase this id */ - xa_erase(&devlink->snapshot_ids, id); + __xa_erase(&devlink->snapshot_ids, id); } +unlock: + xa_unlock(&devlink->snapshot_ids); } /** @@ -5752,13 +5982,17 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id) */ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id) { - devl_assert_locked(devlink); + int err; - if (xa_load(&devlink->snapshot_ids, id)) + xa_lock(&devlink->snapshot_ids); + if (xa_load(&devlink->snapshot_ids, id)) { + xa_unlock(&devlink->snapshot_ids); return -EEXIST; - - return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(0), - GFP_KERNEL)); + } + err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0), + GFP_ATOMIC)); + xa_unlock(&devlink->snapshot_ids); + return err; } /** @@ -5779,8 +6013,6 @@ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id) */ static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) { - devl_assert_locked(devlink); - return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1), xa_limit_32b, GFP_KERNEL); } @@ -5793,7 +6025,7 @@ static int __devlink_region_snapshot_id_get(struct devlink 
*devlink, u32 *id) * Multiple snapshots can be created on a region. * The @snapshot_id should be obtained using the getter function. * - * Must be called only while holding the devlink instance lock. + * Must be called only while holding the region snapshot lock. * * @region: devlink region of the snapshot * @data: snapshot data @@ -5807,7 +6039,7 @@ __devlink_region_snapshot_create(struct devlink_region *region, struct devlink_snapshot *snapshot; int err; - devl_assert_locked(devlink); + lockdep_assert_held(®ion->snapshot_lock); /* check if region can hold one more snapshot */ if (region->cur_snapshots == region->max_snapshots) @@ -5845,7 +6077,7 @@ static void devlink_region_snapshot_del(struct devlink_region *region, { struct devlink *devlink = region->devlink; - devl_assert_locked(devlink); + lockdep_assert_held(®ion->snapshot_lock); devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL); region->cur_snapshots--; @@ -5977,16 +6209,9 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink, &idx, start); -retry: devlink_put(devlink); if (err) goto out; @@ -6031,11 +6256,15 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb, if (!region) return -EINVAL; + mutex_lock(®ion->snapshot_lock); snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); - if (!snapshot) + if (!snapshot) { + mutex_unlock(®ion->snapshot_lock); return -EINVAL; + } devlink_region_snapshot_del(region, snapshot); + mutex_unlock(®ion->snapshot_lock); return 0; } @@ -6083,9 +6312,12 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } + mutex_lock(®ion->snapshot_lock); + if (region->cur_snapshots == region->max_snapshots) { NL_SET_ERR_MSG_MOD(info->extack, "The region has reached the maximum number of stored snapshots"); - return -ENOSPC; + err = -ENOSPC; + goto unlock; } snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]; @@ -6094,17 +6326,18 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) if (devlink_region_snapshot_get_by_id(region, snapshot_id)) { NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use"); - return -EEXIST; + err = -EEXIST; + goto unlock; } err = __devlink_snapshot_id_insert(devlink, snapshot_id); if (err) - return err; + goto unlock; } else { err = __devlink_region_snapshot_id_get(devlink, &snapshot_id); if (err) { NL_SET_ERR_MSG_MOD(info->extack, "Failed to allocate a new snapshot id"); - return err; + goto unlock; } } @@ -6142,16 +6375,20 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) goto err_notify; } + mutex_unlock(®ion->snapshot_lock); return 0; err_snapshot_create: region->ops->destructor(data); err_snapshot_capture: __devlink_snapshot_id_decrement(devlink, snapshot_id); + mutex_unlock(®ion->snapshot_lock); return err; err_notify: devlink_region_snapshot_del(region, snapshot); +unlock: + mutex_unlock(®ion->snapshot_lock); return err; } @@ -6511,13 +6748,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - 
continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (idx < start || !devlink->ops->info_get) goto inc; @@ -6535,7 +6766,6 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, } inc: idx++; -retry: devlink_put(devlink); } mutex_unlock(&devlink_mutex); @@ -7527,6 +7757,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state prev_health_state; struct devlink *devlink = reporter->devlink; unsigned long recover_ts_threshold; + int ret; /* write a log message of the current error */ WARN_ON(!msg); @@ -7560,11 +7791,14 @@ int devlink_health_report(struct devlink_health_reporter *reporter, mutex_unlock(&reporter->dump_lock); } - if (reporter->auto_recover) - return devlink_health_reporter_recover(reporter, - priv_ctx, NULL); + if (!reporter->auto_recover) + return 0; - return 0; + devl_lock(devlink); + ret = devlink_health_reporter_recover(reporter, priv_ctx, NULL); + devl_unlock(devlink); + + return ret; } EXPORT_SYMBOL_GPL(devlink_health_report); @@ -7691,13 +7925,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry_rep; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { mutex_lock(&devlink->reporters_lock); list_for_each_entry(reporter, &devlink->reporter_list, list) { @@ -7717,17 +7945,10 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, idx++; } mutex_unlock(&devlink->reporters_lock); -retry_rep: devlink_put(devlink); } - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry_port; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(port, &devlink->port_list, list) { mutex_lock(&port->reporters_lock); @@ -7752,7 +7973,6 @@ retry_rep: mutex_unlock(&port->reporters_lock); } devl_unlock(devlink); -retry_port: devlink_put(devlink); } out: @@ -8291,13 +8511,7 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(trap_item, &devlink->trap_list, list) { if (idx < start) { @@ -8317,7 +8531,6 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -8518,13 +8731,7 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(group_item, &devlink->trap_group_list, list) { @@ -8545,7 +8752,6 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); 
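
Each dumpit conversion in these hunks is the same mechanical change: the open-coded xa_for_each_marked() walk with its devlink_try_get() and net-namespace checks collapses into devlinks_xa_for_each_registered_get(), which hides the reference acquisition behind devlinks_xa_find_get(). Reduced to its core, the idiom looks like the following kernel-context sketch; struct demo_obj, demo_objs and DEMO_REGISTERED are stand-ins rather than symbols from this patch, while xa_find()/xa_find_after() and refcount_inc_not_zero() are the real interfaces:

        struct demo_obj {
                refcount_t refcount;
        };

        static DEFINE_XARRAY_FLAGS(demo_objs, XA_FLAGS_ALLOC);
        #define DEMO_REGISTERED XA_MARK_1

        /* Return the next marked object we can still take a reference on,
         * skipping objects whose refcount already dropped to zero.
         */
        static struct demo_obj *demo_find_get(unsigned long *indexp)
        {
                struct demo_obj *obj;

                rcu_read_lock();
                for (obj = xa_find(&demo_objs, indexp, ULONG_MAX, DEMO_REGISTERED);
                     obj;
                     obj = xa_find_after(&demo_objs, indexp, ULONG_MAX,
                                         DEMO_REGISTERED))
                        if (refcount_inc_not_zero(&obj->refcount))
                                break;  /* caller drops this reference later */
                rcu_read_unlock();
                return obj;
        }

This is also why devlink_put() now defers complete() through call_rcu(): a walker inside the rcu_read_lock() section may still be probing a dying entry's refcount, so the memory must stay valid until the grace period ends.
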
-retry: devlink_put(devlink); } out: @@ -8832,13 +9038,7 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(policer_item, &devlink->trap_policer_list, list) { @@ -8859,7 +9059,6 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -9004,6 +9203,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -9273,8 +9473,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_get_doit, .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, /* can be retrieved by unprivileged users */ }, { @@ -9282,24 +9481,21 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_recover_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_diagnose_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, @@ -9313,16 +9509,14 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_test_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_FLASH_UPDATE, @@ -9363,6 +9557,17 @@ static const struct genl_small_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_trap_policer_set_doit, .flags = GENL_ADMIN_PERM, }, + { + .cmd = DEVLINK_CMD_SELFTESTS_GET, + .doit = 
devlink_nl_cmd_selftests_get_doit, + .dumpit = devlink_nl_cmd_selftests_get_dumpit, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SELFTESTS_RUN, + .doit = devlink_nl_cmd_selftests_run, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -9589,10 +9794,8 @@ void devlink_register(struct devlink *devlink) ASSERT_DEVLINK_NOT_REGISTERED(devlink); /* Make sure that we are in .probe() routine */ - mutex_lock(&devlink_mutex); xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); devlink_notify_register(devlink); - mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_register); @@ -9609,10 +9812,8 @@ void devlink_unregister(struct devlink *devlink) devlink_put(devlink); wait_for_completion(&devlink->comp); - mutex_lock(&devlink_mutex); devlink_notify_unregister(devlink); xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); - mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_unregister); @@ -10316,6 +10517,7 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set); void devlink_linecard_provision_clear(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); + WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; linecard->type = NULL; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); @@ -10334,6 +10536,7 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear); void devlink_linecard_provision_fail(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); + WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); mutex_unlock(&linecard->state_lock); @@ -10381,6 +10584,23 @@ void devlink_linecard_deactivate(struct devlink_linecard *linecard) } EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); +/** + * devlink_linecard_nested_dl_set - Attach/detach nested devlink + * instance to linecard.
+ * + * @linecard: devlink linecard + * @nested_devlink: devlink instance to attach or NULL to detach + */ +void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink) +{ + mutex_lock(&linecard->state_lock); + linecard->nested_devlink = nested_devlink; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set); + int devl_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, @@ -11104,6 +11324,7 @@ struct devlink_region *devl_region_create(struct devlink *devlink, region->ops = ops; region->size = region_size; INIT_LIST_HEAD(®ion->snapshot_list); + mutex_init(®ion->snapshot_lock); list_add_tail(®ion->list, &devlink->region_list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); @@ -11177,6 +11398,7 @@ devlink_port_region_create(struct devlink_port *port, region->port_ops = ops; region->size = region_size; INIT_LIST_HEAD(®ion->snapshot_list); + mutex_init(®ion->snapshot_lock); list_add_tail(®ion->list, &port->region_list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); @@ -11206,6 +11428,7 @@ void devl_region_destroy(struct devlink_region *region) devlink_region_snapshot_del(region, snapshot); list_del(®ion->list); + mutex_destroy(®ion->snapshot_lock); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL); kfree(region); @@ -11246,13 +11469,7 @@ EXPORT_SYMBOL_GPL(devlink_region_destroy); */ int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) { - int err; - - devl_lock(devlink); - err = __devlink_region_snapshot_id_get(devlink, id); - devl_unlock(devlink); - - return err; + return __devlink_region_snapshot_id_get(devlink, id); } EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get); @@ -11268,9 +11485,7 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get); */ void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id) { - devl_lock(devlink); __devlink_snapshot_id_decrement(devlink, id); - devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put); @@ -11289,13 +11504,11 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put); int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id) { - struct devlink *devlink = region->devlink; int err; - devl_lock(devlink); + mutex_lock(®ion->snapshot_lock); err = __devlink_region_snapshot_create(region, data, snapshot_id); - devl_unlock(devlink); - + mutex_unlock(®ion->snapshot_lock); return err; } EXPORT_SYMBOL_GPL(devlink_region_snapshot_create); @@ -12281,13 +12494,7 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) * all devlink instances from this namespace into init_net. 
*/ mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), net)) - goto retry; - + devlinks_xa_for_each_registered_get(net, index, devlink) { WARN_ON(!(devlink->features & DEVLINK_F_RELOAD)); err = devlink_reload(devlink, &init_net, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, @@ -12295,7 +12502,6 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) &actions_performed, NULL); if (err && err != -EOPNOTSUPP) pr_warn("Failed to reload devlink instance into init_net\n"); -retry: devlink_put(devlink); } mutex_unlock(&devlink_mutex); diff --git a/net/core/filter.c b/net/core/filter.c index 57c5e4c4efd2..5669248aff25 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3918,7 +3918,7 @@ static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) offset -= frag_size; } out: - return offset + len < size ? addr + offset : NULL; + return offset + len <= size ? addr + offset : NULL; } BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset, @@ -4653,6 +4653,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4); + info->key.flow_flags = FLOWI_FLAG_ANYSRC; } return 0; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 6aee04f75e3e..237d396b6e41 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -895,6 +895,11 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, return result == BPF_OK; } +static bool is_pppoe_ses_hdr_valid(struct pppoe_hdr hdr) +{ + return hdr.ver == 1 && hdr.type == 1 && hdr.code == 0; +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @net: associated network namespace, derived from @skb if NULL @@ -1214,26 +1219,60 @@ proto_again: struct pppoe_hdr hdr; __be16 proto; } *hdr, _hdr; + u16 ppp_proto; + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } - nhoff += PPPOE_SES_HLEN; - switch (hdr->proto) { - case htons(PPP_IP): + if (!is_pppoe_ses_hdr_valid(hdr->hdr)) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } + + /* least significant bit of the most significant octet + * indicates if protocol field was compressed + */ + ppp_proto = ntohs(hdr->proto); + if (ppp_proto & 0x0100) { + ppp_proto = ppp_proto >> 8; + nhoff += PPPOE_SES_HLEN - 1; + } else { + nhoff += PPPOE_SES_HLEN; + } + + if (ppp_proto == PPP_IP) { proto = htons(ETH_P_IP); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; - break; - case htons(PPP_IPV6): + } else if (ppp_proto == PPP_IPV6) { proto = htons(ETH_P_IPV6); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; - break; - default: + } else if (ppp_proto == PPP_MPLS_UC) { + proto = htons(ETH_P_MPLS_UC); + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + } else if (ppp_proto == PPP_MPLS_MC) { + proto = htons(ETH_P_MPLS_MC); + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + } else if (ppp_proto_is_valid(ppp_proto)) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + } else { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PPPOE)) { + struct flow_dissector_key_pppoe *key_pppoe; + + key_pppoe = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PPPOE, + target_container); + key_pppoe->session_id = hdr->hdr.sid; + key_pppoe->ppp_proto = htons(ppp_proto); + key_pppoe->type = htons(ETH_P_PPP_SES); + } 
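
The new PPPoE handling above first validates the session header (is_pppoe_ses_hdr_valid()) and then decodes RFC 1661 protocol-field compression: PPP protocol numbers are assigned so that the least significant bit of their final octet is 1, which means an odd leading octet can only be a complete, compressed one-byte protocol field. A standalone illustration of that decode rule, written as plain C rather than kernel code (the kernel variant tests bit 0x0100 of the ntohs()-ed halfword, which is the same wire bit):

        #include <stdint.h>
        #include <stdio.h>

        /* Decode a possibly PFC-compressed PPP protocol field (RFC 1661);
         * stores the number of header octets consumed in *hlen.
         */
        static uint16_t ppp_proto_decode(const uint8_t *p, unsigned int *hlen)
        {
                if (p[0] & 0x01) {      /* odd first octet: compressed form */
                        *hlen = 1;
                        return p[0];
                }
                *hlen = 2;
                return ((uint16_t)p[0] << 8) | p[1];
        }

        int main(void)
        {
                const uint8_t full[] = { 0x00, 0x21 }; /* PPP_IP, uncompressed */
                const uint8_t pfc[]  = { 0x21 };       /* PPP_IP, compressed */
                unsigned int hlen;

                printf("0x%04x over %u octet(s)\n",
                       ppp_proto_decode(full, &hlen), hlen);
                printf("0x%04x over %u octet(s)\n",
                       ppp_proto_decode(pfc, &hlen), hlen);
                return 0;
        }

Both calls decode to 0x0021 (PPP_IP); only the consumed header length differs, which is exactly what the nhoff adjustment accounts for. The dissector then exports the session id and protocol through the new FLOW_DISSECTOR_KEY_PPPOE key, matched by the flow_rule_match_pppoe() helper added below.
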
break; } case htons(ETH_P_TIPC): { diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 0d3075d3c8fb..8cfb63528d18 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -230,6 +230,13 @@ void flow_rule_match_ct(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_ct); +void flow_rule_match_pppoe(const struct flow_rule *rule, + struct flow_match_pppoe *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PPPOE, out); +} +EXPORT_SYMBOL(flow_rule_match_pppoe); + struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index aa4f43f52499..6582dfdfb932 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -484,8 +484,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf sk->sk_family = PF_DECnet; sk->sk_protocol = 0; sk->sk_allocation = gfp; - sk->sk_sndbuf = sysctl_decnet_wmem[1]; - sk->sk_rcvbuf = sysctl_decnet_rmem[1]; + sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]); + sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]); /* Initialization of DECnet Session Control Port */ scp = DN_SK(sk); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 2b56218fc57c..4dfd68cf61c5 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -344,6 +344,7 @@ static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag, ether_addr_copy(a->addr, addr); a->vid = vid; + a->db = db; refcount_set(&a->refcount, 1); list_add_tail(&a->list, &lag->fdbs); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 46e8a5125853..452ff177e4da 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1042,6 +1042,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri) void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) { + u8 fib_notify_on_flag_change; struct fib_alias *fa_match; struct sk_buff *skb; int err; @@ -1063,14 +1064,16 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) WRITE_ONCE(fa_match->offload, fri->offload); WRITE_ONCE(fa_match->trap, fri->trap); + fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change); + /* 2 means send notifications only if offload_failed was changed. 
*/ - if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 && + if (fib_notify_on_flag_change == 2 && READ_ONCE(fa_match->offload_failed) == fri->offload_failed) goto out; WRITE_ONCE(fa_match->offload_failed, fri->offload_failed); - if (!net->ipv4.sysctl_fib_notify_on_flag_change) + if (!fib_notify_on_flag_change) goto out; skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ba2bdc811374..970e9a2cca4a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -454,8 +454,8 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); - WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); + WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1])); + WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1])); sk_sockets_allocated_inc(sk); } @@ -688,7 +688,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { return skb->len < size_goal && - sock_net(sk)->ipv4.sysctl_tcp_autocorking && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) && !tcp_rtx_queue_empty(sk) && refcount_read(&sk->sk_wmem_alloc) > skb->truesize && tcp_skb_can_collapse_to(skb); @@ -1635,7 +1635,7 @@ static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } -static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) +struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; u32 offset; @@ -1658,6 +1658,7 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) } return NULL; } +EXPORT_SYMBOL(tcp_recv_skb); /* * This routine provides an alternative to tcp_recvmsg() for routines @@ -1788,6 +1789,45 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) } EXPORT_SYMBOL(tcp_read_skb); +void tcp_read_done(struct sock *sk, size_t len) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 seq = tp->copied_seq; + struct sk_buff *skb; + size_t left; + u32 offset; + + if (sk->sk_state == TCP_LISTEN) + return; + + left = len; + while (left && (skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { + int used; + + used = min_t(size_t, skb->len - offset, left); + seq += used; + left -= used; + + if (skb->len > offset + used) + break; + + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { + tcp_eat_recv_skb(sk, skb); + ++seq; + break; + } + tcp_eat_recv_skb(sk, skb); + } + WRITE_ONCE(tp->copied_seq, seq); + + tcp_rcv_space_adjust(sk); + + /* Clean up data we have read: This will do ACK frames. */ + if (left != len) + tcp_cleanup_rbuf(sk, len - left); +} +EXPORT_SYMBOL(tcp_read_done); + int tcp_peek_len(struct socket *sock) { return tcp_inq(sock->sk); @@ -1802,7 +1842,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; else - cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1; + cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; val = min(val, cap); WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); @@ -4533,9 +4573,18 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } - /* check the signature */ - genhash = tp->af_specific->calc_md5_hash(newhash, hash_expected, - NULL, skb); + /* Check the signature. + * To support dual stack listeners, we need to handle + * IPv4-mapped case. 
+ */ + if (family == AF_INET) + genhash = tcp_v4_md5_hash_skb(newhash, + hash_expected, + NULL, skb); + else + genhash = tp->af_specific->calc_md5_hash(newhash, + hash_expected, + NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ae73b34d32e9..ab5f0ea166f1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk) if (sk->sk_sndbuf < sndmem) WRITE_ONCE(sk->sk_sndbuf, - min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2])); + min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2]))); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1; - int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; + int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) @@ -534,7 +534,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb, */ static void tcp_init_buffer_space(struct sock *sk) { - int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; + int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win); struct tcp_sock *tp = tcp_sk(sk); int maxwin; @@ -574,16 +574,17 @@ static void tcp_clamp_window(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); + int rmem2; icsk->icsk_ack.quick = 0; + rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]); - if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] && + if (sk->sk_rcvbuf < rmem2 && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { WRITE_ONCE(sk->sk_rcvbuf, - min(atomic_read(&sk->sk_rmem_alloc), - net->ipv4.sysctl_tcp_rmem[2])); + min(atomic_read(&sk->sk_rmem_alloc), rmem2)); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); @@ -724,7 +725,7 @@ void tcp_rcv_space_adjust(struct sock *sk) * <prev RTT . ><current RTT .. ><next RTT .... > */ - if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvmem, rcvbuf; u64 rcvwin, grow; @@ -745,7 +746,7 @@ void tcp_rcv_space_adjust(struct sock *sk) do_div(rcvwin, tp->advmss); rcvbuf = min_t(u64, rcvwin * rcvmem, - sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); if (rcvbuf > sk->sk_rcvbuf) { WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); @@ -909,9 +910,9 @@ static void tcp_update_pacing_rate(struct sock *sk) * end of slow start and should slow down. */ if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2) - rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio; + rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio); else - rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio; + rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio); rate *= max(tcp_snd_cwnd(tp), tp->packets_out); @@ -2174,7 +2175,7 @@ void tcp_enter_loss(struct sock *sk) * loss recovery is underway except recurring timeout(s) on * the same SND.UNA (sec 3.2). 
Disable F-RTO on path MTU probing */ - tp->frto = net->ipv4.sysctl_tcp_frto && + tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) && (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } @@ -3057,7 +3058,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) { - u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; + u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ; struct tcp_sock *tp = tcp_sk(sk); if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { @@ -3580,7 +3581,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, if (*last_oow_ack_time) { s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); - if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { + if (0 <= elapsed && + elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! */ } @@ -3628,7 +3630,7 @@ static void tcp_send_challenge_ack(struct sock *sk) /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; if (now != challenge_timestamp) { - u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit; + u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); u32 half = (ack_limit + 1) >> 1; challenge_timestamp = now; @@ -4425,7 +4427,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4472,7 +4474,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; tcp_rcv_spurious_retrans(sk, skb); @@ -5516,7 +5518,7 @@ send_now: } if (!tcp_is_sack(tp) || - tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) + tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)) goto send_now; if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { @@ -5537,11 +5539,12 @@ send_now: if (tp->srtt_us && tp->srtt_us < rtt) rtt = tp->srtt_us; - delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns, + delay = min_t(unsigned long, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns), rtt * (NSEC_PER_USEC >> 3)/20); sock_hold(sk); hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay), - sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns), HRTIMER_MODE_REL_PINNED_SOFT); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c7e7101647dc..0c83780dc9bf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1008,7 +1008,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); - tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (inet_sk(sk)->tos & INET_ECN_MASK) : inet_sk(sk)->tos; @@ -1528,7 +1528,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). */ - if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; if (!dst) { diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index a501150deaa3..d58e672be31c 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk) int m; sk_dst_confirm(sk); - if (net->ipv4.sysctl_tcp_nometrics_save || !dst) + if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst) return; rcu_read_lock(); @@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk) if (tcp_in_initial_slowstart(tp)) { /* Slow start still did not finish. */ - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val && (tcp_snd_cwnd(tp) >> 1) > val) @@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk) } else if (!tcp_in_slow_start(tp) && icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) tcp_metric_set(tm, TCP_METRIC_SSTHRESH, max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh)); @@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk) tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_ssthresh) >> 1); } - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val && tp->snd_ssthresh > val) @@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk) if (tcp_metric_locked(tm, TCP_METRIC_CWND)) tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND); - val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ? + val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ? 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val) { tp->snd_ssthresh = val; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2b72ccd2e651..78b654ff421b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp, if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); - /* If this is the first data packet sent in response to the - * previous received data, - * and it is a reply for ato after last received packet, - * increase pingpong count. - */ - if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) && - (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - inet_csk_inc_pingpong_cnt(sk); - tp->lsndtime = now; + + /* If it is a reply for ato after last received + * packet, enter pingpong mode. + */ + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + inet_csk_enter_pingpong_mode(sk); } /* Account for an ACK we sent. */ @@ -230,7 +227,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, * which we interpret as a sign the remote TCP is not * misinterpreting the window field as a signed quantity. 
*/ - if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)) (*rcv_wnd) = min(space, MAX_TCP_WINDOW); else (*rcv_wnd) = min_t(u32, space, U16_MAX); @@ -241,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, *rcv_wscale = 0; if (wscale_ok) { /* Set window scaling on max possible window */ - space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); *rcv_wscale = clamp_t(int, ilog2(space) - 15, @@ -285,7 +282,7 @@ static u16 tcp_select_window(struct sock *sk) * scaled window. */ if (!tp->rx_opt.rcv_wscale && - sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); @@ -1976,7 +1973,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift); - r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log; + r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log); if (r < BITS_PER_TYPE(sk->sk_gso_max_size)) bytes += sk->sk_gso_max_size >> r; @@ -1995,7 +1992,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) min_tso = ca_ops->min_tso_segs ? ca_ops->min_tso_segs(sk) : - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); return min_t(u32, tso_segs, sk->sk_gso_max_segs); @@ -2507,7 +2504,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, - sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes)); limit <<= factor; if (static_branch_unlikely(&tcp_tx_delay_enabled) && diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d546fc09d803..a9ba41648e36 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2133,10 +2133,8 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, */ cache_proxy = mr_mfc_find_any_parent(mrt, vif); if (cache_proxy && - cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) { - rcu_read_unlock(); + cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) goto forward; - } } /* diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 7f695c39d9a8..87c699d57b36 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1522,7 +1522,6 @@ static void mld_query_work(struct work_struct *work) if (++cnt >= MLD_MAX_QUEUE) { rework = true; - schedule_delayed_work(&idev->mc_query_work, 0); break; } } @@ -1533,8 +1532,10 @@ static void mld_query_work(struct work_struct *work) __mld_query_work(skb); mutex_unlock(&idev->mc_lock); - if (!rework) - in6_dev_put(idev); + if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0)) + return; + + in6_dev_put(idev); } /* called with rcu_read_lock() */ @@ -1624,7 +1625,6 @@ static void mld_report_work(struct work_struct *work) if (++cnt >= MLD_MAX_QUEUE) { rework = true; - schedule_delayed_work(&idev->mc_report_work, 0); break; } } @@ -1635,8 +1635,10 @@ static void mld_report_work(struct work_struct *work) __mld_report_work(skb); 
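
Both MLD work handlers are reworked the same way (the report-side hunk continues just below): rather than scheduling more work from inside the drain loop, they make a single requeue decision at the end, and the in6_dev reference is dropped only when queue_delayed_work() reports that no new work instance took ownership of it, since that call returns true only when the work was not already pending. A kernel-context sketch of the hand-off; everything demo_-prefixed is a hypothetical stand-in, while to_delayed_work() and queue_delayed_work() are the real API:

        struct demo_ctx {
                struct delayed_work dwork;
                /* lives inside a reference-counted object, like inet6_dev */
        };

        static struct workqueue_struct *demo_wq;

        static bool demo_process_batch(struct demo_ctx *ctx); /* true if backlog left */
        static void demo_put(struct demo_ctx *ctx);           /* in6_dev_put() stand-in */

        static void demo_work_fn(struct work_struct *work)
        {
                struct demo_ctx *ctx = container_of(to_delayed_work(work),
                                                    struct demo_ctx, dwork);

                /* On a successful requeue our reference travels with the
                 * pending work, so it must not be dropped here.
                 */
                if (demo_process_batch(ctx) &&
                    queue_delayed_work(demo_wq, &ctx->dwork, 0))
                        return;

                demo_put(ctx);  /* no requeue: this invocation was the last user */
        }

The same change also moves the requeue onto the dedicated mld_wq instead of the system workqueue used by schedule_delayed_work().
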
mutex_unlock(&idev->mc_lock); - if (!rework) - in6_dev_put(idev); + if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0)) + return; + + in6_dev_put(idev); } static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type, diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index b1179f62bd23..91b840514656 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -22,6 +22,11 @@ #include <linux/proc_fs.h> #include <net/ping.h> +static void ping_v6_destroy(struct sock *sk) +{ + inet6_destroy_sock(sk); +} + /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -185,6 +190,7 @@ struct proto pingv6_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, + .destroy = ping_v6_destroy, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index e756ba705fd9..34db881204d2 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -36,9 +36,11 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) case SEG6_IPTUN_MODE_INLINE: break; case SEG6_IPTUN_MODE_ENCAP: + case SEG6_IPTUN_MODE_ENCAP_RED: head = sizeof(struct ipv6hdr); break; case SEG6_IPTUN_MODE_L2ENCAP: + case SEG6_IPTUN_MODE_L2ENCAP_RED: return 0; } @@ -197,6 +199,124 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } EXPORT_SYMBOL_GPL(seg6_do_srh_encap); +/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */ +static int seg6_do_srh_encap_red(struct sk_buff *skb, + struct ipv6_sr_hdr *osrh, int proto) +{ + __u8 first_seg = osrh->first_segment; + struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(dst->dev); + struct ipv6hdr *hdr, *inner_hdr; + int hdrlen = ipv6_optlen(osrh); + int red_tlv_offset, tlv_offset; + struct ipv6_sr_hdr *isrh; + bool skip_srh = false; + __be32 flowlabel; + int tot_len, err; + int red_hdrlen; + int tlvs_len; + + if (first_seg > 0) { + red_hdrlen = hdrlen - sizeof(struct in6_addr); + } else { + /* NOTE: if tag/flags and/or other TLVs are introduced in the + * seg6_iptunnel infrastructure, they should be considered when + * deciding to skip the SRH. + */ + skip_srh = !sr_has_hmac(osrh); + + red_hdrlen = skip_srh ? 0 : hdrlen; + } + + tot_len = red_hdrlen + sizeof(struct ipv6hdr); + + err = skb_cow_head(skb, tot_len + skb->mac_len); + if (unlikely(err)) + return err; + + inner_hdr = ipv6_hdr(skb); + flowlabel = seg6_make_flowlabel(net, skb, inner_hdr); + + skb_push(skb, tot_len); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + hdr = ipv6_hdr(skb); + + /* based on seg6_do_srh_encap() */ + if (skb->protocol == htons(ETH_P_IPV6)) { + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), + flowlabel); + hdr->hop_limit = inner_hdr->hop_limit; + } else { + ip6_flow_hdr(hdr, 0, flowlabel); + hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); + + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + IP6CB(skb)->iif = skb->skb_iif; + } + + /* no matter if we have to skip the SRH or not, the first segment + * always comes in the pushed IPv6 header. 
+ */ + hdr->daddr = osrh->segments[first_seg]; + + if (skip_srh) { + hdr->nexthdr = proto; + + set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); + goto out; + } + + /* we cannot skip the SRH, slow path */ + + hdr->nexthdr = NEXTHDR_ROUTING; + isrh = (void *)hdr + sizeof(struct ipv6hdr); + + if (unlikely(!first_seg)) { + /* this is a very rare case; we have only one SID but + * we cannot skip the SRH since we are carrying some + * other info. + */ + memcpy(isrh, osrh, hdrlen); + goto srcaddr; + } + + tlv_offset = sizeof(*osrh) + (first_seg + 1) * sizeof(struct in6_addr); + red_tlv_offset = tlv_offset - sizeof(struct in6_addr); + + memcpy(isrh, osrh, red_tlv_offset); + + tlvs_len = hdrlen - tlv_offset; + if (unlikely(tlvs_len > 0)) { + const void *s = (const void *)osrh + tlv_offset; + void *d = (void *)isrh + red_tlv_offset; + + memcpy(d, s, tlvs_len); + } + + --isrh->first_segment; + isrh->hdrlen -= 2; + +srcaddr: + isrh->nexthdr = proto; + set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (unlikely(!skip_srh && sr_has_hmac(isrh))) { + err = seg6_push_hmac(net, &hdr->saddr, isrh); + if (unlikely(err)) + return err; + } +#endif + +out: + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + + skb_postpush_rcsum(skb, hdr, tot_len); + + return 0; +} + /* insert an SRH within an IPv6 packet, just after the IPv6 header */ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) { @@ -269,6 +389,7 @@ static int seg6_do_srh(struct sk_buff *skb) return err; break; case SEG6_IPTUN_MODE_ENCAP: + case SEG6_IPTUN_MODE_ENCAP_RED: err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6); if (err) return err; @@ -280,7 +401,11 @@ static int seg6_do_srh(struct sk_buff *skb) else return -EINVAL; - err = seg6_do_srh_encap(skb, tinfo->srh, proto); + if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP) + err = seg6_do_srh_encap(skb, tinfo->srh, proto); + else + err = seg6_do_srh_encap_red(skb, tinfo->srh, proto); + if (err) return err; @@ -289,6 +414,7 @@ static int seg6_do_srh(struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); break; case SEG6_IPTUN_MODE_L2ENCAP: + case SEG6_IPTUN_MODE_L2ENCAP_RED: if (!skb_mac_header_was_set(skb)) return -EINVAL; @@ -298,7 +424,13 @@ static int seg6_do_srh(struct sk_buff *skb) skb_mac_header_rebuild(skb); skb_push(skb, skb->mac_len); - err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET); + if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP) + err = seg6_do_srh_encap(skb, tinfo->srh, + IPPROTO_ETHERNET); + else + err = seg6_do_srh_encap_red(skb, tinfo->srh, + IPPROTO_ETHERNET); + if (err) return err; @@ -517,6 +649,10 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, break; case SEG6_IPTUN_MODE_L2ENCAP: break; + case SEG6_IPTUN_MODE_ENCAP_RED: + break; + case SEG6_IPTUN_MODE_L2ENCAP_RED: + break; default: return -EINVAL; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 85b8b765dcb1..e54eee80ce5f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -546,7 +546,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); - tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (np->tclass & INET_ECN_MASK) : np->tclass; @@ -1317,7 +1317,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). */ - if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; /* Clone native IPv6 options from listening socket (if any) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index bd8f0f425be4..30d289044e71 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1271,7 +1271,7 @@ raise_win: if (unlikely(th->syn)) new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale; if (!tp->rx_opt.rcv_wscale && - sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows) + READ_ONCE(sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows)) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 57f23f4e3a7c..a3f1c1461874 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1873,7 +1873,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (msk->rcvq_space.copied <= msk->rcvq_space.space) goto new_measure; - if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvmem, rcvbuf; u64 rcvwin, grow; @@ -1891,7 +1891,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) do_div(rcvwin, advmss); rcvbuf = min_t(u64, rcvwin * rcvmem, - sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); if (rcvbuf > sk->sk_rcvbuf) { u32 window_clamp; @@ -2634,8 +2634,8 @@ static int mptcp_init_sock(struct sock *sk) mptcp_ca_reset(sk); sk_sockets_allocated_inc(sk); - sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; - sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; + sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); + sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); return 0; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index d4b16d033978..901c763dcdbb 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1533,7 +1533,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); WRITE_ONCE(msk->allow_infinite_fallback, false); - return err; + return 0; failed_unlink: list_del(&subflow->node); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 646d5fd53604..9f976b11d896 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3340,6 +3340,8 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) if (err < 0) return err; } + + cond_resched(); } return 0; @@ -9367,9 +9369,13 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, break; } } + + cond_resched(); } list_for_each_entry(set, &ctx->table->sets, list) { + cond_resched(); + if (!nft_is_active_next(ctx->net, set)) continue; if (!(set->flags & NFT_SET_MAP) || diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index a364f8e5e698..87a9009d5234 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -843,11 +843,16 @@ nfqnl_enqueue_packet(struct 
nf_queue_entry *entry, unsigned int queuenum) } static int -nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) +nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff) { struct sk_buff *nskb; if (diff < 0) { + unsigned int min_len = skb_transport_offset(e->skb); + + if (data_len < min_len) + return -EINVAL; + if (pskb_trim(e->skb, data_len)) return -ENOMEM; } else if (diff > 0) { diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 15e4b7640dc0..da29e92c03e2 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -68,6 +68,31 @@ static void nft_queue_sreg_eval(const struct nft_expr *expr, regs->verdict.code = ret; } +static int nft_queue_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + static const unsigned int supported_hooks = ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + + switch (ctx->family) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + case NFPROTO_BRIDGE: + break; + case NFPROTO_NETDEV: /* lacks okfn */ + fallthrough; + default: + return -EOPNOTSUPP; + } + + return nft_chain_validate_hooks(ctx->chain, supported_hooks); +} + static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { [NFTA_QUEUE_NUM] = { .type = NLA_U16 }, [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 }, @@ -164,6 +189,7 @@ static const struct nft_expr_ops nft_queue_ops = { .eval = nft_queue_eval, .init = nft_queue_init, .dump = nft_queue_dump, + .validate = nft_queue_validate, .reduce = NFT_REDUCE_READONLY, }; @@ -173,6 +199,7 @@ static const struct nft_expr_ops nft_queue_sreg_ops = { .eval = nft_queue_sreg_eval, .init = nft_queue_sreg_init, .dump = nft_queue_sreg_dump, + .validate = nft_queue_validate, .reduce = NFT_REDUCE_READONLY, }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d08c4728523b..5cbe07116e04 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3037,8 +3037,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (err) goto out_free; - if (sock->type == SOCK_RAW && - !dev_validate_header(dev, skb->data, len)) { + if ((sock->type == SOCK_RAW && + !dev_validate_header(dev, skb->data, len)) || !skb->len) { err = -EINVAL; goto out_free; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1a1e34480b7e..041d63ff809a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -16,6 +16,7 @@ #include <linux/in6.h> #include <linux/ip.h> #include <linux/mpls.h> +#include <linux/ppp_defs.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> @@ -67,6 +68,7 @@ struct fl_flow_key { struct flow_dissector_key_ct ct; struct flow_dissector_key_hash hash; struct flow_dissector_key_num_of_vlans num_of_vlans; + struct flow_dissector_key_pppoe pppoe; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. 
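 * (the __aligned(BITS_PER_LONG / 8) pads fl_flow_key to a whole number
 * of longs, so masked key compares can walk the struct long by long)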
*/ struct fl_flow_mask_range { @@ -708,6 +710,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_HASH] = { .type = NLA_U32 }, [TCA_FLOWER_KEY_HASH_MASK] = { .type = NLA_U32 }, [TCA_FLOWER_KEY_NUM_OF_VLANS] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 }, }; @@ -1035,6 +1039,50 @@ static void fl_set_key_vlan(struct nlattr **tb, } } +static void fl_set_key_pppoe(struct nlattr **tb, + struct flow_dissector_key_pppoe *key_val, + struct flow_dissector_key_pppoe *key_mask, + struct fl_flow_key *key, + struct fl_flow_key *mask) +{ + /* key_val::type must be set to ETH_P_PPP_SES + * because ETH_P_PPP_SES was stored in basic.n_proto + * which might get overwritten by ppp_proto + * or might be set to 0, the role of key_val::type + * is simmilar to vlan_key::tpid + */ + key_val->type = htons(ETH_P_PPP_SES); + key_mask->type = cpu_to_be16(~0); + + if (tb[TCA_FLOWER_KEY_PPPOE_SID]) { + key_val->session_id = + nla_get_be16(tb[TCA_FLOWER_KEY_PPPOE_SID]); + key_mask->session_id = cpu_to_be16(~0); + } + if (tb[TCA_FLOWER_KEY_PPP_PROTO]) { + key_val->ppp_proto = + nla_get_be16(tb[TCA_FLOWER_KEY_PPP_PROTO]); + key_mask->ppp_proto = cpu_to_be16(~0); + + if (key_val->ppp_proto == htons(PPP_IP)) { + key->basic.n_proto = htons(ETH_P_IP); + mask->basic.n_proto = cpu_to_be16(~0); + } else if (key_val->ppp_proto == htons(PPP_IPV6)) { + key->basic.n_proto = htons(ETH_P_IPV6); + mask->basic.n_proto = cpu_to_be16(~0); + } else if (key_val->ppp_proto == htons(PPP_MPLS_UC)) { + key->basic.n_proto = htons(ETH_P_MPLS_UC); + mask->basic.n_proto = cpu_to_be16(~0); + } else if (key_val->ppp_proto == htons(PPP_MPLS_MC)) { + key->basic.n_proto = htons(ETH_P_MPLS_MC); + mask->basic.n_proto = cpu_to_be16(~0); + } + } else { + key->basic.n_proto = 0; + mask->basic.n_proto = cpu_to_be16(0); + } +} + static void fl_set_key_flag(u32 flower_key, u32 flower_mask, u32 *dissector_key, u32 *dissector_mask, u32 flower_flag_bit, u32 dissector_flag_bit) @@ -1645,6 +1693,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb, } } + if (key->basic.n_proto == htons(ETH_P_PPP_SES)) + fl_set_key_pppoe(tb, &key->pppoe, &mask->pppoe, key, mask); + if (key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) { fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, @@ -1917,6 +1968,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_HASH, hash); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_NUM_OF_VLANS, num_of_vlans); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_PPPOE, pppoe); skb_flow_dissector_init(dissector, keys, cnt); } @@ -3045,6 +3098,17 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, fl_dump_key_ip(skb, false, &key->ip, &mask->ip))) goto nla_put_failure; + if (mask->pppoe.session_id) { + if (nla_put_be16(skb, TCA_FLOWER_KEY_PPPOE_SID, + key->pppoe.session_id)) + goto nla_put_failure; + } + if (mask->basic.n_proto && mask->pppoe.ppp_proto) { + if (nla_put_be16(skb, TCA_FLOWER_KEY_PPP_PROTO, + key->pppoe.ppp_proto)) + goto nla_put_failure; + } + if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 599e26fc2fa8..91a0dc463c48 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -979,7 +979,7 @@ cbq_reset(struct Qdisc *sch) } 
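For reference, the basic.n_proto narrowing that fl_set_key_pppoe() performs above is a fixed PPP-protocol-to-EtherType mapping. A minimal standalone sketch of just that mapping (illustrative only, not the kernel code path; the PPP_* and ETH_P_* constants come from the uapi headers linux/ppp_defs.h and linux/if_ether.h):

#include <arpa/inet.h>       /* htons() */
#include <linux/if_ether.h>  /* ETH_P_IP, ETH_P_IPV6, ETH_P_MPLS_UC, ETH_P_MPLS_MC */
#include <linux/ppp_defs.h>  /* PPP_IP, PPP_IPV6, PPP_MPLS_UC, PPP_MPLS_MC */
#include <stdint.h>

/* Map a PPP protocol number (network byte order) to the EtherType that
 * flower places in basic.n_proto; 0 means no narrowing is possible and
 * later L3 keys cannot be applied to the PPP payload. */
static uint16_t ppp_proto_to_n_proto(uint16_t ppp_proto)
{
	if (ppp_proto == htons(PPP_IP))
		return htons(ETH_P_IP);
	if (ppp_proto == htons(PPP_IPV6))
		return htons(ETH_P_IPV6);
	if (ppp_proto == htons(PPP_MPLS_UC))
		return htons(ETH_P_MPLS_UC);
	if (ppp_proto == htons(PPP_MPLS_MC))
		return htons(ETH_P_MPLS_MC);
	return 0;
}

When the TCA_FLOWER_KEY_PPP_PROTO attribute is absent, fl_set_key_pppoe() instead clears both the key and the mask of n_proto, so any PPP payload matches.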
-static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) +static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) { if (lss->change & TCF_CBQ_LSS_FLAGS) { cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; @@ -997,7 +997,6 @@ static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) } if (lss->change & TCF_CBQ_LSS_OFFTIME) cl->offtime = lss->offtime; - return 0; } static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index be29da09cc7a..3460abceba44 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -229,9 +229,8 @@ static struct sctp_association *sctp_association_init( if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, - 0, gfp)) - goto fail_init; + if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) + goto stream_free; /* Initialize default path MTU. */ asoc->pathmtu = sp->pathmtu; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 6dc95dcc0ff4..ef9fceadef8d 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, ret = sctp_stream_alloc_out(stream, outcnt, gfp); if (ret) - goto out_err; + return ret; for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; @@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, handle_in: sctp_stream_interleave_init(stream); if (!incnt) - goto out; - - ret = sctp_stream_alloc_in(stream, incnt, gfp); - if (ret) - goto in_err; - - goto out; + return 0; -in_err: - sched->free(stream); - genradix_free(&stream->in); -out_err: - genradix_free(&stream->out); - stream->outcnt = 0; -out: - return ret; + return sctp_stream_alloc_in(stream, incnt, gfp); } int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 518b1b9bf89d..1ad565ed5627 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -160,7 +160,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, if (!SCTP_SO(&asoc->stream, i)->ext) continue; - ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); + ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC); if (ret) goto err; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6e70d9c10b78..79c1318af1fe 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3515,3 +3515,4 @@ MODULE_DESCRIPTION("smc socket address family"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_SMC); MODULE_ALIAS_TCP_ULP("smc"); +MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 1fca2f90a9c7..80ea7d954ece 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -268,3 +268,4 @@ module_init(smc_diag_init); module_exit(smc_diag_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */); +MODULE_ALIAS_GENL_FAMILY(SMCR_GENL_FAMILY_NAME); diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index a2084ecdb97e..911fe08bc54b 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -33,17 +33,6 @@ int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) vlan_id); } -int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos, - void *data, size_t len) -{ - int rc; - - rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal, - 
pos->offset, data, len); - - return rc < 0 ? rc : 0; -} - void smc_ism_get_system_eid(u8 **eid) { if (!smc_ism_v2_capable) @@ -440,7 +429,7 @@ int smcd_register_dev(struct smcd_dev *smcd) if (list_empty(&smcd_dev_list.list)) { u8 *system_eid = NULL; - smcd->ops->get_system_eid(smcd, &system_eid); + system_eid = smcd->ops->get_system_eid(); if (system_eid[24] != '0' || system_eid[28] != '0') { smc_ism_v2_capable = true; memcpy(smc_ism_v2_system_eid, system_eid, @@ -519,13 +508,13 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event) EXPORT_SYMBOL_GPL(smcd_handle_event); /* SMCD Device interrupt handler. Called from ISM device interrupt handler. - * Parameters are smcd device pointer and DMB number. Find the connection and - * schedule the tasklet for this connection. + * Parameters are smcd device pointer, DMB number, and the DMBE bitmask. + * Find the connection and schedule the tasklet for this connection. * * Context: * - Function called in IRQ context from ISM device driver IRQ handler. */ -void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno) +void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno, u16 dmbemask) { struct smc_connection *conn = NULL; unsigned long flags; diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 004b22a13ffa..d6b2db604fe8 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -28,13 +28,6 @@ struct smc_ism_vlanid { /* VLAN id set on ISM device */ refcount_t refcnt; /* Reference count */ }; -struct smc_ism_position { /* ISM device position to write to */ - u64 token; /* Token of DMB */ - u32 offset; /* Offset into DMBE */ - u8 index; /* Index of DMBE */ - u8 signal; /* Generate interrupt on owner side */ -}; - struct smcd_dev; int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev); @@ -45,12 +38,21 @@ int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id); int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size, struct smc_buf_desc *dmb_desc); int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc); -int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos, - void *data, size_t len); int smc_ism_signal_shutdown(struct smc_link_group *lgr); void smc_ism_get_system_eid(u8 **eid); u16 smc_ism_get_chid(struct smcd_dev *dev); bool smc_ism_is_v2_capable(void); void smc_ism_init(void); int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); + +static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok, + unsigned int idx, bool sf, unsigned int offset, + void *data, size_t len) +{ + int rc; + + rc = smcd->ops->move_data(smcd, dmb_tok, idx, sf, offset, data, len); + return rc < 0 ? 
rc : 0; +} + #endif diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 4e8377657a62..64dedffe9d26 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -320,15 +320,11 @@ int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset, int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, u32 offset, int signal) { - struct smc_ism_position pos; int rc; - memset(&pos, 0, sizeof(pos)); - pos.token = conn->peer_token; - pos.index = conn->peer_rmbe_idx; - pos.offset = conn->tx_off + offset; - pos.signal = signal; - rc = smc_ism_write(conn->lgr->smcd, &pos, data, len); + rc = smc_ism_write(conn->lgr->smcd, conn->peer_token, + conn->peer_rmbe_idx, signal, conn->tx_off + offset, + data, len); if (rc) conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; return rc; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 43509c7e90fc..f1c3b8eb4b3d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -517,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); sk->sk_shutdown = 0; sk->sk_backlog_rcv = tipc_sk_backlog_rcv; - sk->sk_rcvbuf = sysctl_tipc_rmem[1]; + sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]); sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; sk->sk_destruct = tipc_sock_destruct; diff --git a/net/tls/tls.h b/net/tls/tls.h index 3740740504e3..0e840a0c3437 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -1,4 +1,5 @@ /* + * Copyright (c) 2016 Tom Herbert <tom@herbertland.com> * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. * @@ -127,8 +128,24 @@ int tls_sw_fallback_init(struct sock *sk, struct tls_offload_context_tx *offload_ctx, struct tls_crypto_info *crypto_info); -int tls_strp_msg_hold(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *dst); +int tls_strp_dev_init(void); +void tls_strp_dev_exit(void); + +void tls_strp_done(struct tls_strparser *strp); +void tls_strp_stop(struct tls_strparser *strp); +int tls_strp_init(struct tls_strparser *strp, struct sock *sk); +void tls_strp_data_ready(struct tls_strparser *strp); + +void tls_strp_check_rcv(struct tls_strparser *strp); +void tls_strp_msg_done(struct tls_strparser *strp); + +int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb); +void tls_rx_msg_ready(struct tls_strparser *strp); + +void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); +int tls_strp_msg_cow(struct tls_sw_context_rx *ctx); +struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx); +int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst); static inline struct tls_msg *tls_msg(struct sk_buff *skb) { @@ -139,7 +156,13 @@ static inline struct tls_msg *tls_msg(struct sk_buff *skb) static inline struct sk_buff *tls_strp_msg(struct tls_sw_context_rx *ctx) { - return ctx->recv_pkt; + DEBUG_NET_WARN_ON_ONCE(!ctx->strp.msg_ready || !ctx->strp.anchor->len); + return ctx->strp.anchor; +} + +static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx) +{ + return ctx->strp.msg_ready; } #ifdef CONFIG_TLS_DEVICE diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index b1fcd61836d1..18c7e5c6d228 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -46,10 +46,8 @@ */ static DECLARE_RWSEM(device_offload_lock); -static void tls_device_gc_task(struct work_struct *work); +static struct workqueue_struct *destruct_wq __read_mostly; -static 
DECLARE_WORK(tls_device_gc_work, tls_device_gc_task); -static LIST_HEAD(tls_device_gc_list); static LIST_HEAD(tls_device_list); static LIST_HEAD(tls_device_down_list); static DEFINE_SPINLOCK(tls_device_lock); @@ -68,47 +66,44 @@ static void tls_device_free_ctx(struct tls_context *ctx) tls_ctx_free(NULL, ctx); } -static void tls_device_gc_task(struct work_struct *work) +static void tls_device_tx_del_task(struct work_struct *work) { - struct tls_context *ctx, *tmp; - unsigned long flags; - LIST_HEAD(gc_list); - - spin_lock_irqsave(&tls_device_lock, flags); - list_splice_init(&tls_device_gc_list, &gc_list); - spin_unlock_irqrestore(&tls_device_lock, flags); - - list_for_each_entry_safe(ctx, tmp, &gc_list, list) { - struct net_device *netdev = ctx->netdev; - - if (netdev && ctx->tx_conf == TLS_HW) { - netdev->tlsdev_ops->tls_dev_del(netdev, ctx, - TLS_OFFLOAD_CTX_DIR_TX); - dev_put(netdev); - ctx->netdev = NULL; - } + struct tls_offload_context_tx *offload_ctx = + container_of(work, struct tls_offload_context_tx, destruct_work); + struct tls_context *ctx = offload_ctx->ctx; + struct net_device *netdev = ctx->netdev; - list_del(&ctx->list); - tls_device_free_ctx(ctx); - } + netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX); + dev_put(netdev); + ctx->netdev = NULL; + tls_device_free_ctx(ctx); } static void tls_device_queue_ctx_destruction(struct tls_context *ctx) { unsigned long flags; + bool async_cleanup; spin_lock_irqsave(&tls_device_lock, flags); - if (unlikely(!refcount_dec_and_test(&ctx->refcount))) - goto unlock; + if (unlikely(!refcount_dec_and_test(&ctx->refcount))) { + spin_unlock_irqrestore(&tls_device_lock, flags); + return; + } - list_move_tail(&ctx->list, &tls_device_gc_list); + list_del(&ctx->list); /* Remove from tls_device_list / tls_device_down_list */ + async_cleanup = ctx->netdev && ctx->tx_conf == TLS_HW; + if (async_cleanup) { + struct tls_offload_context_tx *offload_ctx = tls_offload_ctx_tx(ctx); - /* schedule_work inside the spinlock - * to make sure tls_device_down waits for that work. - */ - schedule_work(&tls_device_gc_work); -unlock: + /* queue_work inside the spinlock + * to make sure tls_device_down waits for that work. 
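+	 * tls_device_down() runs flush_workqueue(destruct_wq), so a destruct
+	 * work item queued while holding tls_device_lock cannot be missed
+	 * by that flush.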
+ */ + queue_work(destruct_wq, &offload_ctx->destruct_work); + } spin_unlock_irqrestore(&tls_device_lock, flags); + + if (!async_cleanup) + tls_device_free_ctx(ctx); } /* We assume that the socket is already connected */ @@ -894,27 +889,26 @@ static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx, static int tls_device_reencrypt(struct sock *sk, struct tls_sw_context_rx *sw_ctx) { - int err = 0, offset, copy, nsg, data_len, pos; - struct sk_buff *skb, *skb_iter, *unused; + int err, offset, copy, data_len, pos; + struct sk_buff *skb, *skb_iter; struct scatterlist sg[1]; struct strp_msg *rxm; char *orig_buf, *buf; - skb = tls_strp_msg(sw_ctx); - rxm = strp_msg(skb); - offset = rxm->offset; - + rxm = strp_msg(tls_strp_msg(sw_ctx)); orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE, sk->sk_allocation); if (!orig_buf) return -ENOMEM; buf = orig_buf; - nsg = skb_cow_data(skb, 0, &unused); - if (unlikely(nsg < 0)) { - err = nsg; + err = tls_strp_msg_cow(sw_ctx); + if (unlikely(err)) goto free_buf; - } + + skb = tls_strp_msg(sw_ctx); + rxm = strp_msg(skb); + offset = rxm->offset; sg_init_table(sg, 1); sg_set_buf(&sg[0], buf, @@ -1151,6 +1145,9 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) start_marker_record->len = 0; start_marker_record->num_frags = 0; + INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task); + offload_ctx->ctx = ctx; + INIT_LIST_HEAD(&offload_ctx->records_list); list_add_tail(&start_marker_record->list, &offload_ctx->records_list); spin_lock_init(&offload_ctx->lock); @@ -1384,13 +1381,18 @@ static int tls_device_down(struct net_device *netdev) * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. * Now release the ref taken above. */ - if (refcount_dec_and_test(&ctx->refcount)) + if (refcount_dec_and_test(&ctx->refcount)) { + /* sk_destruct ran after tls_device_down took a ref, and + * it returned early. Complete the destruction here. 
+ */ + list_del(&ctx->list); tls_device_free_ctx(ctx); + } } up_write(&device_offload_lock); - flush_work(&tls_device_gc_work); + flush_workqueue(destruct_wq); return NOTIFY_DONE; } @@ -1431,12 +1433,23 @@ static struct notifier_block tls_dev_notifier = { int __init tls_device_init(void) { - return register_netdevice_notifier(&tls_dev_notifier); + int err; + + destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0); + if (!destruct_wq) + return -ENOMEM; + + err = register_netdevice_notifier(&tls_dev_notifier); + if (err) + destroy_workqueue(destruct_wq); + + return err; } void __exit tls_device_cleanup(void) { unregister_netdevice_notifier(&tls_dev_notifier); - flush_work(&tls_device_gc_work); + flush_workqueue(destruct_wq); + destroy_workqueue(destruct_wq); clean_acked_data_flush(); } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 9703636cfc60..08ddf9d837ae 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -725,6 +725,10 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, if (tx) { ctx->sk_write_space = sk->sk_write_space; sk->sk_write_space = tls_write_space; + } else { + struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(ctx); + + tls_strp_check_rcv(&rx_ctx->strp); } return 0; @@ -1141,20 +1145,28 @@ static int __init tls_register(void) if (err) return err; + err = tls_strp_dev_init(); + if (err) + goto err_pernet; + err = tls_device_init(); - if (err) { - unregister_pernet_subsys(&tls_proc_ops); - return err; - } + if (err) + goto err_strp; tcp_register_ulp(&tcp_tls_ulp_ops); return 0; +err_strp: + tls_strp_dev_exit(); +err_pernet: + unregister_pernet_subsys(&tls_proc_ops); + return err; } static void __exit tls_unregister(void) { tcp_unregister_ulp(&tcp_tls_ulp_ops); + tls_strp_dev_exit(); tls_device_cleanup(); unregister_pernet_subsys(&tls_proc_ops); } diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 9ccab79a6e1e..f0b7c9122fba 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -1,17 +1,494 @@ // SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2016 Tom Herbert <tom@herbertland.com> */ #include <linux/skbuff.h> +#include <linux/workqueue.h> +#include <net/strparser.h> +#include <net/tcp.h> +#include <net/sock.h> +#include <net/tls.h> #include "tls.h" -int tls_strp_msg_hold(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *dst) +static struct workqueue_struct *tls_strp_wq; + +static void tls_strp_abort_strp(struct tls_strparser *strp, int err) +{ + if (strp->stopped) + return; + + strp->stopped = 1; + + /* Report an error on the lower socket */ + strp->sk->sk_err = -err; + sk_error_report(strp->sk); +} + +static void tls_strp_anchor_free(struct tls_strparser *strp) +{ + struct skb_shared_info *shinfo = skb_shinfo(strp->anchor); + + DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1); + shinfo->frag_list = NULL; + consume_skb(strp->anchor); + strp->anchor = NULL; +} + +/* Create a new skb with the contents of input copied to its page frags */ +static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp) { - struct sk_buff *clone; + struct strp_msg *rxm; + struct sk_buff *skb; + int i, err, offset; + + skb = alloc_skb_with_frags(0, strp->anchor->len, TLS_PAGE_ORDER, + &err, strp->sk->sk_allocation); + if (!skb) + return NULL; + + offset = strp->stm.offset; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + WARN_ON_ONCE(skb_copy_bits(strp->anchor, offset, + skb_frag_address(frag), + skb_frag_size(frag))); + offset += 
skb_frag_size(frag); + } + + skb_copy_header(skb, strp->anchor); + rxm = strp_msg(skb); + rxm->offset = 0; + return skb; +} + +/* Steal the input skb, input msg is invalid after calling this function */ +struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx) +{ + struct tls_strparser *strp = &ctx->strp; + +#ifdef CONFIG_TLS_DEVICE + DEBUG_NET_WARN_ON_ONCE(!strp->anchor->decrypted); +#else + /* This function turns an input into an output, + * that can only happen if we have offload. + */ + WARN_ON(1); +#endif + + if (strp->copy_mode) { + struct sk_buff *skb; + + /* Replace anchor with an empty skb, this is a little + * dangerous but __tls_cur_msg() warns on empty skbs + * so hopefully we'll catch abuses. + */ + skb = alloc_skb(0, strp->sk->sk_allocation); + if (!skb) + return NULL; - clone = skb_clone(skb, sk->sk_allocation); - if (!clone) + swap(strp->anchor, skb); + return skb; + } + + return tls_strp_msg_make_copy(strp); +} + +/* Force the input skb to be in copy mode. The data ownership remains + * with the input skb itself (meaning unpause will wipe it) but it can + * be modified. + */ +int tls_strp_msg_cow(struct tls_sw_context_rx *ctx) +{ + struct tls_strparser *strp = &ctx->strp; + struct sk_buff *skb; + + if (strp->copy_mode) + return 0; + + skb = tls_strp_msg_make_copy(strp); + if (!skb) return -ENOMEM; - __skb_queue_tail(dst, clone); + + tls_strp_anchor_free(strp); + strp->anchor = skb; + + tcp_read_done(strp->sk, strp->stm.full_len); + strp->copy_mode = 1; + return 0; } + +/* Make a clone (in the skb sense) of the input msg to keep a reference + * to the underlying data. The reference-holding skbs get placed on + * @dst. + */ +int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst) +{ + struct skb_shared_info *shinfo = skb_shinfo(strp->anchor); + + if (strp->copy_mode) { + struct sk_buff *skb; + + WARN_ON_ONCE(!shinfo->nr_frags); + + /* We can't skb_clone() the anchor, it gets wiped by unpause */ + skb = alloc_skb(0, strp->sk->sk_allocation); + if (!skb) + return -ENOMEM; + + __skb_queue_tail(dst, strp->anchor); + strp->anchor = skb; + } else { + struct sk_buff *iter, *clone; + int chunk, len, offset; + + offset = strp->stm.offset; + len = strp->stm.full_len; + iter = shinfo->frag_list; + + while (len > 0) { + if (iter->len <= offset) { + offset -= iter->len; + goto next; + } + + chunk = iter->len - offset; + offset = 0; + + clone = skb_clone(iter, strp->sk->sk_allocation); + if (!clone) + return -ENOMEM; + __skb_queue_tail(dst, clone); + + len -= chunk; +next: + iter = iter->next; + } + } + + return 0; +} + +static void tls_strp_flush_anchor_copy(struct tls_strparser *strp) +{ + struct skb_shared_info *shinfo = skb_shinfo(strp->anchor); + int i; + + DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1); + + for (i = 0; i < shinfo->nr_frags; i++) + __skb_frag_unref(&shinfo->frags[i], false); + shinfo->nr_frags = 0; + strp->copy_mode = 0; +} + +static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb, + unsigned int offset, size_t in_len) +{ + struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data; + struct sk_buff *skb; + skb_frag_t *frag; + size_t len, chunk; + int sz; + + if (strp->msg_ready) + return 0; + + skb = strp->anchor; + frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE]; + + len = in_len; + /* First make sure we got the header */ + if (!strp->stm.full_len) { + /* Assume one page is more than enough for headers */ + chunk = min_t(size_t, len, PAGE_SIZE - skb_frag_size(frag)); + 
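+		/* Copy as much of the incoming data as fits into the current
+		 * frag; tls_rx_msg_size() below can then parse the TLS record
+		 * header (sz == 0 means the header is still incomplete).
+		 */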
WARN_ON_ONCE(skb_copy_bits(in_skb, offset, + skb_frag_address(frag) + + skb_frag_size(frag), + chunk)); + + sz = tls_rx_msg_size(strp, strp->anchor); + if (sz < 0) { + desc->error = sz; + return 0; + } + + /* We may have over-read, sz == 0 is guaranteed under-read */ + if (sz > 0) + chunk = min_t(size_t, chunk, sz - skb->len); + + skb->len += chunk; + skb->data_len += chunk; + skb_frag_size_add(frag, chunk); + frag++; + len -= chunk; + offset += chunk; + + strp->stm.full_len = sz; + if (!strp->stm.full_len) + goto read_done; + } + + /* Load up more data */ + while (len && strp->stm.full_len > skb->len) { + chunk = min_t(size_t, len, strp->stm.full_len - skb->len); + chunk = min_t(size_t, chunk, PAGE_SIZE - skb_frag_size(frag)); + WARN_ON_ONCE(skb_copy_bits(in_skb, offset, + skb_frag_address(frag) + + skb_frag_size(frag), + chunk)); + + skb->len += chunk; + skb->data_len += chunk; + skb_frag_size_add(frag, chunk); + frag++; + len -= chunk; + offset += chunk; + } + + if (strp->stm.full_len == skb->len) { + desc->count = 0; + + strp->msg_ready = 1; + tls_rx_msg_ready(strp); + } + +read_done: + return in_len - len; +} + +static int tls_strp_read_copyin(struct tls_strparser *strp) +{ + struct socket *sock = strp->sk->sk_socket; + read_descriptor_t desc; + + desc.arg.data = strp; + desc.error = 0; + desc.count = 1; /* give more than one skb per call */ + + /* sk should be locked here, so okay to do read_sock */ + sock->ops->read_sock(strp->sk, &desc, tls_strp_copyin); + + return desc.error; +} + +static int tls_strp_read_short(struct tls_strparser *strp) +{ + struct skb_shared_info *shinfo; + struct page *page; + int need_spc, len; + + /* If the rbuf is small or rcv window has collapsed to 0 we need + * to read the data out. Otherwise the connection will stall. + * Without pressure threshold of INT_MAX will never be ready. 
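+	 * (with a target of INT_MAX, tcp_epollin_ready() only reports
+	 * readiness under receive-memory pressure, i.e. exactly when data
+	 * could no longer accumulate in the TCP receive queue)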
+ */ + if (likely(!tcp_epollin_ready(strp->sk, INT_MAX))) + return 0; + + shinfo = skb_shinfo(strp->anchor); + shinfo->frag_list = NULL; + + /* If we don't know the length go max plus page for cipher overhead */ + need_spc = strp->stm.full_len ?: TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE; + + for (len = need_spc; len > 0; len -= PAGE_SIZE) { + page = alloc_page(strp->sk->sk_allocation); + if (!page) { + tls_strp_flush_anchor_copy(strp); + return -ENOMEM; + } + + skb_fill_page_desc(strp->anchor, shinfo->nr_frags++, + page, 0, 0); + } + + strp->copy_mode = 1; + strp->stm.offset = 0; + + strp->anchor->len = 0; + strp->anchor->data_len = 0; + strp->anchor->truesize = round_up(need_spc, PAGE_SIZE); + + tls_strp_read_copyin(strp); + + return 0; +} + +static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len) +{ + struct tcp_sock *tp = tcp_sk(strp->sk); + struct sk_buff *first; + u32 offset; + + first = tcp_recv_skb(strp->sk, tp->copied_seq, &offset); + if (WARN_ON_ONCE(!first)) + return; + + /* Bestow the state onto the anchor */ + strp->anchor->len = offset + len; + strp->anchor->data_len = offset + len; + strp->anchor->truesize = offset + len; + + skb_shinfo(strp->anchor)->frag_list = first; + + skb_copy_header(strp->anchor, first); + strp->anchor->destructor = NULL; + + strp->stm.offset = offset; +} + +void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) +{ + struct strp_msg *rxm; + struct tls_msg *tlm; + + DEBUG_NET_WARN_ON_ONCE(!strp->msg_ready); + DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len); + + if (!strp->copy_mode && force_refresh) { + if (WARN_ON(tcp_inq(strp->sk) < strp->stm.full_len)) + return; + + tls_strp_load_anchor_with_queue(strp, strp->stm.full_len); + } + + rxm = strp_msg(strp->anchor); + rxm->full_len = strp->stm.full_len; + rxm->offset = strp->stm.offset; + tlm = tls_msg(strp->anchor); + tlm->control = strp->mark; +} + +/* Called with lock held on lower socket */ +static int tls_strp_read_sock(struct tls_strparser *strp) +{ + int sz, inq; + + inq = tcp_inq(strp->sk); + if (inq < 1) + return 0; + + if (unlikely(strp->copy_mode)) + return tls_strp_read_copyin(strp); + + if (inq < strp->stm.full_len) + return tls_strp_read_short(strp); + + if (!strp->stm.full_len) { + tls_strp_load_anchor_with_queue(strp, inq); + + sz = tls_rx_msg_size(strp, strp->anchor); + if (sz < 0) { + tls_strp_abort_strp(strp, sz); + return sz; + } + + strp->stm.full_len = sz; + + if (!strp->stm.full_len || inq < strp->stm.full_len) + return tls_strp_read_short(strp); + } + + strp->msg_ready = 1; + tls_rx_msg_ready(strp); + + return 0; +} + +void tls_strp_check_rcv(struct tls_strparser *strp) +{ + if (unlikely(strp->stopped) || strp->msg_ready) + return; + + if (tls_strp_read_sock(strp) == -ENOMEM) + queue_work(tls_strp_wq, &strp->work); +} + +/* Lower sock lock held */ +void tls_strp_data_ready(struct tls_strparser *strp) +{ + /* This check is needed to synchronize with do_tls_strp_work. + * do_tls_strp_work acquires a process lock (lock_sock) whereas + * the lock held here is bh_lock_sock. The two locks can be + * held by different threads at the same time, but bh_lock_sock + * allows a thread in BH context to safely check if the process + * lock is held. In this case, if the lock is held, queue work. 
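+	 * sock_owned_by_user_nocheck() does that check by reading
+	 * sk_lock.owned directly, without the lockdep assertions of
+	 * sock_owned_by_user(), so it is safe under bh_lock_sock.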
+ */ + if (sock_owned_by_user_nocheck(strp->sk)) { + queue_work(tls_strp_wq, &strp->work); + return; + } + + tls_strp_check_rcv(strp); +} + +static void tls_strp_work(struct work_struct *w) +{ + struct tls_strparser *strp = + container_of(w, struct tls_strparser, work); + + lock_sock(strp->sk); + tls_strp_check_rcv(strp); + release_sock(strp->sk); +} + +void tls_strp_msg_done(struct tls_strparser *strp) +{ + WARN_ON(!strp->stm.full_len); + + if (likely(!strp->copy_mode)) + tcp_read_done(strp->sk, strp->stm.full_len); + else + tls_strp_flush_anchor_copy(strp); + + strp->msg_ready = 0; + memset(&strp->stm, 0, sizeof(strp->stm)); + + tls_strp_check_rcv(strp); +} + +void tls_strp_stop(struct tls_strparser *strp) +{ + strp->stopped = 1; +} + +int tls_strp_init(struct tls_strparser *strp, struct sock *sk) +{ + memset(strp, 0, sizeof(*strp)); + + strp->sk = sk; + + strp->anchor = alloc_skb(0, GFP_KERNEL); + if (!strp->anchor) + return -ENOMEM; + + INIT_WORK(&strp->work, tls_strp_work); + + return 0; +} + +/* strp must already be stopped so that tls_strp_recv will no longer be called. + * Note that tls_strp_done is not called with the lower socket held. + */ +void tls_strp_done(struct tls_strparser *strp) +{ + WARN_ON(!strp->stopped); + + cancel_work_sync(&strp->work); + tls_strp_anchor_free(strp); +} + +int __init tls_strp_dev_init(void) +{ + tls_strp_wq = create_workqueue("tls-strp"); + if (unlikely(!tls_strp_wq)) + return -ENOMEM; + + return 0; +} + +void tls_strp_dev_exit(void) +{ + destroy_workqueue(tls_strp_wq); +} diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index ed5e6f1df9c7..17db8c8811fa 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1283,13 +1283,16 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, static int tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, - long timeo) + bool released) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); DEFINE_WAIT_FUNC(wait, woken_wake_function); + long timeo; + + timeo = sock_rcvtimeo(sk, nonblock); - while (!ctx->recv_pkt) { + while (!tls_strp_msg_ready(ctx)) { if (!sk_psock_queue_empty(psock)) return 0; @@ -1297,8 +1300,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return sock_error(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) { - __strp_unpause(&ctx->strp); - if (ctx->recv_pkt) + tls_strp_check_rcv(&ctx->strp); + if (tls_strp_msg_ready(ctx)) break; } @@ -1308,13 +1311,15 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, if (sock_flag(sk, SOCK_DONE)) return 0; - if (nonblock || !timeo) + if (!timeo) return -EAGAIN; + released = true; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); sk_wait_event(sk, &timeo, - ctx->recv_pkt || !sk_psock_queue_empty(psock), + tls_strp_msg_ready(ctx) || + !sk_psock_queue_empty(psock), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); @@ -1324,6 +1329,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return sock_intr_errno(timeo); } + tls_strp_msg_load(&ctx->strp, released); + return 1; } @@ -1408,13 +1415,15 @@ tls_alloc_clrtxt_skb(struct sock *sk, struct sk_buff *skb, /* Decrypt handlers * - * tls_decrypt_sg() and tls_decrypt_device() are decrypt handlers. + * tls_decrypt_sw() and tls_decrypt_device() are decrypt handlers. 
* They must transform the darg in/out argument are as follows: * | Input | Output * ------------------------------------------------------------------- * zc | Zero-copy decrypt allowed | Zero-copy performed * async | Async decrypt allowed | Async crypto used / in progress * skb | * | Output skb + * + * If ZC decryption was performed darg.skb will point to the input skb. */ /* This function decrypts the input skb into either out_iov or in out_sg @@ -1567,7 +1576,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, clear_skb = NULL; if (unlikely(darg->async)) { - err = tls_strp_msg_hold(sk, skb, &ctx->async_hold); + err = tls_strp_msg_hold(&ctx->strp, &ctx->async_hold); if (err) __skb_queue_tail(&ctx->async_hold, darg->skb); return err; @@ -1588,49 +1597,22 @@ exit_free_skb: } static int -tls_decrypt_device(struct sock *sk, struct tls_context *tls_ctx, - struct tls_decrypt_arg *darg) -{ - struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - int err; - - if (tls_ctx->rx_conf != TLS_HW) - return 0; - - err = tls_device_decrypted(sk, tls_ctx); - if (err <= 0) - return err; - - darg->zc = false; - darg->async = false; - darg->skb = tls_strp_msg(ctx); - ctx->recv_pkt = NULL; - return 1; -} - -static int tls_rx_one_record(struct sock *sk, struct iov_iter *dest, - struct tls_decrypt_arg *darg) +tls_decrypt_sw(struct sock *sk, struct tls_context *tls_ctx, + struct msghdr *msg, struct tls_decrypt_arg *darg) { - struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; struct strp_msg *rxm; int pad, err; - err = tls_decrypt_device(sk, tls_ctx, darg); - if (err < 0) - return err; - if (err) - goto decrypt_done; - - err = tls_decrypt_sg(sk, dest, NULL, darg); + err = tls_decrypt_sg(sk, &msg->msg_iter, NULL, darg); if (err < 0) { if (err == -EBADMSG) TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); return err; } - if (darg->async) - goto decrypt_done; + /* keep going even for ->async, the code below is TLS 1.3 */ + /* If opportunistic TLS 1.3 ZC failed retry without ZC */ if (unlikely(darg->zc && prot->version == TLS_1_3_VERSION && darg->tail != TLS_RECORD_TYPE_DATA)) { @@ -1638,21 +1620,87 @@ static int tls_rx_one_record(struct sock *sk, struct iov_iter *dest, if (!darg->tail) TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXNOPADVIOL); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTRETRY); - return tls_rx_one_record(sk, dest, darg); + return tls_decrypt_sw(sk, tls_ctx, msg, darg); } -decrypt_done: - if (darg->skb == ctx->recv_pkt) - ctx->recv_pkt = NULL; - pad = tls_padding_length(prot, darg->skb, darg); if (pad < 0) { - consume_skb(darg->skb); + if (darg->skb != tls_strp_msg(ctx)) + consume_skb(darg->skb); return pad; } rxm = strp_msg(darg->skb); rxm->full_len -= pad; + + return 0; +} + +static int +tls_decrypt_device(struct sock *sk, struct msghdr *msg, + struct tls_context *tls_ctx, struct tls_decrypt_arg *darg) +{ + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct tls_prot_info *prot = &tls_ctx->prot_info; + struct strp_msg *rxm; + int pad, err; + + if (tls_ctx->rx_conf != TLS_HW) + return 0; + + err = tls_device_decrypted(sk, tls_ctx); + if (err <= 0) + return err; + + pad = tls_padding_length(prot, tls_strp_msg(ctx), darg); + if (pad < 0) + return pad; + + darg->async = false; + darg->skb = tls_strp_msg(ctx); + /* ->zc downgrade check, in case TLS 1.3 gets here */ + darg->zc &= !(prot->version == TLS_1_3_VERSION && + tls_msg(darg->skb)->control != 
TLS_RECORD_TYPE_DATA); + + rxm = strp_msg(darg->skb); + rxm->full_len -= pad; + + if (!darg->zc) { + /* Non-ZC case needs a real skb */ + darg->skb = tls_strp_msg_detach(ctx); + if (!darg->skb) + return -ENOMEM; + } else { + unsigned int off, len; + + /* In ZC case nobody cares about the output skb. + * Just copy the data here. Note the skb is not fully trimmed. + */ + off = rxm->offset + prot->prepend_size; + len = rxm->full_len - prot->overhead_size; + + err = skb_copy_datagram_msg(darg->skb, off, msg, len); + if (err) + return err; + } + return 1; +} + +static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, + struct tls_decrypt_arg *darg) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; + struct strp_msg *rxm; + int err; + + err = tls_decrypt_device(sk, msg, tls_ctx, darg); + if (!err) + err = tls_decrypt_sw(sk, tls_ctx, msg, darg); + if (err < 0) + return err; + + rxm = strp_msg(darg->skb); rxm->offset += prot->prepend_size; rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, prot, &tls_ctx->rx); @@ -1692,9 +1740,7 @@ static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, static void tls_rx_rec_done(struct tls_sw_context_rx *ctx) { - consume_skb(ctx->recv_pkt); - ctx->recv_pkt = NULL; - __strp_unpause(&ctx->strp); + tls_strp_msg_done(&ctx->strp); } /* This function traverses the rx_list in tls receive context to copies the @@ -1781,7 +1827,7 @@ out: return copied ? : err; } -static void +static bool tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot, size_t len_left, size_t decrypted, ssize_t done, size_t *flushed_at) @@ -1789,18 +1835,18 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot, size_t max_rec; if (len_left <= decrypted) - return; + return false; max_rec = prot->overhead_size - prot->tail_size + TLS_MAX_PAYLOAD_SIZE; if (done - *flushed_at < SZ_128K && tcp_inq(sk) > max_rec) - return; + return false; *flushed_at = done; - sk_flush_backlog(sk); + return sk_flush_backlog(sk); } -static long tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, - bool nonblock) +static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, + bool nonblock) { long timeo; int err; @@ -1831,7 +1877,7 @@ static long tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, WRITE_ONCE(ctx->reader_present, 1); - return timeo; + return 0; err_unlock: release_sock(sk); @@ -1868,13 +1914,12 @@ int tls_sw_recvmsg(struct sock *sk, size_t flushed_at = 0; struct strp_msg *rxm; struct tls_msg *tlm; - struct sk_buff *skb; ssize_t copied = 0; bool async = false; - int target, err = 0; - long timeo; + int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -1882,9 +1927,9 @@ int tls_sw_recvmsg(struct sock *sk, return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); psock = sk_psock_get(sk); - timeo = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); - if (timeo < 0) - return timeo; + err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); + if (err < 0) + return err; bpf_strp_enabled = sk_psock_strp_enabled(psock); /* If crypto failed the connection is broken */ @@ -1907,11 +1952,12 @@ int tls_sw_recvmsg(struct sock *sk, zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek && ctx->zc_capable; decrypted = 0; - while (len && (decrypted + copied < target || ctx->recv_pkt)) { + while (len && (decrypted + copied < target || 
tls_strp_msg_ready(ctx))) { struct tls_decrypt_arg darg; int to_decrypt, chunk; - err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT, timeo); + err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT, + released); if (err <= 0) { if (psock) { chunk = sk_msg_recvmsg(sk, psock, msg, len, @@ -1927,8 +1973,8 @@ int tls_sw_recvmsg(struct sock *sk, memset(&darg.inargs, 0, sizeof(darg.inargs)); - rxm = strp_msg(ctx->recv_pkt); - tlm = tls_msg(ctx->recv_pkt); + rxm = strp_msg(tls_strp_msg(ctx)); + tlm = tls_msg(tls_strp_msg(ctx)); to_decrypt = rxm->full_len - prot->overhead_size; @@ -1942,16 +1988,12 @@ int tls_sw_recvmsg(struct sock *sk, else darg.async = false; - err = tls_rx_one_record(sk, &msg->msg_iter, &darg); + err = tls_rx_one_record(sk, msg, &darg); if (err < 0) { tls_err_abort(sk, -EBADMSG); goto recv_end; } - skb = darg.skb; - rxm = strp_msg(skb); - tlm = tls_msg(skb); - async |= darg.async; /* If the type of records being processed is not known yet, @@ -1961,24 +2003,30 @@ int tls_sw_recvmsg(struct sock *sk, * is known just after record is dequeued from stream parser. * For tls1.3, we disable async. */ - err = tls_record_content_type(msg, tlm, &control); + err = tls_record_content_type(msg, tls_msg(darg.skb), &control); if (err <= 0) { + DEBUG_NET_WARN_ON_ONCE(darg.zc); tls_rx_rec_done(ctx); put_on_rx_list_err: - __skb_queue_tail(&ctx->rx_list, skb); + __skb_queue_tail(&ctx->rx_list, darg.skb); goto recv_end; } /* periodically flush backlog, and feed strparser */ - tls_read_flush_backlog(sk, prot, len, to_decrypt, - decrypted + copied, &flushed_at); + released = tls_read_flush_backlog(sk, prot, len, to_decrypt, + decrypted + copied, + &flushed_at); /* TLS 1.3 may have updated the length by more than overhead */ + rxm = strp_msg(darg.skb); chunk = rxm->full_len; tls_rx_rec_done(ctx); if (!darg.zc) { bool partially_consumed = chunk > len; + struct sk_buff *skb = darg.skb; + + DEBUG_NET_WARN_ON_ONCE(darg.skb == ctx->strp.anchor); if (async) { /* TLS 1.2-only, to_decrypt must be text len */ @@ -1992,6 +2040,7 @@ put_on_rx_list: } if (bpf_strp_enabled) { + released = true; err = sk_psock_tls_strp_read(psock, skb); if (err != __SK_PASS) { rxm->offset = rxm->offset + rxm->full_len; @@ -2018,13 +2067,13 @@ put_on_rx_list: rxm->full_len -= chunk; goto put_on_rx_list; } + + consume_skb(skb); } decrypted += chunk; len -= chunk; - consume_skb(skb); - /* Return full control message to userspace before trying * to parse another message type */ @@ -2084,13 +2133,12 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct tls_msg *tlm; struct sk_buff *skb; ssize_t copied = 0; - int err = 0; - long timeo; int chunk; + int err; - timeo = tls_rx_reader_lock(sk, ctx, flags & SPLICE_F_NONBLOCK); - if (timeo < 0) - return timeo; + err = tls_rx_reader_lock(sk, ctx, flags & SPLICE_F_NONBLOCK); + if (err < 0) + return err; if (!skb_queue_empty(&ctx->rx_list)) { skb = __skb_dequeue(&ctx->rx_list); @@ -2098,7 +2146,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct tls_decrypt_arg darg; err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK, - timeo); + true); if (err <= 0) goto splice_read_end; @@ -2158,23 +2206,21 @@ bool tls_sw_sock_is_readable(struct sock *sk) ingress_empty = list_empty(&psock->ingress_msg); rcu_read_unlock(); - return !ingress_empty || ctx->recv_pkt || + return !ingress_empty || tls_strp_msg_ready(ctx) || !skb_queue_empty(&ctx->rx_list); } -static int tls_read_size(struct strparser *strp, struct sk_buff *skb) +int tls_rx_msg_size(struct 
tls_strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_prot_info *prot = &tls_ctx->prot_info; char header[TLS_HEADER_SIZE + MAX_IV_SIZE]; - struct strp_msg *rxm = strp_msg(skb); - struct tls_msg *tlm = tls_msg(skb); size_t cipher_overhead; size_t data_len = 0; int ret; /* Verify that we have a full TLS header, or wait for more data */ - if (rxm->offset + prot->prepend_size > skb->len) + if (strp->stm.offset + prot->prepend_size > skb->len) return 0; /* Sanity-check size of on-stack buffer. */ @@ -2184,11 +2230,11 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) } /* Linearize header to local buffer */ - ret = skb_copy_bits(skb, rxm->offset, header, prot->prepend_size); + ret = skb_copy_bits(skb, strp->stm.offset, header, prot->prepend_size); if (ret < 0) goto read_failure; - tlm->control = header[0]; + strp->mark = header[0]; data_len = ((header[4] & 0xFF) | (header[3] << 8)); @@ -2215,7 +2261,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) } tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE, - TCP_SKB_CB(skb)->seq + rxm->offset); + TCP_SKB_CB(skb)->seq + strp->stm.offset); return data_len + TLS_HEADER_SIZE; read_failure: @@ -2224,14 +2270,11 @@ read_failure: return ret; } -static void tls_queue(struct strparser *strp, struct sk_buff *skb) +void tls_rx_msg_ready(struct tls_strparser *strp) { - struct tls_context *tls_ctx = tls_get_ctx(strp->sk); - struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - - ctx->recv_pkt = skb; - strp_pause(strp); + struct tls_sw_context_rx *ctx; + ctx = container_of(strp, struct tls_sw_context_rx, strp); ctx->saved_data_ready(strp->sk); } @@ -2241,7 +2284,7 @@ static void tls_data_ready(struct sock *sk) struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct sk_psock *psock; - strp_data_ready(&ctx->strp); + tls_strp_data_ready(&ctx->strp); psock = sk_psock_get(sk); if (psock) { @@ -2317,13 +2360,11 @@ void tls_sw_release_resources_rx(struct sock *sk) kfree(tls_ctx->rx.iv); if (ctx->aead_recv) { - kfree_skb(ctx->recv_pkt); - ctx->recv_pkt = NULL; __skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); - strp_stop(&ctx->strp); + tls_strp_stop(&ctx->strp); /* If tls_sw_strparser_arm() was not called (cleanup paths) - * we still want to strp_stop(), but sk->sk_data_ready was + * we still want to tls_strp_stop(), but sk->sk_data_ready was * never swapped. 
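	 * (tls_sw_strparser_arm() is what saves the old sk_data_ready into
	 * saved_data_ready, hence the check below)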
*/ if (ctx->saved_data_ready) { @@ -2338,7 +2379,7 @@ void tls_sw_strparser_done(struct tls_context *tls_ctx) { struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - strp_done(&ctx->strp); + tls_strp_done(&ctx->strp); } void tls_sw_free_ctx_rx(struct tls_context *tls_ctx) @@ -2411,8 +2452,6 @@ void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx) rx_ctx->saved_data_ready = sk->sk_data_ready; sk->sk_data_ready = tls_data_ready; write_unlock_bh(&sk->sk_callback_lock); - - strp_check_rcv(&rx_ctx->strp); } void tls_update_rx_zc_capable(struct tls_context *tls_ctx) @@ -2432,7 +2471,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) struct tls_sw_context_rx *sw_ctx_rx = NULL; struct cipher_context *cctx; struct crypto_aead **aead; - struct strp_callbacks cb; u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size; struct crypto_tfm *tfm; char *iv, *rec_seq, *key, *salt, *cipher_name; @@ -2666,12 +2704,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) crypto_info->version != TLS_1_3_VERSION && !!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC); - /* Set up strparser */ - memset(&cb, 0, sizeof(cb)); - cb.rcv_msg = tls_queue; - cb.parse_msg = tls_read_size; - - strp_init(&sw_ctx_rx->strp, sk, &cb); + tls_strp_init(&sw_ctx_rx->strp, sk); } goto out; diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 99a128a666fb..4ce5d2579387 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -13,7 +13,6 @@ #include <linux/of_gpio.h> #include <linux/of_device.h> #include <linux/clk.h> -#include <linux/pinctrl/consumer.h> #include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/spinlock.h> @@ -55,40 +54,8 @@ struct rk_i2s_dev { const struct rk_i2s_pins *pins; unsigned int bclk_ratio; spinlock_t lock; /* tx/rx lock */ - struct pinctrl *pinctrl; - struct pinctrl_state *bclk_on; - struct pinctrl_state *bclk_off; }; -static int i2s_pinctrl_select_bclk_on(struct rk_i2s_dev *i2s) -{ - int ret = 0; - - if (!IS_ERR(i2s->pinctrl) && !IS_ERR_OR_NULL(i2s->bclk_on)) - ret = pinctrl_select_state(i2s->pinctrl, - i2s->bclk_on); - - if (ret) - dev_err(i2s->dev, "bclk enable failed %d\n", ret); - - return ret; -} - -static int i2s_pinctrl_select_bclk_off(struct rk_i2s_dev *i2s) -{ - - int ret = 0; - - if (!IS_ERR(i2s->pinctrl) && !IS_ERR_OR_NULL(i2s->bclk_off)) - ret = pinctrl_select_state(i2s->pinctrl, - i2s->bclk_off); - - if (ret) - dev_err(i2s->dev, "bclk disable failed %d\n", ret); - - return ret; -} - static int i2s_runtime_suspend(struct device *dev) { struct rk_i2s_dev *i2s = dev_get_drvdata(dev); @@ -125,49 +92,38 @@ static inline struct rk_i2s_dev *to_info(struct snd_soc_dai *dai) return snd_soc_dai_get_drvdata(dai); } -static int rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on) +static void rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on) { unsigned int val = 0; int retry = 10; - int ret = 0; spin_lock(&i2s->lock); if (on) { - ret = regmap_update_bits(i2s->regmap, I2S_DMACR, - I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_ENABLE); - if (ret < 0) - goto end; + regmap_update_bits(i2s->regmap, I2S_DMACR, + I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_ENABLE); - ret = regmap_update_bits(i2s->regmap, I2S_XFER, - I2S_XFER_TXS_START | I2S_XFER_RXS_START, - I2S_XFER_TXS_START | I2S_XFER_RXS_START); - if (ret < 0) - goto end; + regmap_update_bits(i2s->regmap, I2S_XFER, + I2S_XFER_TXS_START | I2S_XFER_RXS_START, + I2S_XFER_TXS_START | I2S_XFER_RXS_START); i2s->tx_start = true; } 
else { i2s->tx_start = false; - ret = regmap_update_bits(i2s->regmap, I2S_DMACR, - I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_DISABLE); - if (ret < 0) - goto end; + regmap_update_bits(i2s->regmap, I2S_DMACR, + I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_DISABLE); if (!i2s->rx_start) { - ret = regmap_update_bits(i2s->regmap, I2S_XFER, - I2S_XFER_TXS_START | - I2S_XFER_RXS_START, - I2S_XFER_TXS_STOP | - I2S_XFER_RXS_STOP); - if (ret < 0) - goto end; + regmap_update_bits(i2s->regmap, I2S_XFER, + I2S_XFER_TXS_START | + I2S_XFER_RXS_START, + I2S_XFER_TXS_STOP | + I2S_XFER_RXS_STOP); udelay(150); - ret = regmap_update_bits(i2s->regmap, I2S_CLR, - I2S_CLR_TXC | I2S_CLR_RXC, - I2S_CLR_TXC | I2S_CLR_RXC); - if (ret < 0) - goto end; + regmap_update_bits(i2s->regmap, I2S_CLR, + I2S_CLR_TXC | I2S_CLR_RXC, + I2S_CLR_TXC | I2S_CLR_RXC); regmap_read(i2s->regmap, I2S_CLR, &val); @@ -182,57 +138,44 @@ static int rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on) } } } -end: spin_unlock(&i2s->lock); - if (ret < 0) - dev_err(i2s->dev, "lrclk update failed\n"); - - return ret; } -static int rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on) +static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on) { unsigned int val = 0; int retry = 10; - int ret = 0; spin_lock(&i2s->lock); if (on) { - ret = regmap_update_bits(i2s->regmap, I2S_DMACR, + regmap_update_bits(i2s->regmap, I2S_DMACR, I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_ENABLE); - if (ret < 0) - goto end; - ret = regmap_update_bits(i2s->regmap, I2S_XFER, + regmap_update_bits(i2s->regmap, I2S_XFER, I2S_XFER_TXS_START | I2S_XFER_RXS_START, I2S_XFER_TXS_START | I2S_XFER_RXS_START); - if (ret < 0) - goto end; i2s->rx_start = true; } else { i2s->rx_start = false; - ret = regmap_update_bits(i2s->regmap, I2S_DMACR, + regmap_update_bits(i2s->regmap, I2S_DMACR, I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_DISABLE); - if (ret < 0) - goto end; if (!i2s->tx_start) { - ret = regmap_update_bits(i2s->regmap, I2S_XFER, + regmap_update_bits(i2s->regmap, I2S_XFER, I2S_XFER_TXS_START | I2S_XFER_RXS_START, I2S_XFER_TXS_STOP | I2S_XFER_RXS_STOP); - if (ret < 0) - goto end; + udelay(150); - ret = regmap_update_bits(i2s->regmap, I2S_CLR, + regmap_update_bits(i2s->regmap, I2S_CLR, I2S_CLR_TXC | I2S_CLR_RXC, I2S_CLR_TXC | I2S_CLR_RXC); - if (ret < 0) - goto end; + regmap_read(i2s->regmap, I2S_CLR, &val); + /* Should wait for clear operation to finish */ while (val) { regmap_read(i2s->regmap, I2S_CLR, &val); @@ -244,12 +187,7 @@ static int rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on) } } } -end: spin_unlock(&i2s->lock); - if (ret < 0) - dev_err(i2s->dev, "lrclk update failed\n"); - - return ret; } static int rockchip_i2s_set_fmt(struct snd_soc_dai *cpu_dai, @@ -487,26 +425,17 @@ static int rockchip_i2s_trigger(struct snd_pcm_substream *substream, case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) - ret = rockchip_snd_rxctrl(i2s, 1); + rockchip_snd_rxctrl(i2s, 1); else - ret = rockchip_snd_txctrl(i2s, 1); - /* Do not turn on bclk if lrclk open fails. 
*/ - if (ret < 0) - return ret; - i2s_pinctrl_select_bclk_on(i2s); + rockchip_snd_txctrl(i2s, 1); break; case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { - if (!i2s->tx_start) - i2s_pinctrl_select_bclk_off(i2s); - ret = rockchip_snd_rxctrl(i2s, 0); - } else { - if (!i2s->rx_start) - i2s_pinctrl_select_bclk_off(i2s); - ret = rockchip_snd_txctrl(i2s, 0); - } + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) + rockchip_snd_rxctrl(i2s, 0); + else + rockchip_snd_txctrl(i2s, 0); break; default: ret = -EINVAL; @@ -807,33 +736,6 @@ static int rockchip_i2s_probe(struct platform_device *pdev) } i2s->bclk_ratio = 64; - i2s->pinctrl = devm_pinctrl_get(&pdev->dev); - if (IS_ERR(i2s->pinctrl)) - dev_err(&pdev->dev, "failed to find i2s pinctrl\n"); - - i2s->bclk_on = pinctrl_lookup_state(i2s->pinctrl, - "bclk_on"); - if (IS_ERR_OR_NULL(i2s->bclk_on)) - dev_err(&pdev->dev, "failed to find i2s default state\n"); - else - dev_dbg(&pdev->dev, "find i2s bclk state\n"); - - i2s->bclk_off = pinctrl_lookup_state(i2s->pinctrl, - "bclk_off"); - if (IS_ERR_OR_NULL(i2s->bclk_off)) - dev_err(&pdev->dev, "failed to find i2s gpio state\n"); - else - dev_dbg(&pdev->dev, "find i2s bclk_off state\n"); - - i2s_pinctrl_select_bclk_off(i2s); - - i2s->playback_dma_data.addr = res->start + I2S_TXDR; - i2s->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - i2s->playback_dma_data.maxburst = 4; - - i2s->capture_dma_data.addr = res->start + I2S_RXDR; - i2s->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - i2s->capture_dma_data.maxburst = 4; dev_set_drvdata(&pdev->dev, i2s); diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 1cf53bb01936..7070dcffa822 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -1175,7 +1175,7 @@ static int do_skeleton(int argc, char **argv) static inline void \n\ %1$s__detach(struct %1$s *obj) \n\ { \n\ - return bpf_object__detach_skeleton(obj->skeleton); \n\ + bpf_object__detach_skeleton(obj->skeleton); \n\ } \n\ ", obj_name diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f081de398b60..c81362a001ba 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1962,7 +1962,7 @@ static int profile_parse_metrics(int argc, char **argv) int selected_cnt = 0; unsigned int i; - metric_cnt = sizeof(metrics) / sizeof(struct profile_metric); + metric_cnt = ARRAY_SIZE(metrics); while (argc > 0) { for (i = 0; i < metric_cnt; i++) { diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h index 0197042b7dfb..1ecdb911add8 100644 --- a/tools/include/uapi/asm-generic/fcntl.h +++ b/tools/include/uapi/asm-generic/fcntl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _ASM_GENERIC_FCNTL_H #define _ASM_GENERIC_FCNTL_H @@ -90,7 +91,7 @@ /* a horrid kludge trying to make sure that this will fail on old kernels */ #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) -#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT) +#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT) #ifndef O_NDELAY #define O_NDELAY O_NONBLOCK @@ -115,11 +116,13 @@ #define F_GETSIG 11 /* for sockets. 
*/ #endif +#if __BITS_PER_LONG == 32 || defined(__KERNEL__) #ifndef F_GETLK64 #define F_GETLK64 12 /* using 'struct flock64' */ #define F_SETLK64 13 #define F_SETLKW64 14 #endif +#endif /* __BITS_PER_LONG == 32 || defined(__KERNEL__) */ #ifndef F_SETOWN_EX #define F_SETOWN_EX 15 @@ -178,6 +181,10 @@ struct f_owner_ex { blocking */ #define LOCK_UN 8 /* remove lock */ +/* + * LOCK_MAND support has been removed from the kernel. We leave the symbols + * here to not break legacy builds, but these should not be used in new code. + */ #define LOCK_MAND 32 /* This is a mandatory flock ... */ #define LOCK_READ 64 /* which allows concurrent read operations */ #define LOCK_WRITE 128 /* which allows concurrent write operations */ @@ -185,6 +192,7 @@ struct f_owner_ex { #define F_LINUX_SPECIFIC_BASE 1024 +#ifndef HAVE_ARCH_STRUCT_FLOCK struct flock { short l_type; short l_whence; @@ -209,5 +217,6 @@ struct flock64 { __ARCH_FLOCK64_PAD #endif }; +#endif /* HAVE_ARCH_STRUCT_FLOCK */ #endif /* _ASM_GENERIC_FCNTL_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 811897dadcae..860f867c50c0 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -2084,7 +2084,7 @@ struct kvm_stats_header { #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) -#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN #define KVM_STATS_BASE_SHIFT 8 #define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5eb0df90eb2b..efcc06dafbd9 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -579,11 +579,20 @@ int bpf_obj_pin(int fd, const char *pathname) int bpf_obj_get(const char *pathname) { + return bpf_obj_get_opts(pathname, NULL); +} + +int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) +{ union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_obj_get_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); + attr.file_flags = OPTS_GET(opts, file_flags, 0); fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr)); return libbpf_err_errno(fd); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 88a7cc4bd76f..9c50beabdd14 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -270,8 +270,19 @@ LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values __u32 *count, const struct bpf_map_batch_opts *opts); +struct bpf_obj_get_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + __u32 file_flags; + + size_t :0; +}; +#define bpf_obj_get_opts__last_field file_flags + LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); LIBBPF_API int bpf_obj_get(const char *pathname); +LIBBPF_API int bpf_obj_get_opts(const char *pathname, + const struct bpf_obj_get_opts *opts); struct bpf_prog_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index f4d3e1e2abe2..43ca3aff2292 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -523,10 +523,17 @@ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) * Original struct pt_regs * context is preserved as 'ctx' argument. 
This might * be necessary when using BPF helpers like bpf_perf_event_output(). * - * At the moment BPF_KSYSCALL does not handle all the calling convention - * quirks for mmap(), clone() and compat syscalls transparrently. This may or - * may not change in the future. User needs to take extra measures to handle - * such quirks explicitly, if necessary. + * At the moment BPF_KSYSCALL does not transparently handle all the calling + * convention quirks for the following syscalls: + * + * - mmap(): __ARCH_WANT_SYS_OLD_MMAP. + * - clone(): CONFIG_CLONE_BACKWARDS, CONFIG_CLONE_BACKWARDS2 and + * CONFIG_CLONE_BACKWARDS3. + * - socket-related syscalls: __ARCH_WANT_SYS_SOCKETCALL. + * - compat syscalls. + * + * This may or may not change in the future. User needs to take extra measures + * to handle such quirks explicitly, if necessary. * * This macro relies on BPF CO-RE support and virtual __kconfig externs. */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b01fe01b0761..50d41815f431 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9995,6 +9995,10 @@ static const char *arch_specific_syscall_pfx(void) return "mips"; #elif defined(__riscv) return "riscv"; +#elif defined(__powerpc__) + return "powerpc"; +#elif defined(__powerpc64__) + return "powerpc64"; #else return NULL; #endif @@ -10127,8 +10131,13 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, return libbpf_err_ptr(-EINVAL); if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { + /* arch_specific_syscall_pfx() should never return NULL here + * because it is guarded by kernel_supports(). However, since + * compiler does not know that we have an explicit conditional + * as well. + */ snprintf(func_name, sizeof(func_name), "__%s_sys_%s", - arch_specific_syscall_pfx(), syscall_name); + arch_specific_syscall_pfx() ? : "", syscall_name); } else { snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); } diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 0625adb9e888..119e6e1ea7f1 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -355,6 +355,7 @@ LIBBPF_0.8.0 { LIBBPF_1.0.0 { global: + bpf_obj_get_opts; bpf_prog_query_opts; bpf_program__attach_ksyscall; btf__add_enum64; diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST new file mode 100644 index 000000000000..939de574fc7f --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST @@ -0,0 +1,6 @@ +# TEMPORARY +get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge +stacktrace_build_id_nmi +stacktrace_build_id +task_fd_query_rawtp +varlen diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x new file mode 100644 index 000000000000..e33cab34d22f --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -0,0 +1,67 @@ +# TEMPORARY +atomics # attach(add): actual -524 <= expected 0 (trampoline) +bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc) +bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) 
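As an aside on the bpf.h hunk above: the new bpf_obj_get_opts() follows libbpf's standard extensible-options convention (a size-prefixed opts struct plus a __last_field marker), so fields can be appended later without breaking ABI. A minimal usage sketch, assuming only the libbpf headers from this series; the function name and path handling are illustrative:

	#include <bpf/bpf.h>
	#include <linux/bpf.h>	/* BPF_F_RDONLY */

	/* Open a BPF object pinned at 'path' with read-only access. */
	static int get_pinned_fd_rdonly(const char *path)
	{
		LIBBPF_OPTS(bpf_obj_get_opts, opts,
			.file_flags = BPF_F_RDONLY,
		);

		/* Returns an fd on success, a negative error otherwise. */
		return bpf_obj_get_opts(path, &opts);
	}

Passing a NULL opts pointer keeps the old semantics, which is exactly how the reworked bpf_obj_get() wrapper delegates to it.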
+bpf_tcp_ca # JIT does not support calling kernel function (kfunc) +bpf_loop # attaches to __x64_sys_nanosleep +bpf_mod_race # BPF trampoline +bpf_nf # JIT does not support calling kernel function +core_read_macros # unknown func bpf_probe_read#4 (overlapping) +d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) +dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline) +fentry_fexit # fentry attach failed: -524 (trampoline) +fentry_test # fentry_first_attach unexpected error: -524 (trampoline) +fexit_bpf2bpf # freplace_attach_trace unexpected error: -524 (trampoline) +fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) +fexit_stress # fexit attach failed prog 0 failed: -524 (trampoline) +fexit_test # fexit_first_attach unexpected error: -524 (trampoline) +get_func_args_test # trampoline +get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline) +get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) +kfree_skb # attach fentry unexpected error: -524 (trampoline) +kfunc_call # 'bpf_prog_active': not found in kernel BTF (?) +ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) +ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) +ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) +modify_return # modify_return attach failed: -524 (trampoline) +module_attach # skel_attach skeleton attach failed: -524 (trampoline) +mptcp +kprobe_multi_test # relies on fentry +netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?) +probe_user # check_kprobe_res wrong kprobe res from probe read (?) +recursion # skel_attach unexpected error: -524 (trampoline) +ringbuf # skel_load skeleton load failed (?) +sk_assign # Can't read on server: Invalid argument (?) +sk_lookup # endianness problem +sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline) +skc_to_unix_sock # could not attach BPF object unexpected error: -524 (trampoline) +socket_cookie # prog_attach unexpected error: -524 (trampoline) +stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) +tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?) +task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline) +test_bpffs # bpffs test failed 255 (iterator) +test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) +test_ima # failed to auto-attach program 'ima': -524 (trampoline) +test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline) +test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?) +test_overhead # attach_fentry unexpected error: -524 (trampoline) +test_profiler # unknown func bpf_probe_read_str#45 (overlapping) +timer # failed to auto-attach program 'test1': -524 (trampoline) +timer_crash # trampoline +timer_mim # failed to auto-attach program 'test1': -524 (trampoline) +trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline) +trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) +trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) +trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) 
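The BPF_KSYSCALL macro whose documentation is expanded in the bpf_tracing.h hunk above is meant to be used along these lines (a hypothetical program, not part of this series; it assumes CO-RE support, a generated vmlinux.h and a libbpf with ksyscall auto-attach):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char LICENSE[] SEC("license") = "GPL";

	/* Typed syscall arguments are decoded by the macro whether or not
	 * the kernel was built with syscall wrappers.
	 */
	SEC("ksyscall/unlinkat")
	int BPF_KSYSCALL(handle_unlinkat, int dfd, const char *pathname, int flag)
	{
		bpf_printk("unlinkat: dfd=%d flag=%d", dfd, flag);
		return 0;
	}

On attach, libbpf resolves the section name to the matching kernel symbol, e.g. __x64_sys_unlinkat when the kernel uses syscall wrappers and __se_sys_unlinkat otherwise; the arch_specific_syscall_pfx() hunk above supplies the "powerpc"/"powerpc64" prefixes for exactly that lookup.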
+vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline) +xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?) +xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) +xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline) +map_kptr # failed to open_and_load program: -524 (trampoline) +bpf_cookie # failed to open_and_load program: -524 (trampoline) +xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22) +send_signal # intermittently fails to receive signal +select_reuseport # intermittently fails on new s390x setup +xdp_synproxy # JIT does not support calling kernel function (kfunc) +unpriv_bpf_disabled # fentry diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index c05904d631ec..fabf0c014349 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -1,65 +1,64 @@ +CONFIG_BLK_DEV_LOOP=y CONFIG_BPF=y -CONFIG_BPF_SYSCALL=y -CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y -CONFIG_TEST_BPF=m +CONFIG_BPF_JIT=y +CONFIG_BPF_LIRC_MODE2=y +CONFIG_BPF_LSM=y +CONFIG_BPF_STREAM_PARSER=y +CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y -CONFIG_NETDEVSIM=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_INGRESS=y -CONFIG_NET_IPIP=y -CONFIG_IPV6=y -CONFIG_NET_IPGRE_DEMUX=y -CONFIG_NET_IPGRE=y -CONFIG_IPV6_GRE=y -CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_HMAC=m CONFIG_CRYPTO_SHA256=m -CONFIG_VXLAN=y -CONFIG_GENEVE=y -CONFIG_NET_CLS_FLOWER=m -CONFIG_LWTUNNEL=y -CONFIG_BPF_STREAM_PARSER=y -CONFIG_XDP_SOCKETS=y +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_DYNAMIC_FTRACE=y +CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y -CONFIG_IPV6_TUNNEL=y +CONFIG_FUNCTION_TRACER=y +CONFIG_GENEVE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_IMA=y +CONFIG_IMA_READ_POLICY=y +CONFIG_IMA_WRITE_POLICY=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_TARGET_SYNPROXY=y +CONFIG_IPV6=y +CONFIG_IPV6_FOU=m +CONFIG_IPV6_FOU_TUNNEL=m CONFIG_IPV6_GRE=y CONFIG_IPV6_SEG6_BPF=y +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=y +CONFIG_LIRC=y +CONFIG_LWTUNNEL=y +CONFIG_MPLS=y +CONFIG_MPLS_IPTUNNEL=m +CONFIG_MPLS_ROUTING=m +CONFIG_MPTCP=y +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_BPF=y +CONFIG_NET_CLS_FLOWER=m CONFIG_NET_FOU=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_IPV6_FOU=m -CONFIG_IPV6_FOU_TUNNEL=m -CONFIG_MPLS=y +CONFIG_NET_IPGRE=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NET_IPIP=y CONFIG_NET_MPLS_GSO=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_IPV6_SIT=m -CONFIG_BPF_JIT=y -CONFIG_BPF_LSM=y -CONFIG_SECURITY=y -CONFIG_RC_CORE=y -CONFIG_LIRC=y -CONFIG_BPF_LIRC_MODE2=y -CONFIG_IMA=y -CONFIG_SECURITYFS=y -CONFIG_IMA_WRITE_POLICY=y -CONFIG_IMA_READ_POLICY=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_FUNCTION_TRACER=y -CONFIG_DYNAMIC_FTRACE=y +CONFIG_NET_SCH_INGRESS=y +CONFIG_NET_SCHED=y +CONFIG_NETDEVSIM=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_SYNPROXY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_TARGET_CT=y +CONFIG_NF_CONNTRACK=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y -CONFIG_NF_CONNTRACK=y +CONFIG_RC_CORE=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y -CONFIG_FPROBE=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_MPTCP=y -CONFIG_NETFILTER_SYNPROXY=y -CONFIG_NETFILTER_XT_TARGET_CT=y -CONFIG_NETFILTER_XT_MATCH_STATE=y -CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_SYNPROXY=y -CONFIG_IP_NF_RAW=y +CONFIG_VXLAN=y +CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/bpf/config.s390x 
b/tools/testing/selftests/bpf/config.s390x new file mode 100644 index 000000000000..f8a7a258a718 --- /dev/null +++ b/tools/testing/selftests/bpf/config.s390x @@ -0,0 +1,147 @@ +CONFIG_9P_FS=y +CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y +CONFIG_AUDIT=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BONDING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +CONFIG_BPF_PRELOAD=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPFILTER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_CPUSETS=y +CONFIG_CRASH_DUMP=y +CONFIG_CRYPTO_USER_API_RNG=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_SG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FANOTIFY=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_GDB_SCRIPTS=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KPROBES_ON_FTRACE=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_MARCH_Z10_FEATURES=y +CONFIG_HAVE_MARCH_Z196_FEATURES=y +CONFIG_HEADERS_INSTALL=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HW_RANDOM=y +CONFIG_HZ_100=y +CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IKHEADERS=y +CONFIG_INET6_ESP=y +CONFIG_INET=y +CONFIG_INET_ESP=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPVLAN=y +CONFIG_JUMP_LABEL=y +CONFIG_KERNEL_UNCOMPRESSED=y +CONFIG_KPROBES=y +CONFIG_KPROBES_ON_FTRACE=y +CONFIG_KRETPROBES=y +CONFIG_KSM=y +CONFIG_LATENCYTOP=y +CONFIG_LIVEPATCH=y +CONFIG_LOCK_STAT=y +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MARCH_Z196=y +CONFIG_MARCH_Z196_TUNE=y +CONFIG_MEMCG=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULES=y +CONFIG_NAMESPACES=y +CONFIG_NET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_ACT_GACT=y +CONFIG_NET_KEY=y +CONFIG_NET_SCH_FQ=y +CONFIG_NET_VRF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NF_TABLES=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NR_CPUS=256 +CONFIG_NUMA=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTDUMP_DEBUGFS=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SAMPLE_SECCOMP=y +CONFIG_SAMPLES=y +CONFIG_SCHED_TRACER=y +CONFIG_SCSI=y +CONFIG_SCSI_VIRTIO=y +CONFIG_SECURITY_NETWORK=y +CONFIG_STACK_TRACER=y +CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TASKSTATS=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_DCTCP=y +CONFIG_TLS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USELIB=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_XFRM_USER=y diff --git 
a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 new file mode 100644 index 000000000000..f0859a1d37ab --- /dev/null +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -0,0 +1,251 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_AGP_SIS=y +CONFIG_AGP_VIA=y +CONFIG_AMIGA_PARTITION=y +CONFIG_AUDIT=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BINFMT_MISC=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_DEV_BSGLIB=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BONDING=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTTIME_TRACING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_KPROBE_OVERRIDE=y +CONFIG_BPF_PRELOAD=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPFILTER=y +CONFIG_BSD_DISKLABEL=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CMA=y +CONFIG_CMA_AREAS=7 +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPUSETS=y +CONFIG_CRC_T10DIF=y +CONFIG_CRYPTO_BLAKE2B=y +CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_XXHASH=y +CONFIG_DCB=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEFAULT_FQ_CODEL=y +CONFIG_DEFAULT_RENO=y +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y +CONFIG_DNS_RESOLVER=y +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FAIL_FUNCTION=y +CONFIG_FAULT_INJECTION=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FB=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_VESA=y +CONFIG_FONT_8x16=y +CONFIG_FONT_MINI_4x6=y +CONFIG_FONTS=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +CONFIG_FW_LOADER_USER_HELPER=y +CONFIG_GART_IOMMU=y +CONFIG_GENERIC_PHY=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_HID_A4TECH=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GREENASIA=y +CONFIG_HID_GYRATION=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_KYE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HPET=y +CONFIG_HUGETLBFS=y +CONFIG_HWPOISON_INJECT=y +CONFIG_HZ_1000=y +CONFIG_INET=y +CONFIG_INPUT_EVDEV=y +CONFIG_INTEL_POWERCLAMP=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPV6_SUBTREES=y +CONFIG_IRQ_POLL=y +CONFIG_JUMP_LABEL=y +CONFIG_KARMA_PARTITION=y +CONFIG_KEXEC=y +CONFIG_KPROBES=y +CONFIG_KSM=y +CONFIG_LEGACY_VSYSCALL_NONE=y 
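Note that these per-architecture fragments are not meant to be used standalone: the vmtest.sh rework later in this series concatenates the common tools/testing/selftests/bpf/config with the matching config.${ARCH} file. A rough sketch of that merge step, assuming $KERNEL points at a kernel checkout and an illustrative output file name:

	# merge the base kconfig fragment with the arch-specific one
	cat "$KERNEL/tools/testing/selftests/bpf/config" \
	    "$KERNEL/tools/testing/selftests/bpf/config.x86_64" > merged.config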
+CONFIG_LOG_BUF_SHIFT=21 +CONFIG_LOG_CPU_MAX_BUF_SHIFT=0 +CONFIG_LOGO=y +CONFIG_LSM="selinux,bpf,integrity" +CONFIG_MAC_PARTITION=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MCORE2=y +CONFIG_MEMCG=y +CONFIG_MEMORY_FAILURE=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_NAMESPACES=y +CONFIG_NET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_EMATCH=y +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_SCH_DEFAULT=y +CONFIG_NET_SCH_FQ_CODEL=y +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_VRF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NETFILTER_NETLINK_QUEUE=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETLABEL=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NO_HZ=y +CONFIG_NR_CPUS=128 +CONFIG_NUMA=y +CONFIG_NUMA_BALANCING=y +CONFIG_NVMEM=y +CONFIG_OSF_PARTITION=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI=y +CONFIG_PCI_IOV=y +CONFIG_PCI_MSI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PHYSICAL_ALIGN=0x1000000 +CONFIG_POSIX_MQUEUE=y +CONFIG_POWER_SUPPLY=y +CONFIG_PREEMPT=y +CONFIG_PRINTK_TIME=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTP_1588_CLOCK=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_SCHEDSTATS=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIO_LIBPS2=y +CONFIG_SGI_PARTITION=y +CONFIG_SMP=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_SYNC_FILE=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TASKSTATS=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_MD5SIG=y +CONFIG_TLS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_USER_NS=y +CONFIG_VALIDATE_FS_PARSER=y +CONFIG_VETH=y +CONFIG_VIRT_DRIVERS=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_X86_CPUID=y +CONFIG_X86_MSR=y +CONFIG_X86_POWERNOW_K8=y +CONFIG_XDP_SOCKETS_DIAG=y +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_USER=y +CONFIG_ZEROPLUS_FF=y diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index abf890d066eb..34dbd2adc157 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -4,25 +4,35 @@ /* TODO: corrupts other tests uses connect() */ void serial_test_probe_user(void) { - const char *prog_name = "handle_sys_connect"; + static const char *const prog_names[] = { + "handle_sys_connect", +#if defined(__s390x__) + "handle_sys_socketcall", +#endif + }; + enum { prog_count = ARRAY_SIZE(prog_names) }; const char *obj_file = "./test_probe_user.o"; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, ); int err, results_map_fd, sock_fd, duration = 0; struct sockaddr curr, orig, tmp; struct sockaddr_in *in = (struct sockaddr_in *)&curr; - 
struct bpf_link *kprobe_link = NULL; - struct bpf_program *kprobe_prog; + struct bpf_link *kprobe_links[prog_count] = {}; + struct bpf_program *kprobe_progs[prog_count]; struct bpf_object *obj; static const int zero = 0; + size_t i; obj = bpf_object__open_file(obj_file, &opts); if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; - kprobe_prog = bpf_object__find_program_by_name(obj, prog_name); - if (CHECK(!kprobe_prog, "find_probe", - "prog '%s' not found\n", prog_name)) - goto cleanup; + for (i = 0; i < prog_count; i++) { + kprobe_progs[i] = + bpf_object__find_program_by_name(obj, prog_names[i]); + if (CHECK(!kprobe_progs[i], "find_probe", + "prog '%s' not found\n", prog_names[i])) + goto cleanup; + } err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) @@ -33,9 +43,11 @@ void serial_test_probe_user(void) "err %d\n", results_map_fd)) goto cleanup; - kprobe_link = bpf_program__attach(kprobe_prog); - if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) - goto cleanup; + for (i = 0; i < prog_count; i++) { + kprobe_links[i] = bpf_program__attach(kprobe_progs[i]); + if (!ASSERT_OK_PTR(kprobe_links[i], "attach_kprobe")) + goto cleanup; + } memset(&curr, 0, sizeof(curr)); in->sin_family = AF_INET; @@ -69,6 +81,7 @@ void serial_test_probe_user(void) inet_ntoa(in->sin_addr), ntohs(in->sin_port))) goto cleanup; cleanup: - bpf_link__destroy(kprobe_link); + for (i = 0; i < prog_count; i++) + bpf_link__destroy(kprobe_links[i]); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index d71226e34c34..d63a20fbed33 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -64,7 +64,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); /* wait a little for signal handler */ - for (int i = 0; i < 100000000 && !sigusr1_received; i++) + for (int i = 0; i < 1000000000 && !sigusr1_received; i++) j /= i + j + 1; buf[0] = sigusr1_received ? 
'2' : '0'; diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 3bba4a2a0530..eea274110267 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -82,6 +82,7 @@ #define MAC_TUNL_DEV0 "52:54:00:d9:01:00" #define MAC_TUNL_DEV1 "52:54:00:d9:02:00" +#define MAC_VETH1 "52:54:00:d9:03:00" #define VXLAN_TUNL_DEV0 "vxlan00" #define VXLAN_TUNL_DEV1 "vxlan11" @@ -108,10 +109,9 @@ static int config_device(void) { SYS("ip netns add at_ns0"); - SYS("ip link add veth0 type veth peer name veth1"); + SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1"); SYS("ip link set veth0 netns at_ns0"); SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); - SYS("ip addr add " IP4_ADDR2_VETH1 "/24 dev veth1"); SYS("ip link set dev veth1 up mtu 1500"); SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); @@ -140,6 +140,8 @@ static int add_vxlan_tunnel(void) VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0", + IP4_ADDR2_VETH1, MAC_VETH1); /* root namespace */ SYS("ip link add dev %s type vxlan external gbp dstport 4789", @@ -277,6 +279,17 @@ static void test_vxlan_tunnel(void) if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) goto done; + /* load and attach bpf prog to veth dev tc hook point */ + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst); + if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1)) + goto done; + /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ nstoken = open_netns("at_ns0"); if (!ASSERT_OK_PTR(nstoken, "setns src")) diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 8e1495008e4d..a8e501af9604 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -7,8 +7,7 @@ static struct sockaddr_in old; -SEC("ksyscall/connect") -int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, int addrlen) +static int handle_sys_connect_common(struct sockaddr_in *uservaddr) { struct sockaddr_in new; @@ -19,4 +18,30 @@ int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, int return 0; } +SEC("ksyscall/connect") +int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, + int addrlen) +{ + return handle_sys_connect_common(uservaddr); +} + +#if defined(bpf_target_s390) +#ifndef SYS_CONNECT +#define SYS_CONNECT 3 +#endif + +SEC("ksyscall/socketcall") +int BPF_KSYSCALL(handle_sys_socketcall, int call, unsigned long *args) +{ + if (call == SYS_CONNECT) { + struct sockaddr_in *uservaddr; + + bpf_probe_read_user(&uservaddr, sizeof(uservaddr), &args[1]); + return handle_sys_connect_common(uservaddr); + } + + return 0; +} +#endif + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 17f2f325b3f3..df0673c4ecbe 100644 --- 
a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -14,15 +14,24 @@ #include <linux/if_packet.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/icmp.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/pkt_cls.h> #include <linux/erspan.h> +#include <linux/udp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) +#define VXLAN_UDP_PORT 4789 + +/* Only IPv4 address assigned to veth1. + * 172.16.1.200 + */ +#define ASSIGNED_ADDR_VETH1 0xac1001c8 + struct geneve_opt { __be16 opt_class; __u8 type; @@ -33,6 +42,11 @@ struct geneve_opt { __u8 opt_data[8]; /* hard-coded to 8 byte */ }; +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +} __attribute__((packed)); + struct vxlan_metadata { __u32 gbp; }; @@ -369,14 +383,8 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; + __u32 orig_daddr; __u32 index = 0; - __u32 *local_ip = NULL; - - local_ip = bpf_map_lookup_elem(&local_ip_map, &index); - if (!local_ip) { - log_err(ret); - return TC_ACT_SHOT; - } ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { @@ -390,11 +398,10 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) return TC_ACT_SHOT; } - if (key.local_ipv4 != *local_ip || md.gbp != 0x800FF) { + if (key.local_ipv4 != ASSIGNED_ADDR_VETH1 || md.gbp != 0x800FF) { bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n", key.tunnel_id, key.local_ipv4, key.remote_ipv4, md.gbp); - bpf_printk("local_ip 0x%x\n", *local_ip); log_err(ret); return TC_ACT_SHOT; } @@ -403,6 +410,61 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) } SEC("tc") +int veth_set_outer_dst(struct __sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)(long)skb->data; + __u32 assigned_ip = bpf_htonl(ASSIGNED_ADDR_VETH1); + void *data_end = (void *)(long)skb->data_end; + struct udphdr *udph; + struct iphdr *iph; + __u32 index = 0; + int ret = 0; + int shrink; + __s64 csum; + + if ((void *)eth + sizeof(*eth) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + + if (eth->h_proto != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + iph = (struct iphdr *)(eth + 1); + if ((void *)iph + sizeof(*iph) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + if (iph->protocol != IPPROTO_UDP) + return TC_ACT_OK; + + udph = (struct udphdr *)(iph + 1); + if ((void *)udph + sizeof(*udph) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + if (udph->dest != bpf_htons(VXLAN_UDP_PORT)) + return TC_ACT_OK; + + if (iph->daddr != assigned_ip) { + csum = bpf_csum_diff(&iph->daddr, sizeof(__u32), &assigned_ip, + sizeof(__u32), 0); + if (bpf_skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, daddr), + &assigned_ip, sizeof(__u32), 0) < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + if (bpf_l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check), + 0, csum, 0) < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + bpf_skb_change_type(skb, PACKET_HOST); + } + return TC_ACT_OK; +} + +SEC("tc") int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb) { struct bpf_tunnel_key key; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c639f2e56fc5..3561c97701f2 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1604,11 +1604,8 @@ int main(int argc, char **argv) struct prog_test_def *test = &prog_test_defs[i]; 
test->test_num = i + 1; - if (should_run(&env.test_selector, - test->test_num, test->test_name)) - test->should_run = true; - else - test->should_run = false; + test->should_run = should_run(&env.test_selector, + test->test_num, test->test_name); if ((test->run_test == NULL && test->run_serial_test == NULL) || (test->run_test != NULL && test->run_serial_test != NULL)) { diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index e0bb04a97e10..b86ae4a2e5c5 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -30,8 +30,7 @@ DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/config-latest.${ARCH}" -KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/config-latest.${ARCH}" +KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}") INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" @@ -269,26 +268,42 @@ is_rel_path() [[ ${path:0:1} != "/" ]] } +do_update_kconfig() +{ + local kernel_checkout="$1" + local kconfig_file="$2" + + rm -f "$kconfig_file" 2> /dev/null + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${kernel_checkout}/${config}" + cat "$kconfig_src" >> "$kconfig_file" + done +} + update_kconfig() { - local kconfig_file="$1" - local update_command="curl -sLf ${KCONFIG_URL} -o ${kconfig_file}" - # Github does not return the "last-modified" header when retrieving the - # raw contents of the file. Use the API call to get the last-modified - # time of the kernel config and only update the config if it has been - # updated after the previously cached config was created. This avoids - # unnecessarily compiling the kernel and selftests. - if [[ -f "${kconfig_file}" ]]; then - local last_modified_date="$(curl -sL -D - "${KCONFIG_API_URL}" -o /dev/null | \ - grep "last-modified" | awk -F ': ' '{print $2}')" - local remote_modified_timestamp="$(date -d "${last_modified_date}" +"%s")" - local local_creation_timestamp="$(stat -c %Y "${kconfig_file}")" + local kernel_checkout="$1" + local kconfig_file="$2" - if [[ "${remote_modified_timestamp}" -gt "${local_creation_timestamp}" ]]; then - ${update_command} - fi + if [[ -f "${kconfig_file}" ]]; then + local local_modified="$(stat -c %Y "${kconfig_file}")" + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${kernel_checkout}/${config}" + local src_modified="$(stat -c %Y "${kconfig_src}")" + # Only update the config if it has been updated after the + # previously cached config was created. This avoids + # unnecessarily compiling the kernel and selftests. + if [[ "${src_modified}" -gt "${local_modified}" ]]; then + do_update_kconfig "$kernel_checkout" "$kconfig_file" + # Once we have found one outdated configuration + # there is no need to check other ones. 
+ break + fi + done else - ${update_command} + do_update_kconfig "$kernel_checkout" "$kconfig_file" fi } @@ -372,7 +387,7 @@ main() mkdir -p "${OUTPUT_DIR}" mkdir -p "${mount_dir}" - update_kconfig "${kconfig_file}" + update_kconfig "${kernel_checkout}" "${kconfig_file}" recompile_kernel "${kernel_checkout}" "${make_command}" diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile new file mode 100644 index 000000000000..2a731d5c6d85 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS = bridge_locked_port.sh \ + bridge_mdb.sh \ + bridge_mld.sh \ + bridge_vlan_aware.sh \ + bridge_vlan_mcast.sh \ + bridge_vlan_unaware.sh \ + local_termination.sh \ + no_forwarding.sh \ + test_bridge_fdb_stress.sh + +TEST_PROGS_EXTENDED := lib.sh + +TEST_FILES := forwarding.config + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh index 08a922d8b86a..224ca3695c89 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh @@ -84,6 +84,13 @@ lc_wait_until_port_count_is() busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc" } +lc_nested_devlink_dev_get() +{ + local lc=$1 + + devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].nested_devlink" +} + PROV_UNPROV_TIMEOUT=8000 # ms POST_PROV_ACT_TIMEOUT=2000 # ms PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms @@ -191,12 +198,30 @@ ports_check() check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)" } +lc_dev_info_provisioned_check() +{ + local lc=$1 + local nested_devlink_dev=$2 + local fixed_hw_revision + local running_ini_version + + fixed_hw_revision=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r '.[][].versions.fixed."hw.revision"') + check_err $? "Failed to get linecard $lc fixed.hw.revision" + log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\"" + running_ini_version=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r '.[][].versions.running."ini.version"') + check_err $? "Failed to get linecard $lc running.ini.version" + log_info "Linecard $lc running.ini.version: \"$running_ini_version\"" +} + provision_test() { RET=0 local lc local type local state + local nested_devlink_dev lc=$LC_SLOT supported_types_check $lc @@ -207,6 +232,11 @@ provision_test() fi provision_one $lc $LC_16X100G_TYPE ports_check $lc $LC_16X100G_PORT_COUNT + + nested_devlink_dev=$(lc_nested_devlink_dev_get $lc) + check_err $? "Failed to get nested devlink handle of linecard $lc" + lc_dev_info_provisioned_check $lc $nested_devlink_dev + log_test "Provision" } @@ -220,12 +250,32 @@ interface_check() setup_wait } +lc_dev_info_active_check() +{ + local lc=$1 + local nested_devlink_dev=$2 + local fixed_device_fw_psid + local running_device_fw + + fixed_device_fw_psid=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r ".[][].versions.fixed" | \ + jq -e -r '."fw.psid"') + check_err $? "Failed to get linecard $lc fixed fw PSID" + log_info "Linecard $lc fixed.fw.psid: \"$fixed_device_fw_psid\"" + + running_device_fw=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r ".[][].versions.running.fw") + check_err $? 
"Failed to get linecard $lc running.fw.version" + log_info "Linecard $lc running.fw: \"$running_device_fw\"" +} + activation_16x100G_test() { RET=0 local lc local type local state + local nested_devlink_dev lc=$LC_SLOT type=$LC_16X100G_TYPE @@ -238,6 +288,10 @@ activation_16x100G_test() interface_check + nested_devlink_dev=$(lc_nested_devlink_dev_get $lc) + check_err $? "Failed to get nested devlink handle of linecard $lc" + lc_dev_info_active_check $lc $nested_devlink_dev + log_test "Activation 16x100G" } diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 71b306602368..616ed4019655 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -3,6 +3,6 @@ TEST_PROGS := gpio-mockup.sh gpio-sim.sh TEST_FILES := gpio-mockup-sysfs.sh TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name -CFLAGS += -O2 -g -Wall -I../../../../usr/include/ +CFLAGS += -O2 -g -Wall -I../../../../usr/include/ $(KHDR_INCLUDES) include ../lib.mk diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 4158da0da2bb..2237d1aac801 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -82,8 +82,9 @@ static int next_cpu(int cpu) return cpu; } -static void *migration_worker(void *ign) +static void *migration_worker(void *__rseq_tid) { + pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid; cpu_set_t allowed_mask; int r, i, cpu; @@ -106,7 +107,7 @@ static void *migration_worker(void *ign) * stable, i.e. while changing affinity is in-progress. */ smp_wmb(); - r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask); + r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask); TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)", errno, strerror(errno)); smp_wmb(); @@ -231,7 +232,8 @@ int main(int argc, char *argv[]) vm = vm_create_default(VCPU_ID, 0, guest_code); ucall_init(vm, NULL); - pthread_create(&migration_thread, NULL, migration_worker, 0); + pthread_create(&migration_thread, NULL, migration_worker, + (void *)(unsigned long)gettid()); for (i = 0; !done; i++) { vcpu_run(vm, VCPU_ID); diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 80628bf8413a..cd86d37146cc 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -35,6 +35,8 @@ TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh +TEST_PROGS += srv6_hencap_red_l3vpn_test.sh +TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh index 0727e2012b68..43469c7de118 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh @@ -525,7 +525,7 @@ arp_suppression() log_test "neigh_suppress: on / neigh exists: yes" - # Delete the neighbour from the the SVI. A single ARP request should be + # Delete the neighbour from the SVI. 
A single ARP request should be # received by the remote VTEP RET=0 diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh new file mode 100755 index 000000000000..28a775654b92 --- /dev/null +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -0,0 +1,879 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# +# This script is designed for testing the SRv6 H.Encaps.Red behavior. +# +# Below is depicted the IPv6 network of an operator which offers advanced +# IPv4/IPv6 VPN services to hosts, enabling them to communicate with each +# other. +# In this example, hosts hs-1 and hs-2 are connected through an IPv4/IPv6 VPN +# service, while hs-3 and hs-4 are connected using an IPv6 only VPN. +# +# Routers rt-1,rt-2,rt-3 and rt-4 implement IPv4/IPv6 L3 VPN services +# leveraging the SRv6 architecture. The key components for such VPNs are: +# +# i) The SRv6 H.Encaps.Red behavior applies SRv6 Policies on traffic received +# by connected hosts, initiating the VPN tunnel. Such a behavior is an +# optimization of the SRv6 H.Encap aiming to reduce the length of the SID +# List carried in the pushed SRH. Specifically, the H.Encaps.Red removes +# the first SID contained in the SID List (i.e. SRv6 Policy) by storing it +# into the IPv6 Destination Address. When a SRv6 Policy is made of only one +# SID, the SRv6 H.Encaps.Red behavior omits the SRH at all and pushes that +# SID directly into the IPv6 DA; +# +# ii) The SRv6 End behavior advances the active SID in the SID List carried by +# the SRH; +# +# iii) The SRv6 End.DT46 behavior is used for removing the SRv6 Policy and, +# thus, it terminates the VPN tunnel. Such a behavior is capable of +# handling, at the same time, both tunneled IPv4 and IPv6 traffic. +# +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +--- +---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +--- +---+ +# | | | | +# | hs-4 | | hs-3 | +# | | | | +# +--------+ +--------+ +# cafe::4 cafe::3 +# 10.0.0.4 10.0.0.3 +# +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y +# in the IPv6 operator network. +# +# Local SID table +# =============== +# +# Each SRv6 router is configured with a Local SID table in which SIDs are +# stored. Considering the given SRv6 router rt-x, at least two SIDs are +# configured in the Local SID table: +# +# Local SID table for SRv6 router rt-x +# +----------------------------------------------------------+ +# |fcff:x::e is associated with the SRv6 End behavior | +# |fcff:x::d46 is associated with the SRv6 End.DT46 behavior | +# +----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN +# services. Reachability of SIDs is ensured by proper configuration of the IPv6 +# operator's network and SRv6 routers. 
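For reference, the H.Encaps.Red behavior described in (i) corresponds to an iproute2 route using seg6 encapsulation in "red" mode; a sketch of such a policy follows, with addresses taken from the conventions above but the exact command being illustrative (the "encap.red" keyword needs a recent iproute2):

	ip -6 route add cafe::2 \
		encap seg6 mode encap.red \
		segs fcff:3::e,fcff:4::e,fcff:2::d46 dev veth0

With a single-SID list the same mode yields a plain IPv6-in-IPv6 encapsulation, carrying the SID in the outer destination address and no SRH. The script installs its policies this way in __setup_rt_policy() further below.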
+# +# SRv6 Policies +# ============= +# +# An SRv6 ingress router applies SRv6 policies to the traffic received from a +# connected host. SRv6 policy enforcement consists of encapsulating the +# received traffic into a new IPv6 packet with a given SID List contained in +# the SRH. +# +# IPv4/IPv6 VPN between hs-1 and hs-2 +# ----------------------------------- +# +# Hosts hs-1 and hs-2 are connected using dedicated IPv4/IPv6 VPNs. +# Specifically, packets generated from hs-1 and directed towards hs-2 are +# handled by rt-1 which applies the following SRv6 Policies: +# +# i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::e,fcff:2::d46 +# ii.a) IPv4 traffic, SID List=fcff:2::d46 +# +# Policy (i.a) steers tunneled IPv6 traffic through SRv6 routers +# rt-3,rt-4,rt-2. Instead, Policy (ii.a) steers tunneled IPv4 traffic through +# rt-2. +# The H.Encaps.Red reduces the SID List (i.a) carried in SRH by removing the +# first SID (fcff:3::e) and pushing it into the IPv6 DA. In case of IPv4 +# traffic, the H.Encaps.Red omits the SRH entirely, since the SID +# List (ii.a) consists of only one SID that can be stored directly in the IPv6 +# DA. +# +# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following +# policies: +# +# i.b) IPv6 traffic, SID List=fcff:1::d46 +# ii.b) IPv4 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d46 +# +# Policy (i.b) steers tunneled IPv6 traffic through the SRv6 router rt-1. +# Conversely, Policy (ii.b) steers tunneled IPv4 traffic through SRv6 routers +# rt-4,rt-3,rt-1. +# The H.Encaps.Red omits the SRH entirely in case of (i.b) by pushing the single +# SID (fcff:1::d46) inside the IPv6 DA. +# The H.Encaps.Red reduces the SID List (ii.b) in the SRH by removing the first +# SID (fcff:4::e) and pushing it into the IPv6 DA. +# +# In summary: +# hs-1->hs-2 |IPv6 DA=fcff:3::e|SRH SIDs=fcff:4::e,fcff:2::d46|IPv6|...| (i.a) +# hs-1->hs-2 |IPv6 DA=fcff:2::d46|IPv4|...| (ii.a) +# +# hs-2->hs-1 |IPv6 DA=fcff:1::d46|IPv6|...| (i.b) +# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d46|IPv4|...| (ii.b) +# +# +# IPv6 VPN between hs-3 and hs-4 +# ------------------------------ +# +# Hosts hs-3 and hs-4 are connected using a dedicated IPv6 only VPN. +# Specifically, packets generated from hs-3 and directed towards hs-4 are +# handled by rt-3 which applies the following SRv6 Policy: +# +# i.c) IPv6 traffic, SID List=fcff:2::e,fcff:4::d46 +# +# Policy (i.c) steers tunneled IPv6 traffic through SRv6 routers rt-2,rt-4. +# The H.Encaps.Red reduces the SID List (i.c) carried in SRH by pushing the +# first SID (fcff:2::e) in the IPv6 DA. +# +# On the reverse path (i.e. from hs-4 to hs-3) the router rt-4 applies the +# following SRv6 Policy: +# +# i.d) IPv6 traffic, SID List=fcff:1::e,fcff:3::d46 +# +# Policy (i.d) steers tunneled IPv6 traffic through SRv6 routers rt-1,rt-3. +# The H.Encaps.Red reduces the SID List (i.d) carried in SRH by pushing the +# first SID (fcff:1::e) in the IPv6 DA. +# +# In summary: +# hs-3->hs-4 |IPv6 DA=fcff:2::e|SRH SIDs=fcff:4::d46|IPv6|...| (i.c) +# hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d) +# + +# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly VRF_TID=100 +readonly VRF_DEVNAME="vrf-${VRF_TID}" +readonly RT2HS_DEVNAME="veth-t${VRF_TID}" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly END_FUNC=000e +readonly DT46_FUNC=0d46 + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! -x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". 
+	if [ "${SETUP_ERR}" -ne 0 ]; then
+		echo "SKIP: Setting up the testing environment failed"
+		exit "${ksft_skip}"
+	fi
+
+	exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local neigh
+	local nsname
+	local neigh_nsname
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		neigh_nsname="$(get_rtname "${neigh}")"
+
+		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+			type veth peer name "veth-rt-${neigh}-${rt}" \
+			netns "${neigh_nsname}"
+	done
+}
+
+get_network_prefix()
+{
+	local rt="$1"
+	local neigh="$2"
+	local p="${rt}"
+	local q="${neigh}"
+
+	if [ "${p}" -gt "${q}" ]; then
+		p="${q}"; q="${rt}"
+	fi
+
+	echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local nsname
+	local net_prefix
+	local devname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		ip -netns "${nsname}" addr \
+			add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+		ip -netns "${nsname}" link set "${devname}" up
+	done
+
+	ip -netns "${nsname}" link set lo up
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local net_prefix
+	local devname
+	local nsname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		# set underlay network routes for SIDs reachability
+		ip -netns "${nsname}" -6 route \
+			add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+	done
+
+	# Local End behavior (note that "dev" is dummy and the VRF is chosen
+	# for the sake of simplicity).
+	ip -netns "${nsname}" -6 route \
+		add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End dev "${VRF_DEVNAME}"
+
+	# Local End.DT46 behavior
+	ip -netns "${nsname}" -6 route \
+		add "${VPN_LOCATOR_SERVICE}:${rt}::${DT46_FUNC}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End.DT46 vrftable "${VRF_TID}" \
+		dev "${VRF_DEVNAME}"
+
+	# all SIDs for VPNs start with a common locator. Routes and SRv6
+	# Endpoint behavior instances are grouped together in the 'localsid'
+	# table.
+	ip -netns "${nsname}" -6 rule \
+		add to "${VPN_LOCATOR_SERVICE}::/16" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+
+	# set default routes to unreachable for both ipv4 and ipv6
+	ip -netns "${nsname}" -6 route \
+		add unreachable default metric 4278198272 \
+		vrf "${VRF_DEVNAME}"
+
+	ip -netns "${nsname}" -4 route \
+		add unreachable default metric 4278198272 \
+		vrf "${VRF_DEVNAME}"
+}
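+
+# A quick way to check what the helper above actually installed is to dump
+# the 'localsid' table inside a router namespace. The lines below are an
+# illustrative, abbreviated sketch for rt-1 (the real namespace name carries
+# the random suffix generated above):
+#
+#   $ ip -netns rt-1-<suffix> -6 route show table 90
+#   fcff:1::e encap seg6local action End dev vrf-100
+#   fcff:1::d46 encap seg6local action End.DT46 vrftable 100 dev vrf-100
+#   fcff:2::/32 via fcf0:0:1:2::2 dev veth-rt-1-2
+#   ...
+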
+# build and install the SRv6 policy into the ingress SRv6 router.
+# args:
+#  $1 - destination host (i.e. cafe::x host)
+#  $2 - SRv6 router configured for enforcing the SRv6 Policy
+#  $3 - SRv6 routers configured for steering traffic (End behaviors)
+#  $4 - SRv6 router configured for removing the SRv6 Policy (router connected
+#       to the destination host)
+#  $5 - encap mode (full or red)
+#  $6 - traffic type (IPv6 or IPv4)
+__setup_rt_policy()
+{
+	local dst="$1"
+	local encap_rt="$2"
+	local end_rts="$3"
+	local dec_rt="$4"
+	local mode="$5"
+	local traffic="$6"
+	local nsname
+	local policy=''
+	local n
+
+	nsname="$(get_rtname "${encap_rt}")"
+
+	for n in ${end_rts}; do
+		policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
+	done
+
+	policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}"
+
+	# add SRv6 policy to incoming traffic sent by connected hosts
+	if [ "${traffic}" -eq 6 ]; then
+		ip -netns "${nsname}" -6 route \
+			add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \
+			encap seg6 mode "${mode}" segs "${policy}" \
+			dev "${VRF_DEVNAME}"
+
+		ip -netns "${nsname}" -6 neigh \
+			add proxy "${IPv6_HS_NETWORK}::${dst}" \
+			dev "${RT2HS_DEVNAME}"
+	else
+		# "dev" must be different from the one where the packet is
+		# received, otherwise the proxy ARP does not work.
+		ip -netns "${nsname}" -4 route \
+			add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \
+			encap seg6 mode "${mode}" segs "${policy}" \
+			dev "${VRF_DEVNAME}"
+	fi
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv6()
+{
+	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv4()
+{
+	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4
+}
+
+setup_hs()
+{
+	local hs="$1"
+	local rt="$2"
+	local hsname
+	local rtname
+
+	hsname="$(get_hsname "${hs}")"
+	rtname="$(get_rtname "${rt}")"
+
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns "${hsname}" link add veth0 type veth \
+		peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+	ip -netns "${hsname}" addr \
+		add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+	ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0
+
+	ip -netns "${hsname}" link set veth0 up
+	ip -netns "${hsname}" link set lo up
+
+	# configure the VRF on the router which is directly connected to the
+	# source host.
+	ip -netns "${rtname}" link \
+		add "${VRF_DEVNAME}" type vrf table "${VRF_TID}"
+	ip -netns "${rtname}" link set "${VRF_DEVNAME}" up
+
+	# enslave the veth interface connecting the router with the host to the
+	# VRF in the access router
+	ip -netns "${rtname}" link \
+		set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}"
+
+	ip -netns "${rtname}" addr \
+		add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+	ip -netns "${rtname}" addr \
+		add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+	ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
+
+	# disable the rp_filter otherwise the kernel gets confused about how
+	# to route decap ipv4 packets.
+ ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 + + ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2 3 4"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + setup_hs 3 3 + setup_hs 4 4 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # set up SRv6 policies + + # create an IPv6 VPN between hosts hs-1 and hs-2. + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.Encaps.Red) + # - rt-3,rt-4 (SRv6 End behaviors) + # - rt-2 (SRv6 End.DT46 behavior) + # + # Direction hs-2 -> hs-1 (H.Encaps.Red) + # - rt-1 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red + setup_rt_policy_ipv6 1 2 "" 1 encap.red + + # create an IPv4 VPN between hosts hs-1 and hs-2 + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.Encaps.Red) + # - rt-2 (SRv6 End.DT46 behavior) + # + # Direction hs-2 -> hs-1 (H.Encaps.Red) + # - rt-4,rt-3 (SRv6 End behaviors) + # - rt-1 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv4 2 1 "" 2 encap.red + setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red + + # create an IPv6 VPN between hosts hs-3 and hs-4 + # the network path between hs-3 and hs-4 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-3 -> hs-4 (H.Encaps.Red) + # - rt-2 (SRv6 End Behavior) + # - rt-4 (SRv6 End.DT46 behavior) + # + # Direction hs-4 -> hs-3 (H.Encaps.Red) + # - rt-1 (SRv6 End behavior) + # - rt-3 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv6 4 3 "2" 4 encap.red + setup_rt_policy_ipv6 3 4 "1" 3 encap.red + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 
0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}" +} + +check_and_log_hs_ipv6_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + # in this case, the connectivity test must fail + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 1 "IPv6 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}" +} + +check_and_log_hs_ipv4_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + # in this case, the connectivity test must fail + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 1 "IPv4 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}" +} + +check_and_log_hs_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv6_isolation "${hssrc}" "${hsdst}" + check_and_log_hs_ipv4_isolation "${hssrc}" "${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4/IPv6)" + + check_and_log_hs_connectivity 1 2 + check_and_log_hs_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h3 <-> h4, IPv6 only)" + + check_and_log_hs_ipv6_connectivity 3 4 + check_and_log_hs_ipv6_connectivity 4 3 +} + +host_vpn_isolation_tests() +{ + local l1="1 2" + local l2="3 4" + local tmp + local i + local j + local k + + log_section "SRv6 VPN isolation test among hosts" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation "${i}" "${j}" + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + done + + log_section "SRv6 VPN isolation test among hosts (h2 <-> h4, IPv4 only)" + + check_and_log_hs_ipv4_isolation 2 4 + check_and_log_hs_ipv4_isolation 4 2 +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! 
ip route help 2>&1 | grep -qo "encap.red"; then
+		echo "SKIP: Missing SRv6 encap.red support in iproute2"
+		exit "${ksft_skip}"
+	fi
+}
+
+test_vrf_or_ksft_skip()
+{
+	modprobe vrf &>/dev/null || true
+	if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+		echo "SKIP: vrf sysctl does not exist"
+		exit "${ksft_skip}"
+	fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+	echo "SKIP: Need root privileges"
+	exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+
+test_iproute2_supp_or_ksft_skip
+test_vrf_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
new file mode 100755
index 000000000000..cb4177d41b21
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -0,0 +1,821 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+#
+# This script is designed for testing the SRv6 H.L2Encaps.Red behavior.
+#
+# The diagram below depicts the IPv6 network of an operator which offers L2
+# VPN services to hosts, enabling them to communicate with each other.
+# In this example, hosts hs-1 and hs-2 are connected through an L2 VPN service.
+# Currently, the SRv6 subsystem in Linux allows hosts hs-1 and hs-2 to exchange
+# full L2 frames as long as they carry IPv4/IPv6.
+#
+# Routers rt-1, rt-2, rt-3 and rt-4 implement L2 VPN services
+# leveraging the SRv6 architecture. The key components for such VPNs are:
+#
+#   i) The SRv6 H.L2Encaps.Red behavior applies SRv6 Policies on traffic
+#      received by connected hosts, initiating the VPN tunnel. This behavior
+#      is an optimization of SRv6 H.L2Encap that aims to reduce the
+#      length of the SID List carried in the pushed SRH. Specifically, the
+#      H.L2Encaps.Red removes the first SID contained in the SID List (i.e. SRv6
+#      Policy) by storing it into the IPv6 Destination Address. When an SRv6
+#      Policy is made of only one SID, the SRv6 H.L2Encaps.Red behavior omits
+#      the SRH altogether and pushes that SID directly into the IPv6 DA;
+#
+#  ii) The SRv6 End behavior advances the active SID in the SID List
+#      carried by the SRH;
+#
+# iii) The SRv6 End.DX2 behavior removes the SRv6 Policy and, thus,
+#      terminates the VPN tunnel. The decapsulated L2 frame is
+#      sent over the interface connected with the destination host.
+#
+#               cafe::1                      cafe::2
+#              10.0.0.1                     10.0.0.2
+#             +--------+                   +--------+
+#             |        |                   |        |
+#             |  hs-1  |                   |  hs-2  |
+#             |        |                   |        |
+#             +---+----+                   +----+---+
+#    cafe::/64    |                             |    cafe::/64
+#  10.0.0.0/24    |                             |  10.0.0.0/24
+#             +---+----+                   +----+---+
+#             |        |  fcf0:0:1:2::/64  |        |
+#             |  rt-1  +-------------------+  rt-2  |
+#             |        |                   |        |
+#             +---+----+                   +----+---+
+#                 |  .                       .  |
+#                 |   .   fcf0:0:1:3::/64   .   |
+#                 |    .                   .    |
+#                 |     .                 .     |
+# fcf0:0:1:4::/64 |      .               .      | fcf0:0:2:3::/64
+#                 |       .             .       |
+#                 |        .           .        |
+#                 |  fcf0:0:2:4::/64  .         |
+#                 |          .       .          |
+#             +---+----+                   +----+---+
+#             |        |                   |        |
+#             |  rt-4  +-------------------+  rt-3  |
+#             |        |  fcf0:0:3:4::/64  |        |
+#             +---+----+                   +----+---+
+#
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y
+# in the IPv6 operator network.
+#
+# Local SID table
+# ===============
+#
+# Each SRv6 router is configured with a Local SID table in which SIDs are
+# stored. Considering the given SRv6 router rt-x, at least two SIDs are
+# configured in the Local SID table:
+#
+#   Local SID table for SRv6 router rt-x
+#   +----------------------------------------------------------+
+#   |fcff:x::e is associated with the SRv6 End behavior         |
+#   |fcff:x::d2 is associated with the SRv6 End.DX2 behavior    |
+#   +----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN
+# services. Reachability of SIDs is ensured by proper configuration of the IPv6
+# operator's network and SRv6 routers.
+#
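+# For reference, the two behaviors above roughly map to seg6local routes of
+# the following shape. This is only an illustrative sketch: the exact
+# commands, complete with namespaces and routing tables, are built by
+# setup_rt_local_sids() and setup_decap() later in this script:
+#
+#   rt-x# ip -6 route add fcff:x::e table 90 \
+#              encap seg6local action End dev dum0
+#   rt-x# ip -6 route add fcff:x::d2 table 90 \
+#              encap seg6local action End.DX2 oif veth-hs dev veth-hs
+#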
+# SRv6 Policies
+# =============
+#
+# An SRv6 ingress router applies SRv6 policies to the traffic received from a
+# connected host. SRv6 policy enforcement consists of encapsulating the
+# received traffic into a new IPv6 packet with a given SID List contained in
+# the SRH.
+#
+# L2 VPN between hs-1 and hs-2
+# ----------------------------
+#
+# Hosts hs-1 and hs-2 are connected using a dedicated L2 VPN.
+# Specifically, packets generated from hs-1 and directed towards hs-2 are
+# handled by rt-1 which applies the following SRv6 Policies:
+#
+#    i.a) L2 traffic, SID List=fcff:2::d2
+#
+# Policy (i.a) steers tunneled L2 traffic through SRv6 router rt-2.
+# The H.L2Encaps.Red behavior omits the SRH altogether, since the SID List
+# consists of only one SID (fcff:2::d2) that can be stored directly in the
+# IPv6 DA.
+#
+# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following
+# policy:
+#
+#    i.b) L2 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d2
+#
+# Policy (i.b) steers tunneled L2 traffic through the SRv6 routers
+# rt-4,rt-3,rt-1. The H.L2Encaps.Red behavior reduces the SID List in the SRH
+# by removing the first SID (fcff:4::e) and pushing it into the IPv6 DA.
+#
+# In summary:
+#  hs-1->hs-2 |IPv6 DA=fcff:2::d2|eth|...| (i.a)
+#  hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b)
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly RT2HS_DEVNAME="veth-hs"
+readonly HS_VETH_NAME="veth0"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly MAC_PREFIX=00:00:00:c0:01
+readonly END_FUNC=000e
+readonly DX2_FUNC=00d2
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+	local rc="$1"
+	local expected="$2"
+	local msg="$3"
+
+	if [ "${rc}" -eq "${expected}" ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	printf "\nTests passed: %3d\n" "${nsuccess}"
+	printf "Tests failed: %3d\n" "${nfail}"
+
+	# when a test fails, the value of 'ret' is set to 1 (error code).
+	# Conversely, when all tests are passed successfully, the 'ret' value
+	# is set to 0 (success code).
+ if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! -x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". + if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy + + ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up + ip -netns "${nsname}" link set lo up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + 
devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		# set underlay network routes for SIDs reachability
+		ip -netns "${nsname}" -6 route \
+			add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+	done
+
+	# Local End behavior (note that dev "${DUMMY_DEVNAME}" is a dummy
+	# interface)
+	ip -netns "${nsname}" -6 route \
+		add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End dev "${DUMMY_DEVNAME}"
+
+	# all SIDs for VPNs start with a common locator. Routes and SRv6
+	# Endpoint behavior instances are grouped together in the 'localsid'
+	# table.
+	ip -netns "${nsname}" -6 rule add \
+		to "${VPN_LOCATOR_SERVICE}::/16" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router.
+# args:
+#  $1 - destination host (i.e. cafe::x host)
+#  $2 - SRv6 router configured for enforcing the SRv6 Policy
+#  $3 - SRv6 routers configured for steering traffic (End behaviors)
+#  $4 - SRv6 router configured for removing the SRv6 Policy (router connected
+#       to the destination host)
+#  $5 - encap mode (full or red)
+#  $6 - traffic type (IPv6 or IPv4)
+__setup_rt_policy()
+{
+	local dst="$1"
+	local encap_rt="$2"
+	local end_rts="$3"
+	local dec_rt="$4"
+	local mode="$5"
+	local traffic="$6"
+	local nsname
+	local policy=''
+	local n
+
+	nsname="$(get_rtname "${encap_rt}")"
+
+	for n in ${end_rts}; do
+		policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
+	done
+
+	policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DX2_FUNC}"
+
+	# add SRv6 policy to incoming traffic sent by connected hosts
+	if [ "${traffic}" -eq 6 ]; then
+		ip -netns "${nsname}" -6 route \
+			add "${IPv6_HS_NETWORK}::${dst}" \
+			encap seg6 mode "${mode}" segs "${policy}" \
+			dev "${DUMMY_DEVNAME}"
+	else
+		ip -netns "${nsname}" -4 route \
+			add "${IPv4_HS_NETWORK}.${dst}" \
+			encap seg6 mode "${mode}" segs "${policy}" \
+			dev "${DUMMY_DEVNAME}"
+	fi
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv6()
+{
+	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv4()
+{
+	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4
+}
+
+setup_decap()
+{
+	local rt="$1"
+	local nsname
+
+	nsname="$(get_rtname "${rt}")"
+
+	# Local End.DX2 behavior
+	ip -netns "${nsname}" -6 route \
+		add "${VPN_LOCATOR_SERVICE}:${rt}::${DX2_FUNC}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End.DX2 oif "${RT2HS_DEVNAME}" \
+		dev "${RT2HS_DEVNAME}"
+}
+
+setup_hs()
+{
+	local hs="$1"
+	local rt="$2"
+	local hsname
+	local rtname
+
+	hsname="$(get_hsname "${hs}")"
+	rtname="$(get_rtname "${rt}")"
+
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns "${hsname}" link add "${HS_VETH_NAME}" type veth \
+		peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+	ip -netns "${hsname}" addr add "${IPv6_HS_NETWORK}::${hs}/64" \
+		dev "${HS_VETH_NAME}" nodad
+	ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" \
+		dev "${HS_VETH_NAME}"
+
+	ip -netns "${hsname}" link set "${HS_VETH_NAME}" up
+	ip -netns "${hsname}" link set lo up
+
+	ip -netns "${rtname}" addr add "${IPv6_HS_NETWORK}::254/64" \
+		dev "${RT2HS_DEVNAME}" nodad
+	ip -netns "${rtname}" addr \
+		add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+	ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+	# disable the rp_filter otherwise the kernel gets
confused about how + # to route decap ipv4 packets. + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 +} + +# set an auto-generated mac address +# args: +# $1 - name of the node (e.g.: hs-1, rt-3, etc) +# $2 - id of the node (e.g.: 1 for hs-1, 3 for rt-3, etc) +# $3 - host part of the IPv6 network address +# $4 - name of the network interface to which the generated mac address must +# be set. +set_mac_address() +{ + local nodename="$1" + local nodeid="$2" + local host="$3" + local ifname="$4" + local nsname + + nsname=$(get_nodename "${nodename}") + + ip -netns "${nsname}" link set dev "${ifname}" down + + ip -netns "${nsname}" link set address "${MAC_PREFIX}:${nodeid}" \ + dev "${ifname}" + + # the IPv6 address must be set once again after the MAC address has + # been changed. + ip -netns "${nsname}" addr add "${IPv6_HS_NETWORK}::${host}/64" \ + dev "${ifname}" nodad + + ip -netns "${nsname}" link set dev "${ifname}" up +} + +set_host_l2peer() +{ + local hssrc="$1" + local hsdst="$2" + local ipprefix="$3" + local proto="$4" + local hssrc_name + local ipaddr + + hssrc_name="$(get_hsname "${hssrc}")" + + if [ "${proto}" -eq 6 ]; then + ipaddr="${ipprefix}::${hsdst}" + else + ipaddr="${ipprefix}.${hsdst}" + fi + + ip -netns "${hssrc_name}" route add "${ipaddr}" dev "${HS_VETH_NAME}" + + ip -netns "${hssrc_name}" neigh \ + add "${ipaddr}" lladdr "${MAC_PREFIX}:${hsdst}" \ + dev "${HS_VETH_NAME}" +} + +# setup an SRv6 L2 VPN between host hs-x and hs-y (currently, the SRv6 +# subsystem only supports L2 frames whose layer-3 is IPv4/IPv6). +# args: +# $1 - source host +# $2 - SRv6 routers configured for steering tunneled traffic +# $3 - destination host +setup_l2vpn() +{ + local hssrc="$1" + local end_rts="$2" + local hsdst="$3" + local rtsrc="${hssrc}" + local rtdst="${hsdst}" + + # set fixed mac for source node and the neigh MAC address + set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" + set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6 + set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4 + + # we have to set the mac address of the veth-host (on ingress router) + # to the mac address of the remote peer (L2 VPN destination host). + # Otherwise, traffic coming from the source host is dropped at the + # ingress router. + set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" + + # set the SRv6 Policies at the ingress router + setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ + l2encap.red 6 + setup_rt_policy_ipv4 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ + l2encap.red 4 + + # set the decap behavior + setup_decap "${rtsrc}" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + + # set up default SRv6 Endpoints (i.e. 
SRv6 End and SRv6 End.DX2)
+	setup_rt_local_sids 1 "2 3 4"
+	setup_rt_local_sids 2 "1 3 4"
+	setup_rt_local_sids 3 "1 2 4"
+	setup_rt_local_sids 4 "1 2 3"
+
+	# create an L2 VPN between hs-1 and hs-2.
+	# NB: currently, H.L2Encap* enables tunneling of L2 frames whose
+	# layer-3 is IPv4/IPv6.
+	#
+	# the network path between hs-1 and hs-2 traverses several routers
+	# depending on the direction of traffic.
+	#
+	# Direction hs-1 -> hs-2 (H.L2Encaps.Red)
+	#  - rt-2 (SRv6 End.DX2 behavior)
+	#
+	# Direction hs-2 -> hs-1 (H.L2Encaps.Red)
+	#  - rt-4,rt-3 (SRv6 End behaviors)
+	#  - rt-1 (SRv6 End.DX2 behavior)
+	setup_l2vpn 1 "" 2
+	setup_l2vpn 2 "4 3" 1
+
+	# testing environment was set up successfully
+	SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+	local prefix
+	local rtsrc_nsname
+
+	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+	ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+
+	check_rt_connectivity "${rtsrc}" "${rtdst}"
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc="$1"
+
+	check_hs_ipv6_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+	check_hs_ipv4_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+	check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+}
+
+router_tests()
+{
+	local i
+	local j
+
+	log_section "IPv6 routers connectivity test"
+
+	for i in ${ROUTERS}; do
+		for j in ${ROUTERS}; do
+			if [ "${i}" -eq "${j}" ]; then
+				continue
+			fi
+
+			check_and_log_rt_connectivity "${i}" "${j}"
+		done
+	done
+}
+
+host2gateway_tests()
+{
+	local hs
+
+	log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+	for hs in ${HOSTS}; do
+		check_and_log_hs2gw_connectivity "${hs}"
+	done
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 L2 VPN connectivity test hosts (h1 <-> h2)"
+
+	check_and_log_hs_connectivity 1 2
+	check_and_log_hs_connectivity 2 1
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+	local test_netns
+
+	test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+	if ! ip netns add "${test_netns}"; then
+		echo "SKIP: Cannot set up netns for testing dummy dev support"
+		exit "${ksft_skip}"
+	fi
+
+	modprobe dummy &>/dev/null || true
+	if !
ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: dummy dev not supported" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! ip route help 2>&1 | grep -qo "l2encap.red"; then + echo "SKIP: Missing SRv6 l2encap.red support in iproute2" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep + +test_iproute2_supp_or_ksft_skip +test_dummy_dev_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +router_tests +host2gateway_tests +host_vpn_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 4ecbac197c46..2cbb12736596 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -644,12 +644,14 @@ TEST_F(tls, splice_from_pipe2) int p2[2]; int p[2]; + memrnd(mem_send, sizeof(mem_send)); + ASSERT_GE(pipe(p), 0); ASSERT_GE(pipe(p2), 0); - EXPECT_GE(write(p[1], mem_send, 8000), 0); - EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0); - EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0); - EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0); + EXPECT_EQ(write(p[1], mem_send, 8000), 8000); + EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, 8000, 0), 8000); + EXPECT_EQ(write(p2[1], mem_send + 8000, 8000), 8000); + EXPECT_EQ(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 8000); EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -683,10 +685,12 @@ TEST_F(tls, splice_to_pipe) char mem_recv[TLS_PAYLOAD_MAX_LEN]; int p[2]; + memrnd(mem_send, sizeof(mem_send)); + ASSERT_GE(pipe(p), 0); - EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0); - EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0); - EXPECT_GE(read(p[0], mem_recv, send_len), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -875,6 +879,8 @@ TEST_F(tls, multiple_send_single_recv) char recv_mem[2 * 10]; char send_mem[10]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); memset(recv_mem, 0, total_len); @@ -891,6 +897,8 @@ TEST_F(tls, single_send_multiple_recv_non_align) char recv_mem[recv_len * 2]; char send_mem[total_len]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0); memset(recv_mem, 0, total_len); @@ -936,10 +944,10 @@ TEST_F(tls, recv_peek) char buf[15]; EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), send_len); EXPECT_EQ(memcmp(test_str, buf, send_len), 0); memset(buf, 0, sizeof(buf)); - EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); EXPECT_EQ(memcmp(test_str, buf, send_len), 0); } |