1001 files changed, 24591 insertions, 10199 deletions
@@ -229,6 +229,7 @@ Matthew Wilcox <willy@infradead.org> <mawilcox@microsoft.com> Matthew Wilcox <willy@infradead.org> <willy@debian.org> Matthew Wilcox <willy@infradead.org> <willy@linux.intel.com> Matthew Wilcox <willy@infradead.org> <willy@parisc-linux.org> +Matthias Fuchs <socketcan@esd.eu> <matthias.fuchs@esd.eu> Matthieu CASTET <castet.matthieu@free.fr> Matt Ranostay <matt.ranostay@konsulko.com> <matt@ranostay.consulting> Matt Ranostay <mranostay@gmail.com> Matthew Ranostay <mranostay@embeddedalley.com> @@ -341,6 +342,7 @@ Sumit Semwal <sumit.semwal@ti.com> Takashi YOSHII <takashi.yoshii.zj@renesas.com> Tejun Heo <htejun@gmail.com> Thomas Graf <tgraf@suug.ch> +Thomas Körper <socketcan@esd.eu> <thomas.koerper@esd.eu> Thomas Pedersen <twp@codeaurora.org> Tiezhu Yang <yangtiezhu@loongson.cn> <kernelpatch@126.com> Todor Tomov <todor.too@gmail.com> <todor.tomov@linaro.org> diff --git a/Documentation/bpf/libbpf/libbpf_naming_convention.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst index 3de1d51e41da..6bf9c5ac7576 100644 --- a/Documentation/bpf/libbpf/libbpf_naming_convention.rst +++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst @@ -108,7 +108,7 @@ This bump in ABI version is at most once per kernel development cycle. For example, if current state of ``libbpf.map`` is: -.. code-block:: c +.. code-block:: none LIBBPF_0.0.1 { global: @@ -121,7 +121,7 @@ For example, if current state of ``libbpf.map`` is: , and a new symbol ``bpf_func_c`` is being introduced, then ``libbpf.map`` should be changed like this: -.. code-block:: c +.. code-block:: none LIBBPF_0.0.1 { global: diff --git a/Documentation/devicetree/bindings/iio/st,st-sensors.yaml b/Documentation/devicetree/bindings/iio/st,st-sensors.yaml index b2a1e42c56fa..71de5631ebae 100644 --- a/Documentation/devicetree/bindings/iio/st,st-sensors.yaml +++ b/Documentation/devicetree/bindings/iio/st,st-sensors.yaml @@ -152,47 +152,6 @@ allOf: maxItems: 1 st,drdy-int-pin: false - - if: - properties: - compatible: - enum: - # Two intertial interrupts i.e. 
accelerometer/gyro interrupts - - st,h3lis331dl-accel - - st,l3g4200d-gyro - - st,l3g4is-gyro - - st,l3gd20-gyro - - st,l3gd20h-gyro - - st,lis2de12 - - st,lis2dw12 - - st,lis2hh12 - - st,lis2dh12-accel - - st,lis331dl-accel - - st,lis331dlh-accel - - st,lis3de - - st,lis3dh-accel - - st,lis3dhh - - st,lis3mdl-magn - - st,lng2dm-accel - - st,lps331ap-press - - st,lsm303agr-accel - - st,lsm303dlh-accel - - st,lsm303dlhc-accel - - st,lsm303dlm-accel - - st,lsm330-accel - - st,lsm330-gyro - - st,lsm330d-accel - - st,lsm330d-gyro - - st,lsm330dl-accel - - st,lsm330dl-gyro - - st,lsm330dlc-accel - - st,lsm330dlc-gyro - - st,lsm9ds0-gyro - - st,lsm9ds1-magn - then: - properties: - interrupts: - maxItems: 2 - required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml new file mode 100644 index 000000000000..2cd145a642f1 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml @@ -0,0 +1,119 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/can/bosch,c_can.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Bosch C_CAN/D_CAN controller Device Tree Bindings + +description: Bosch C_CAN/D_CAN controller for CAN bus + +maintainers: + - Dario Binacchi <dariobin@libero.it> + +allOf: + - $ref: can-controller.yaml# + +properties: + compatible: + oneOf: + - enum: + - bosch,c_can + - bosch,d_can + - ti,dra7-d_can + - ti,am3352-d_can + - items: + - enum: + - ti,am4372-d_can + - const: ti,am3352-d_can + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + maxItems: 4 + + power-domains: + description: | + Should contain a phandle to a PM domain provider node and an args + specifier containing the DCAN device id value. It's mandatory for + Keystone 2 66AK2G SoCs only. + maxItems: 1 + + clocks: + description: | + CAN functional clock phandle. + maxItems: 1 + + clock-names: + maxItems: 1 + + syscon-raminit: + description: | + Handle to system control region that contains the RAMINIT register, + register offset to the RAMINIT register and the CAN instance number (0 + offset). + $ref: /schemas/types.yaml#/definitions/phandle-array + items: + items: + - description: The phandle to the system control region. + - description: The register offset. + - description: The CAN instance number. 
+ + resets: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +if: + properties: + compatible: + contains: + enum: + - bosch,d_can + +then: + properties: + interrupts: + minItems: 4 + maxItems: 4 + items: + - description: Error and status IRQ + - description: Message object IRQ + - description: RAM ECC correctable error IRQ + - description: RAM ECC non-correctable error IRQ + +else: + properties: + interrupts: + maxItems: 1 + items: + - description: Error and status IRQ + +additionalProperties: false + +examples: + - | + #include <dt-bindings/reset/altr,rst-mgr.h> + + can@ffc00000 { + compatible = "bosch,d_can"; + reg = <0xffc00000 0x1000>; + interrupts = <0 131 4>, <0 132 4>, <0 133 4>, <0 134 4>; + clocks = <&can0_clk>; + resets = <&rst CAN0_RESET>; + }; + - | + can@0 { + compatible = "ti,am3352-d_can"; + reg = <0x0 0x2000>; + clocks = <&dcan1_fck>; + clock-names = "fck"; + syscon-raminit = <&scm_conf 0x644 1>; + interrupts = <55>; + }; diff --git a/Documentation/devicetree/bindings/net/can/c_can.txt b/Documentation/devicetree/bindings/net/can/c_can.txt deleted file mode 100644 index 366479806acb..000000000000 --- a/Documentation/devicetree/bindings/net/can/c_can.txt +++ /dev/null @@ -1,65 +0,0 @@ -Bosch C_CAN/D_CAN controller Device Tree Bindings -------------------------------------------------- - -Required properties: -- compatible : Should be "bosch,c_can" for C_CAN controllers and - "bosch,d_can" for D_CAN controllers. - Can be "ti,dra7-d_can", "ti,am3352-d_can" or - "ti,am4372-d_can". -- reg : physical base address and size of the C_CAN/D_CAN - registers map -- interrupts : property with a value describing the interrupt - number - -The following are mandatory properties for DRA7x, AM33xx and AM43xx SoCs only: -- ti,hwmods : Must be "d_can<n>" or "c_can<n>", n being the - instance number - -The following are mandatory properties for Keystone 2 66AK2G SoCs only: -- power-domains : Should contain a phandle to a PM domain provider node - and an args specifier containing the DCAN device id - value. This property is as per the binding, - Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml -- clocks : CAN functional clock phandle. This property is as per the - binding, - Documentation/devicetree/bindings/clock/ti,sci-clk.yaml - -Optional properties: -- syscon-raminit : Handle to system control region that contains the - RAMINIT register, register offset to the RAMINIT - register and the CAN instance number (0 offset). - -Note: "ti,hwmods" field is used to fetch the base address and irq -resources from TI, omap hwmod data base during device registration. -Future plan is to migrate hwmod data base contents into device tree -blob so that, all the required data will be used from device tree dts -file. 
- -Example: - -Step1: SoC common .dtsi file - - dcan1: d_can@481d0000 { - compatible = "bosch,d_can"; - reg = <0x481d0000 0x2000>; - interrupts = <55>; - interrupt-parent = <&intc>; - status = "disabled"; - }; - -(or) - - dcan1: d_can@481d0000 { - compatible = "bosch,d_can"; - ti,hwmods = "d_can1"; - reg = <0x481d0000 0x2000>; - interrupts = <55>; - interrupt-parent = <&intc>; - status = "disabled"; - }; - -Step 2: board specific .dts file - - &dcan1 { - status = "okay"; - }; diff --git a/Documentation/devicetree/bindings/net/can/can-controller.yaml b/Documentation/devicetree/bindings/net/can/can-controller.yaml index 9cf2ae097156..1f0e98051074 100644 --- a/Documentation/devicetree/bindings/net/can/can-controller.yaml +++ b/Documentation/devicetree/bindings/net/can/can-controller.yaml @@ -13,6 +13,15 @@ properties: $nodename: pattern: "^can(@.*)?$" + termination-gpios: + description: GPIO pin to enable CAN bus termination. + maxItems: 1 + + termination-ohms: + description: The resistance value of the CAN bus termination resistor. + minimum: 1 + maximum: 65535 + additionalProperties: true ... diff --git a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml index 55bff1586b6f..3f0ee17c1461 100644 --- a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml +++ b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml @@ -119,6 +119,9 @@ properties: minimum: 0 maximum: 2 + termination-gpios: true + termination-ohms: true + required: - compatible - reg @@ -148,3 +151,17 @@ examples: fsl,stop-mode = <&gpr 0x34 28>; fsl,scu-index = /bits/ 8 <1>; }; + - | + #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/gpio/gpio.h> + + can@2090000 { + compatible = "fsl,imx6q-flexcan"; + reg = <0x02090000 0x4000>; + interrupts = <0 110 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clks 1>, <&clks 2>; + clock-names = "ipg", "per"; + fsl,stop-mode = <&gpr 0x34 28>; + termination-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>; + termination-ohms = <120>; + }; diff --git a/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml b/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml index 0b33ba9ccb47..546c6e6d2fb0 100644 --- a/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml +++ b/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml @@ -30,13 +30,15 @@ properties: - renesas,r8a77995-canfd # R-Car D3 - const: renesas,rcar-gen3-canfd # R-Car Gen3 and RZ/G2 + - items: + - enum: + - renesas,r9a07g044-canfd # RZ/G2{L,LC} + - const: renesas,rzg2l-canfd # RZ/G2L family + reg: maxItems: 1 - interrupts: - items: - - description: Channel interrupt - - description: Global interrupt + interrupts: true clocks: maxItems: 3 @@ -50,8 +52,7 @@ properties: power-domains: maxItems: 1 - resets: - maxItems: 1 + resets: true renesas,no-can-fd: $ref: /schemas/types.yaml#/definitions/flag @@ -91,6 +92,62 @@ required: - channel0 - channel1 +if: + properties: + compatible: + contains: + enum: + - renesas,rzg2l-canfd +then: + properties: + interrupts: + items: + - description: CAN global error interrupt + - description: CAN receive FIFO interrupt + - description: CAN0 error interrupt + - description: CAN0 transmit interrupt + - description: CAN0 transmit/receive FIFO receive completion interrupt + - description: CAN1 error interrupt + - description: CAN1 transmit interrupt + - description: CAN1 transmit/receive FIFO receive completion interrupt + + interrupt-names: + items: + - const: g_err + - const: g_recc + - 
const: ch0_err + - const: ch0_rec + - const: ch0_trx + - const: ch1_err + - const: ch1_rec + - const: ch1_trx + + resets: + maxItems: 2 + + reset-names: + items: + - const: rstp_n + - const: rstc_n + + required: + - interrupt-names + - reset-names +else: + properties: + interrupts: + items: + - description: Channel interrupt + - description: Global interrupt + + interrupt-names: + items: + - const: ch_int + - const: g_int + + resets: + maxItems: 1 + unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index a4d547efc32a..af9df2f01a1c 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -8,6 +8,7 @@ Required properties: Use "cdns,np4-macb" for NP4 SoC devices. Use "cdns,at32ap7000-macb" for other 10/100 usage or use the generic form: "cdns,macb". Use "atmel,sama5d2-gem" for the GEM IP (10/100) available on Atmel sama5d2 SoCs. + Use "atmel,sama5d29-gem" for GEM XL IP (10/100) available on Atmel sama5d29 SoCs. Use "atmel,sama5d3-macb" for the 10/100Mbit IP available on Atmel sama5d3 SoCs. Use "atmel,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs. Use "atmel,sama5d4-gem" for the GEM IP (10/100) available on Atmel sama5d4 SoCs. diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index ed88ba4b94df..b8a0b392b24e 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -87,16 +87,24 @@ properties: - const: ipa-setup-ready interconnects: - items: - - description: Interconnect path between IPA and main memory - - description: Interconnect path between IPA and internal memory - - description: Interconnect path between IPA and the AP subsystem + oneOf: + - items: + - description: Path leading to system memory + - description: Path between the AP and IPA config space + - items: + - description: Path leading to system memory + - description: Path leading to internal memory + - description: Path between the AP and IPA config space interconnect-names: - items: - - const: memory - - const: imem - - const: config + oneOf: + - items: + - const: memory + - const: config + - items: + - const: memory + - const: imem + - const: config qcom,smem-states: $ref: /schemas/types.yaml#/definitions/phandle-array diff --git a/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml index 0c973310ada0..2af304341772 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml @@ -14,7 +14,9 @@ allOf: properties: compatible: - const: qcom,ipq4019-mdio + enum: + - qcom,ipq4019-mdio + - qcom,ipq5018-mdio "#address-cells": const: 1 @@ -23,7 +25,18 @@ properties: const: 0 reg: + minItems: 1 + maxItems: 2 + description: + the first Address and length of the register set for the MDIO controller. + the second Address and length of the register for ethernet LDO, this second + address range is only required by the platform IPQ50xx. + + clocks: maxItems: 1 + description: | + MDIO clock source frequency fixed to 100MHZ, this clock should be specified + by the platform IPQ807x, IPQ60xx and IPQ50xx. 
required: - compatible diff --git a/Documentation/gpu/rfc/i915_gem_lmem.rst b/Documentation/gpu/rfc/i915_gem_lmem.rst index 675ba8620d66..b421a3c1806e 100644 --- a/Documentation/gpu/rfc/i915_gem_lmem.rst +++ b/Documentation/gpu/rfc/i915_gem_lmem.rst @@ -18,114 +18,5 @@ real, with all the uAPI bits is: * Route shmem backend over to TTM SYSTEM for discrete * TTM purgeable object support * Move i915 buddy allocator over to TTM - * MMAP ioctl mode(see `I915 MMAP`_) - * SET/GET ioctl caching(see `I915 SET/GET CACHING`_) * Send RFC(with mesa-dev on cc) for final sign off on the uAPI * Add pciid for DG1 and turn on uAPI for real - -New object placement and region query uAPI -========================================== -Starting from DG1 we need to give userspace the ability to allocate buffers from -device local-memory. Currently the driver supports gem_create, which can place -buffers in system memory via shmem, and the usual assortment of other -interfaces, like dumb buffers and userptr. - -To support this new capability, while also providing a uAPI which will work -beyond just DG1, we propose to offer three new bits of uAPI: - -DRM_I915_QUERY_MEMORY_REGIONS ------------------------------ -New query ID which allows userspace to discover the list of supported memory -regions(like system-memory and local-memory) for a given device. We identify -each region with a class and instance pair, which should be unique. The class -here would be DEVICE or SYSTEM, and the instance would be zero, on platforms -like DG1. - -Side note: The class/instance design is borrowed from our existing engine uAPI, -where we describe every physical engine in terms of its class, and the -particular instance, since we can have more than one per class. - -In the future we also want to expose more information which can further -describe the capabilities of a region. - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_memory_class drm_i915_gem_memory_class_instance drm_i915_memory_region_info drm_i915_query_memory_regions - -GEM_CREATE_EXT --------------- -New ioctl which is basically just gem_create but now allows userspace to provide -a chain of possible extensions. Note that if we don't provide any extensions and -set flags=0 then we get the exact same behaviour as gem_create. - -Side note: We also need to support PXP[1] in the near future, which is also -applicable to integrated platforms, and adds its own gem_create_ext extension, -which basically lets userspace mark a buffer as "protected". - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_create_ext - -I915_GEM_CREATE_EXT_MEMORY_REGIONS ----------------------------------- -Implemented as an extension for gem_create_ext, we would now allow userspace to -optionally provide an immutable list of preferred placements at creation time, -in priority order, for a given buffer object. For the placements we expect -them each to use the class/instance encoding, as per the output of the regions -query. Having the list in priority order will be useful in the future when -placing an object, say during eviction. - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_create_ext_memory_regions - -One fair criticism here is that this seems a little over-engineered[2]. If we -just consider DG1 then yes, a simple gem_create.flags or something is totally -all that's needed to tell the kernel to allocate the buffer in local-memory or -whatever. 
However looking to the future we need uAPI which can also support -upcoming Xe HP multi-tile architecture in a sane way, where there can be -multiple local-memory instances for a given device, and so using both class and -instance in our uAPI to describe regions is desirable, although specifically -for DG1 it's uninteresting, since we only have a single local-memory instance. - -Existing uAPI issues -==================== -Some potential issues we still need to resolve. - -I915 MMAP ---------- -In i915 there are multiple ways to MMAP GEM object, including mapping the same -object using different mapping types(WC vs WB), i.e multiple active mmaps per -object. TTM expects one MMAP at most for the lifetime of the object. If it -turns out that we have to backpedal here, there might be some potential -userspace fallout. - -I915 SET/GET CACHING --------------------- -In i915 we have set/get_caching ioctl. TTM doesn't let us to change this, but -DG1 doesn't support non-snooped pcie transactions, so we can just always -allocate as WB for smem-only buffers. If/when our hw gains support for -non-snooped pcie transactions then we must fix this mode at allocation time as -a new GEM extension. - -This is related to the mmap problem, because in general (meaning, when we're -not running on intel cpus) the cpu mmap must not, ever, be inconsistent with -allocation mode. - -Possible idea is to let the kernel picks the mmap mode for userspace from the -following table: - -smem-only: WB. Userspace does not need to call clflush. - -smem+lmem: We only ever allow a single mode, so simply allocate this as uncached -memory, and always give userspace a WC mapping. GPU still does snooped access -here(assuming we can't turn it off like on DG1), which is a bit inefficient. - -lmem only: always WC - -This means on discrete you only get a single mmap mode, all others must be -rejected. That's probably going to be a new default mode or something like -that. - -Links -===== -[1] https://patchwork.freedesktop.org/series/86798/ - -[2] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5599#note_553791 diff --git a/Documentation/i2c/index.rst b/Documentation/i2c/index.rst index 8b76217e370a..6270f1fd7d4e 100644 --- a/Documentation/i2c/index.rst +++ b/Documentation/i2c/index.rst @@ -17,6 +17,7 @@ Introduction busses/index i2c-topology muxes/i2c-mux-gpio + i2c-sysfs Writing device drivers ====================== diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst index ef8cb62e82a1..4b59cf2c599f 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst @@ -656,3 +656,47 @@ Bridge offloads tracepoints: $ cat /sys/kernel/debug/tracing/trace ... ip-5387 [000] ...1 573713: mlx5_esw_bridge_vport_cleanup: vport_num=1 + +Eswitch QoS tracepoints: + +- mlx5_esw_vport_qos_create: trace creation of transmit scheduler arbiter for vport:: + + $ echo mlx5:mlx5_esw_vport_qos_create >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-23496 [018] .... 73136.838831: mlx5_esw_vport_qos_create: (0000:82:00.0) vport=2 tsar_ix=4 bw_share=0, max_rate=0 group=000000007b576bb3 + +- mlx5_esw_vport_qos_config: trace configuration of transmit scheduler arbiter for vport:: + + $ echo mlx5:mlx5_esw_vport_qos_config >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-26548 [023] .... 
75754.223823: mlx5_esw_vport_qos_config: (0000:82:00.0) vport=1 tsar_ix=3 bw_share=34, max_rate=10000 group=000000007b576bb3 + +- mlx5_esw_vport_qos_destroy: trace deletion of transmit scheduler arbiter for vport:: + + $ echo mlx5:mlx5_esw_vport_qos_destroy >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-27418 [004] .... 76546.680901: mlx5_esw_vport_qos_destroy: (0000:82:00.0) vport=1 tsar_ix=3 + +- mlx5_esw_group_qos_create: trace creation of transmit scheduler arbiter for rate group:: + + $ echo mlx5:mlx5_esw_group_qos_create >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-26578 [008] .... 75776.022112: mlx5_esw_group_qos_create: (0000:82:00.0) group=000000008dac63ea tsar_ix=5 + +- mlx5_esw_group_qos_config: trace configuration of transmit scheduler arbiter for rate group:: + + $ echo mlx5:mlx5_esw_group_qos_config >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-27303 [020] .... 76461.455356: mlx5_esw_group_qos_config: (0000:82:00.0) group=000000008dac63ea tsar_ix=5 bw_share=100 max_rate=20000 + +- mlx5_esw_group_qos_destroy: trace deletion of transmit scheduler arbiter for group:: + + $ echo mlx5:mlx5_esw_group_qos_destroy >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-27418 [006] .... 76547.187258: mlx5_esw_group_qos_destroy: (0000:82:00.0) group=000000007b576bb3 tsar_ix=1 diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst index 54c9f107c4b0..4878907e9232 100644 --- a/Documentation/networking/devlink/devlink-params.rst +++ b/Documentation/networking/devlink/devlink-params.rst @@ -97,6 +97,18 @@ own name. * - ``enable_roce`` - Boolean - Enable handling of RoCE traffic in the device. + * - ``enable_eth`` + - Boolean + - When enabled, the device driver will instantiate Ethernet specific + auxiliary device of the devlink device. + * - ``enable_rdma`` + - Boolean + - When enabled, the device driver will instantiate RDMA specific + auxiliary device of the devlink device. + * - ``enable_vnet`` + - Boolean + - When enabled, the device driver will instantiate VDPA networking + specific auxiliary device of the devlink device. * - ``internal_err_reset`` - Boolean - When enabled, the device driver will reset the device on internal diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index c86628e6a235..c690bb37430d 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -595,6 +595,14 @@ Link extended substates: that is not formally supported, which led to signal integrity issues + + ``ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST`` The external clock signal for + SerDes is too weak or + unavailable. + + ``ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS`` The received signal for + SerDes is too weak because + analog loss of signal. ================================================================= ============================= Cable issue substates: diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 76d939e688b8..b0d4da71e68e 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -45,3 +45,15 @@ allow_join_initial_addr_port - BOOLEAN This is a per-namespace sysctl. 
Default: 1 + +stale_loss_cnt - INTEGER + The number of MPTCP-level retransmission intervals with no traffic and + pending outstanding data on a given subflow required to declare it stale. + The packet scheduler ignores stale subflows. + A low stale_loss_cnt value allows for fast active-backup switch-over, + an high value maximize links utilization on edge scenarios e.g. lossy + link with high BER or peer pausing the data processing. + + This is a per-namespace sysctl. + + Default: 4 diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 91b2cf712801..e26532f49760 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -228,6 +228,23 @@ before posting to the mailing list. The patchwork build bot instance gets overloaded very easily and netdev@vger really doesn't need more traffic if we can help it. +netdevsim is great, can I extend it for my out-of-tree tests? +------------------------------------------------------------- + +No, `netdevsim` is a test vehicle solely for upstream tests. +(Please add your tests under tools/testing/selftests/.) + +We also give no guarantees that `netdevsim` won't change in the future +in a way which would break what would normally be considered uAPI. + +Is netdevsim considered a "user" of an API? +------------------------------------------- + +Linux kernel has a long standing rule that no API should be added unless +it has a real, in-tree user. Mock-ups and tests based on `netdevsim` are +strongly encouraged when adding new APIs, but `netdevsim` in itself +is **not** considered a use case/user. + Any other tips to help ensure my net/net-next patch gets OK'd? -------------------------------------------------------------- Attention to detail. Re-read your own work as if you were the diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index d31ed6c1cb0d..024d784157c8 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -191,19 +191,9 @@ nf_flowtable_tcp_timeout - INTEGER (seconds) TCP connections may be offloaded from nf conntrack to nf flow table. Once aged, the connection is returned to nf conntrack with tcp pickup timeout. -nf_flowtable_tcp_pickup - INTEGER (seconds) - default 120 - - TCP connection timeout after being aged from nf flow table offload. - nf_flowtable_udp_timeout - INTEGER (seconds) default 30 Control offload timeout for udp connections. UDP connections may be offloaded from nf conntrack to nf flow table. Once aged, the connection is returned to nf conntrack with udp pickup timeout. - -nf_flowtable_udp_pickup - INTEGER (seconds) - default 30 - - UDP connection timeout after being aged from nf flow table offload. diff --git a/Documentation/networking/operstates.rst b/Documentation/networking/operstates.rst index 9c918f7cb0e8..1ee2141e8ef1 100644 --- a/Documentation/networking/operstates.rst +++ b/Documentation/networking/operstates.rst @@ -73,7 +73,9 @@ IF_OPER_LOWERLAYERDOWN (3): state (f.e. VLAN). IF_OPER_TESTING (4): - Unused in current kernel. + Interface is in testing mode, for example executing driver self-tests + or media (cable) test. It can't be used for normal traffic until tests + complete. IF_OPER_DORMANT (5): Interface is L1 up, but waiting for an external event, f.e. for a @@ -111,7 +113,7 @@ it as lower layer. 
Note that for certain kind of soft-devices, which are not managing any real hardware, it is possible to set this bit from userspace. One -should use TVL IFLA_CARRIER to do so. +should use TLV IFLA_CARRIER to do so. netif_carrier_ok() can be used to query that bit. diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index d61219889e49..539e9d4a4860 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -263,7 +263,7 @@ Userspace can also add file descriptors to the notifying process via ``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of ``struct seccomp_notif_addfd`` should be the same ``id`` as in ``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags -like O_EXEC on the file descriptor in the notifying process. If the supervisor +like O_CLOEXEC on the file descriptor in the notifying process. If the supervisor wants to inject the file descriptor with a specific number, the ``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to the specific number to use. If that file descriptor is already open in the diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index 35eca377543d..88fa495abbac 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -25,10 +25,10 @@ On x86: - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock -- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock is - taken inside kvm->arch.mmu_lock, and cannot be taken without already - holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise - there's no need to take kvm->arch.tdp_mmu_pages_lock at all). +- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and + kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and + cannot be taken without already holding kvm->arch.mmu_lock (typically with + ``read_lock`` for the TDP MMU, thus the need for additional spinlocks). Everything else is a leaf: no other lock is taken inside the critical sections. 
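For context on the Documentation/userspace-api/seccomp_filter.rst hunk above (the newfd_flags example corrected from O_EXEC to O_CLOEXEC): below is a minimal, hedged sketch of how a supervisor process could inject a file descriptor into the notifying process with SECCOMP_IOCTL_NOTIF_ADDFD. This code is not part of the patch; the names notify_fd, req, srcfd and target_fd_nr are illustrative, and req is assumed to have been obtained earlier via SECCOMP_IOCTL_NOTIF_RECV.

    /*
     * Illustrative sketch only, not taken from the patch above.
     * notify_fd: seccomp user-notification fd held by the supervisor.
     * req:       a notification previously read with SECCOMP_IOCTL_NOTIF_RECV.
     */
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/seccomp.h>

    static int inject_fd(int notify_fd, const struct seccomp_notif *req,
                         int srcfd, int target_fd_nr)
    {
            struct seccomp_notif_addfd addfd = {
                    .id          = req->id,            /* must match the pending notification */
                    .srcfd       = srcfd,              /* fd in the supervisor to copy over */
                    .newfd       = target_fd_nr,       /* requested fd number in the target */
                    .newfd_flags = O_CLOEXEC,          /* flags set on the injected fd */
                    .flags       = SECCOMP_ADDFD_FLAG_SETFD,
            };

            /* On success returns the fd number allocated in the notifying process. */
            return ioctl(notify_fd, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
    }

Without SECCOMP_ADDFD_FLAG_SETFD the kernel picks the lowest free descriptor in the target instead of honouring newfd; the flag plus an explicit newfd is only needed when the supervisor wants a specific number, as the documentation hunk describes.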
diff --git a/MAINTAINERS b/MAINTAINERS index b8971a2f5a7e..1b06d4e0aac5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6905,6 +6905,12 @@ M: Mark Einon <mark.einon@gmail.com> S: Odd Fixes F: drivers/net/ethernet/agere/ +ETAS ES58X CAN/USB DRIVER +M: Vincent Mailhol <mailhol.vincent@wanadoo.fr> +L: linux-can@vger.kernel.org +S: Maintained +F: drivers/net/can/usb/etas_es58x/ + ETHERNET BRIDGE M: Roopa Prabhu <roopa@nvidia.com> M: Nikolay Aleksandrov <nikolay@nvidia.com> @@ -9750,11 +9756,6 @@ M: David Sterba <dsterba@suse.com> S: Odd Fixes F: drivers/tty/ipwireless/ -IPX NETWORK LAYER -L: netdev@vger.kernel.org -S: Obsolete -F: include/uapi/linux/ipx.h - IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY) M: Marc Zyngier <maz@kernel.org> S: Maintained @@ -11347,7 +11348,7 @@ L: netdev@vger.kernel.org S: Supported F: drivers/net/phy/mxl-gpy.c -MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER +MCBA MICROCHIP CAN BUS ANALYZER TOOL DRIVER R: Yasushi SHOJI <yashi@spacecubics.com> L: linux-can@vger.kernel.org S: Maintained @@ -13890,6 +13891,12 @@ F: Documentation/devicetree/ F: arch/*/boot/dts/ F: include/dt-bindings/ +OPENCOMPUTE PTP CLOCK DRIVER +M: Jonathan Lemon <jonathan.lemon@gmail.com> +L: netdev@vger.kernel.org +S: Maintained +F: drivers/ptp/ptp_ocp.c + OPENCORES I2C BUS DRIVER M: Peter Korsgaard <peter@korsgaard.com> M: Andrew Lunn <andrew@lunn.ch> @@ -15823,7 +15830,7 @@ F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml F: drivers/i2c/busses/i2c-emev2.c RENESAS ETHERNET DRIVERS -R: Sergei Shtylyov <sergei.shtylyov@gmail.com> +R: Sergey Shtylyov <s.shtylyov@omp.ru> L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org F: Documentation/devicetree/bindings/net/renesas,*.yaml @@ -17835,7 +17842,7 @@ F: include/linux/sync_file.h F: include/uapi/linux/sync_file.h SYNOPSYS ARC ARCHITECTURE -M: Vineet Gupta <vgupta@synopsys.com> +M: Vineet Gupta <vgupta@kernel.org> L: linux-snps-arc@lists.infradead.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git @@ -20037,7 +20044,8 @@ F: Documentation/devicetree/bindings/extcon/wlf,arizona.yaml F: Documentation/devicetree/bindings/mfd/wlf,arizona.yaml F: Documentation/devicetree/bindings/mfd/wm831x.txt F: Documentation/devicetree/bindings/regulator/wlf,arizona.yaml -F: Documentation/devicetree/bindings/sound/wlf,arizona.yaml +F: Documentation/devicetree/bindings/sound/wlf,*.yaml +F: Documentation/devicetree/bindings/sound/wm* F: Documentation/hwmon/wm83??.rst F: arch/arm/mach-s3c/mach-crag6410* F: drivers/clk/clk-wm83*.c @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc6 NAME = Opossums on Parade # *DOCUMENTATION* @@ -546,7 +546,6 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn \ PHONY += scripts_basic scripts_basic: $(Q)$(MAKE) $(build)=scripts/basic - $(Q)rm -f .tmp_quiet_recordmcount PHONY += outputmakefile ifdef building_out_of_srctree @@ -1318,6 +1317,16 @@ scripts_unifdef: scripts_basic $(Q)$(MAKE) $(build)=scripts scripts/unifdef # --------------------------------------------------------------------------- +# Install + +# Many distributions have the custom install script, /sbin/installkernel. +# If DKMS is installed, 'make install' will eventually recuses back +# to the this Makefile to build and install external modules. +# Cancel sub_make_done so that options such as M=, V=, etc. are parsed. 
+ +install: sub_make_done := + +# --------------------------------------------------------------------------- # Tools ifdef CONFIG_STACK_VALIDATION diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d8f51eb8963b..b5bf68e74732 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -409,7 +409,7 @@ choice help Depending on the configuration, CPU can contain DSP registers (ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_CTRL, DSP_FFT_CTRL). - Bellow is options describing how to handle these registers in + Below are options describing how to handle these registers in interrupt entry / exit and in context switch. config ARC_DSP_NONE diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h index 69debd77cd04..0b485800a392 100644 --- a/arch/arc/include/asm/checksum.h +++ b/arch/arc/include/asm/checksum.h @@ -24,7 +24,7 @@ */ static inline __sum16 csum_fold(__wsum s) { - unsigned r = s << 16 | s >> 16; /* ror */ + unsigned int r = s << 16 | s >> 16; /* ror */ s = ~s; s -= r; return s >> 16; diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 30b9ae511ea9..e1971d34ef30 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -123,7 +123,7 @@ static const char * const arc_pmu_ev_hw_map[] = { #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xffff -static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c index c67c0f0f5f77..ec640219d989 100644 --- a/arch/arc/kernel/fpu.c +++ b/arch/arc/kernel/fpu.c @@ -57,23 +57,26 @@ void fpu_save_restore(struct task_struct *prev, struct task_struct *next) void fpu_init_task(struct pt_regs *regs) { + const unsigned int fwe = 0x80000000; + /* default rounding mode */ write_aux_reg(ARC_REG_FPU_CTRL, 0x100); - /* set "Write enable" to allow explicit write to exception flags */ - write_aux_reg(ARC_REG_FPU_STATUS, 0x80000000); + /* Initialize to zero: setting requires FWE be set */ + write_aux_reg(ARC_REG_FPU_STATUS, fwe); } void fpu_save_restore(struct task_struct *prev, struct task_struct *next) { struct arc_fpu *save = &prev->thread.fpu; struct arc_fpu *restore = &next->thread.fpu; + const unsigned int fwe = 0x80000000; save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL); save->status = read_aux_reg(ARC_REG_FPU_STATUS); write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl); - write_aux_reg(ARC_REG_FPU_STATUS, restore->status); + write_aux_reg(ARC_REG_FPU_STATUS, (fwe | restore->status)); } #endif diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 47bab67f8649..9e28058cdba8 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -260,7 +260,7 @@ static void init_unwind_hdr(struct unwind_table *table, { const u8 *ptr; unsigned long tableSize = table->size, hdrSize; - unsigned n; + unsigned int n; const u32 *fde; struct { u8 version; @@ -462,7 +462,7 @@ static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) { const u8 *cur = *pcur; uleb128_t value; - unsigned shift; + unsigned int shift; for (shift = 0, value = 0; cur < end; shift += 7) { if (shift + 7 > 8 * sizeof(value) @@ -483,7 +483,7 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) { const u8 *cur = *pcur; sleb128_t value; - unsigned shift; + unsigned int shift; for (shift = 0, value = 0; cur < end; shift += 7) { if (shift + 7 > 8 * 
sizeof(value) @@ -609,7 +609,7 @@ static unsigned long read_pointer(const u8 **pLoc, const void *end, static signed fde_pointer_type(const u32 *cie) { const u8 *ptr = (const u8 *)(cie + 2); - unsigned version = *ptr; + unsigned int version = *ptr; if (*++ptr) { const char *aug; @@ -904,7 +904,7 @@ int arc_unwind(struct unwind_frame_info *frame) const u8 *ptr = NULL, *end = NULL; unsigned long pc = UNW_PC(frame) - frame->call_frame; unsigned long startLoc = 0, endLoc = 0, cfa; - unsigned i; + unsigned int i; signed ptrType = -1; uleb128_t retAddrReg = 0; const struct unwind_table *table; diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index e2146a8da195..529ae50f9fe2 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -88,6 +88,8 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) } diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index 40ef3973f2a9..ba58e6b0da1d 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -1595,7 +1595,7 @@ compatible = "ti,am4372-d_can", "ti,am3352-d_can"; reg = <0x0 0x2000>; clocks = <&dcan1_fck>; - clock-name = "fck"; + clock-names = "fck"; syscon-raminit = <&scm_conf 0x644 1>; interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index aae0af10a5b1..2aa75abf85a9 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -582,7 +582,7 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins>; - clock-frequency = <400000>; + clock-frequency = <100000>; tps65218: tps65218@24 { reg = <0x24>; diff --git a/arch/arm/boot/dts/imx53-m53menlo.dts b/arch/arm/boot/dts/imx53-m53menlo.dts index f98691ae4415..d3082b9774e4 100644 --- a/arch/arm/boot/dts/imx53-m53menlo.dts +++ b/arch/arm/boot/dts/imx53-m53menlo.dts @@ -388,13 +388,13 @@ pinctrl_power_button: powerbutgrp { fsl,pins = < - MX53_PAD_SD2_DATA2__GPIO1_13 0x1e4 + MX53_PAD_SD2_DATA0__GPIO1_15 0x1e4 >; }; pinctrl_power_out: poweroutgrp { fsl,pins = < - MX53_PAD_SD2_DATA0__GPIO1_15 0x1e4 + MX53_PAD_SD2_DATA2__GPIO1_13 0x1e4 >; }; diff --git a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi index 0ad8ccde0cf8..f86efd0ccc40 100644 --- a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi @@ -54,7 +54,13 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_microsom_enet_ar8035>; phy-mode = "rgmii-id"; - phy-reset-duration = <2>; + + /* + * The PHY seems to require a long-enough reset duration to avoid + * some rare issues where the PHY gets stuck in an inconsistent and + * non-functional state at boot-up. 10ms proved to be fine . 
+ */ + phy-reset-duration = <10>; phy-reset-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>; status = "okay"; diff --git a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi index a0545431b3dc..9f1e38282bee 100644 --- a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi +++ b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi @@ -43,6 +43,7 @@ assigned-clock-rates = <0>, <198000000>; cap-power-off-card; keep-power-in-suspend; + max-frequency = <25000000>; mmc-pwrseq = <&wifi_pwrseq>; no-1-8-v; non-removable; diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 45435bb88c89..373984c130e0 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -30,14 +30,6 @@ regulator-max-microvolt = <5000000>; }; - vdds_1v8_main: fixedregulator-vdds_1v8_main { - compatible = "regulator-fixed"; - regulator-name = "vdds_1v8_main"; - vin-supply = <&smps7_reg>; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - }; - vmmcsd_fixed: fixedregulator-mmcsd { compatible = "regulator-fixed"; regulator-name = "vmmcsd_fixed"; @@ -487,6 +479,7 @@ regulator-boot-on; }; + vdds_1v8_main: smps7_reg: smps7 { /* VDDS_1v8_OMAP over VDDS_1v8_MAIN */ regulator-name = "smps7"; diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index c9b906432341..1815361fe73c 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -755,14 +755,14 @@ status = "disabled"; }; - vica: intc@10140000 { + vica: interrupt-controller@10140000 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; reg = <0x10140000 0x20>; }; - vicb: intc@10140020 { + vicb: interrupt-controller@10140020 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index c5ea08fec535..6cf1c8b4c6e2 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -37,7 +37,7 @@ poll-interval = <20>; /* - * The EXTi IRQ line 3 is shared with touchscreen and ethernet, + * The EXTi IRQ line 3 is shared with ethernet, * so mark this as polled GPIO key. */ button-0 { @@ -47,6 +47,16 @@ }; /* + * The EXTi IRQ line 6 is shared with touchscreen, + * so mark this as polled GPIO key. + */ + button-1 { + label = "TA2-GPIO-B"; + linux,code = <KEY_B>; + gpios = <&gpiod 6 GPIO_ACTIVE_LOW>; + }; + + /* * The EXTi IRQ line 0 is shared with PMIC, * so mark this as polled GPIO key. 
*/ @@ -60,13 +70,6 @@ gpio-keys { compatible = "gpio-keys"; - button-1 { - label = "TA2-GPIO-B"; - linux,code = <KEY_B>; - gpios = <&gpiod 6 GPIO_ACTIVE_LOW>; - wakeup-source; - }; - button-3 { label = "TA4-GPIO-D"; linux,code = <KEY_D>; @@ -82,6 +85,7 @@ label = "green:led5"; gpios = <&gpioc 6 GPIO_ACTIVE_HIGH>; default-state = "off"; + status = "disabled"; }; led-1 { @@ -185,8 +189,8 @@ touchscreen@38 { compatible = "edt,edt-ft5406"; reg = <0x38>; - interrupt-parent = <&gpiog>; - interrupts = <2 IRQ_TYPE_EDGE_FALLING>; /* GPIO E */ + interrupt-parent = <&gpioc>; + interrupts = <6 IRQ_TYPE_EDGE_FALLING>; /* GPIO E */ }; }; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index 2af0a6752674..8c41f819f776 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -12,6 +12,8 @@ aliases { ethernet0 = ðernet0; ethernet1 = &ksz8851; + rtc0 = &hwrtc; + rtc1 = &rtc; }; memory@c0000000 { @@ -138,6 +140,7 @@ reset-gpios = <&gpioh 3 GPIO_ACTIVE_LOW>; reset-assert-us = <500>; reset-deassert-us = <500>; + smsc,disable-energy-detect; interrupt-parent = <&gpioi>; interrupts = <11 IRQ_TYPE_LEVEL_LOW>; }; @@ -248,7 +251,7 @@ /delete-property/dmas; /delete-property/dma-names; - rtc@32 { + hwrtc: rtc@32 { compatible = "microcrystal,rv8803"; reg = <0x32>; }; diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index f0a073a71401..13f3068e9845 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -68,7 +68,6 @@ void imx_set_cpu_arg(int cpu, u32 arg); void v7_secondary_startup(void); void imx_scu_map_io(void); void imx_smp_prepare(void); -void imx_gpcv2_set_core1_pdn_pup_by_software(bool pdn); #else static inline void imx_scu_map_io(void) {} static inline void imx_smp_prepare(void) {} @@ -81,6 +80,7 @@ void imx_gpc_mask_all(void); void imx_gpc_restore_all(void); void imx_gpc_hwirq_mask(unsigned int hwirq); void imx_gpc_hwirq_unmask(unsigned int hwirq); +void imx_gpcv2_set_core1_pdn_pup_by_software(bool pdn); void imx_anatop_init(void); void imx_anatop_pre_suspend(void); void imx_anatop_post_resume(void); diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 0dfd0ae7a63d..af12668d0bf5 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -103,6 +103,7 @@ struct mmdc_pmu { struct perf_event *mmdc_events[MMDC_NUM_COUNTERS]; struct hlist_node node; struct fsl_mmdc_devtype_data *devtype_data; + struct clk *mmdc_ipg_clk; }; /* @@ -462,11 +463,14 @@ static int imx_mmdc_remove(struct platform_device *pdev) cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); perf_pmu_unregister(&pmu_mmdc->pmu); + iounmap(pmu_mmdc->mmdc_base); + clk_disable_unprepare(pmu_mmdc->mmdc_ipg_clk); kfree(pmu_mmdc); return 0; } -static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base) +static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base, + struct clk *mmdc_ipg_clk) { struct mmdc_pmu *pmu_mmdc; char *name; @@ -494,6 +498,7 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b } mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev); + pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk; if (mmdc_num == 0) name = "mmdc"; else @@ -529,7 +534,7 @@ pmu_free: #else #define imx_mmdc_remove NULL -#define imx_mmdc_perf_init(pdev, mmdc_base) 0 +#define imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk) 0 #endif static int imx_mmdc_probe(struct platform_device *pdev) @@ -567,7 
+572,13 @@ static int imx_mmdc_probe(struct platform_device *pdev) val &= ~(1 << BP_MMDC_MAPSR_PSD); writel_relaxed(val, reg); - return imx_mmdc_perf_init(pdev, mmdc_base); + err = imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk); + if (err) { + iounmap(mmdc_base); + clk_disable_unprepare(mmdc_ipg_clk); + } + + return err; } int imx_mmdc_get_ddr_type(void) diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index bf14d65120b9..34a1c7742088 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig @@ -91,6 +91,7 @@ config MACH_IXDP465 config MACH_GORAMO_MLR bool "GORAMO Multi Link Router" + depends on IXP4XX_PCI_LEGACY help Say 'Y' here if you want your kernel to support GORAMO MultiLink router. diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 65934b2924fb..12b26e04686f 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -3776,6 +3776,7 @@ struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh) struct omap_hwmod_ocp_if *oi; struct clockdomain *clkdm; struct clk_hw_omap *clk; + struct clk_hw *hw; if (!oh) return NULL; @@ -3792,7 +3793,14 @@ struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh) c = oi->_clk; } - clk = to_clk_hw_omap(__clk_get_hw(c)); + hw = __clk_get_hw(c); + if (!hw) + return NULL; + + clk = to_clk_hw_omap(hw); + if (!clk) + return NULL; + clkdm = clk->clkdm; if (!clkdm) return NULL; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b5b13a932561..fdcd54d39c1e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1800,11 +1800,11 @@ config RANDOMIZE_BASE If unsure, say N. config RANDOMIZE_MODULE_REGION_FULL - bool "Randomize the module region over a 4 GB range" + bool "Randomize the module region over a 2 GB range" depends on RANDOMIZE_BASE default y help - Randomizes the location of the module region inside a 4 GB window + Randomizes the location of the module region inside a 2 GB window covering the core kernel. This way, it is less likely for modules to leak information about the location of core kernel data structures but it does imply that function calls between modules and the core @@ -1812,7 +1812,10 @@ config RANDOMIZE_MODULE_REGION_FULL When this option is not set, the module region will be randomized over a limited range that contains the [_stext, _etext] interval of the - core kernel, so branch relocations are always in range. + core kernel, so branch relocations are almost always in range unless + ARM64_MODULE_PLTS is enabled and the region is exhausted. In this + particular case of region exhaustion, modules might be able to fall + back to a larger 2GB area. 
config CC_HAVE_STACKPROTECTOR_SYSREG def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7bc37d0a1b68..7b668db43261 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -21,19 +21,11 @@ LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \ endif ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) - ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) -$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum) - else + ifeq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y) - ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y) -$(warning LSE atomics not supported by binutils) - endif -endif - cc_has_k_constraint := $(call try-run,echo \ 'int main(void) { \ asm volatile("and w0, w0, %w0" :: "K" (4294967295)); \ @@ -176,6 +168,17 @@ vdso_install: archprepare: $(Q)$(MAKE) $(build)=arch/arm64/tools kapi +ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) + ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) + @echo "warning: ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum" >&2 + endif +endif +ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS),y) + ifneq ($(CONFIG_ARM64_LSE_ATOMICS),y) + @echo "warning: LSE atomics not supported by binutils" >&2 + endif +endif + # We use MRPROPER_FILES and CLEAN_FILES now archclean: diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts index dd764b720fb0..f6a79c8080d1 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts @@ -54,6 +54,7 @@ &mscc_felix_port0 { label = "swp0"; + managed = "in-band-status"; phy-handle = <&phy0>; phy-mode = "sgmii"; status = "okay"; @@ -61,6 +62,7 @@ &mscc_felix_port1 { label = "swp1"; + managed = "in-band-status"; phy-handle = <&phy1>; phy-mode = "sgmii"; status = "okay"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index b2e3e5d2a108..343ecf0e8973 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -66,7 +66,7 @@ }; }; - sysclk: clock-sysclk { + sysclk: sysclk { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <100000000>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index ce2bcddf396f..a05b1ab2dd12 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -19,6 +19,8 @@ aliases { spi0 = &spi0; ethernet1 = ð1; + mmc0 = &sdhci0; + mmc1 = &sdhci1; }; chosen { @@ -119,6 +121,7 @@ pinctrl-names = "default"; pinctrl-0 = <&i2c1_pins>; clock-frequency = <100000>; + /delete-property/ mrvl,i2c-fast-mode; status = "okay"; rtc@6f { diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 076d5efc4c3d..5ba7a4519b95 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -1840,7 +1840,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE1R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE1W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE1>; + iommu-map = <0x0 &smmu 
TEGRA194_SID_PCIE1 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14120000 { @@ -1890,7 +1894,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE2AR &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE2AW &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE2>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE2 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14140000 { @@ -1940,7 +1948,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE3R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE3W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE3>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE3 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14160000 { @@ -1990,7 +2002,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE4>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14180000 { @@ -2040,7 +2056,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE0W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE0>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@141a0000 { @@ -2094,7 +2114,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE5>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@14160000 { @@ -2127,6 +2151,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE4>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@14180000 { @@ -2159,6 +2191,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE0W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE0>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@141a0000 { @@ -2194,6 +2234,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE5>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; sram@40000000 { diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index e58bca832dff..41b332c054ab 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -320,7 +320,17 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) static inline unsigned long regs_return_value(struct pt_regs *regs) { - return 
regs->regs[0]; + unsigned long val = regs->regs[0]; + + /* + * Audit currently uses regs_return_value() instead of + * syscall_get_return_value(). Apply the same sign-extension here until + * audit is updated to use syscall_get_return_value(). + */ + if (compat_user_mode(regs)) + val = sign_extend64(val, 31); + + return val; } static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 1801399204d7..8aebc00c1718 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -35,7 +35,7 @@ struct stack_info { * accounting information necessary for robust unwinding. * * @fp: The fp value in the frame record (or the real fp) - * @pc: The fp value in the frame record (or the real lr) + * @pc: The lr value in the frame record (or the real lr) * * @stacks_done: Stacks which have been entirely unwound, for which it is no * longer valid to unwind to. diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index cfc0672013f6..03e20895453a 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -29,22 +29,23 @@ static inline void syscall_rollback(struct task_struct *task, regs->regs[0] = regs->orig_x0; } - -static inline long syscall_get_error(struct task_struct *task, - struct pt_regs *regs) +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) { - unsigned long error = regs->regs[0]; + unsigned long val = regs->regs[0]; if (is_compat_thread(task_thread_info(task))) - error = sign_extend64(error, 31); + val = sign_extend64(val, 31); - return IS_ERR_VALUE(error) ? error : 0; + return val; } -static inline long syscall_get_return_value(struct task_struct *task, - struct pt_regs *regs) +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) { - return regs->regs[0]; + unsigned long error = syscall_get_return_value(task, regs); + + return IS_ERR_VALUE(error) ? error : 0; } static inline void syscall_set_return_value(struct task_struct *task, diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index cfa2cfde3019..418b2bba1521 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -162,7 +162,9 @@ u64 __init kaslr_early_init(void) * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, * _stext) . This guarantees that the resulting region still * covers [_stext, _etext], and that all relative branches can - * be resolved without veneers. + * be resolved without veneers unless this region is exhausted + * and we fall back to a larger 2GB window in module_alloc() + * when ARM64_MODULE_PLTS is enabled. 
*/ module_range = MODULES_VSIZE - (u64)(_etext - _stext); module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 499b6b2f9757..b381a1ee9ea7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1862,7 +1862,7 @@ void syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(regs); if (flags & _TIF_SYSCALL_TRACEPOINT) - trace_sys_exit(regs, regs_return_value(regs)); + trace_sys_exit(regs, syscall_get_return_value(current, regs)); if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f8192f4ae0b8..23036334f4dc 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -29,6 +29,7 @@ #include <asm/unistd.h> #include <asm/fpsimd.h> #include <asm/ptrace.h> +#include <asm/syscall.h> #include <asm/signal32.h> #include <asm/traps.h> #include <asm/vdso.h> @@ -890,7 +891,7 @@ static void do_signal(struct pt_regs *regs) retval == -ERESTART_RESTARTBLOCK || (retval == -ERESTARTSYS && !(ksig.ka.sa.sa_flags & SA_RESTART)))) { - regs->regs[0] = -EINTR; + syscall_set_return_value(current, regs, -EINTR, 0); regs->pc = continue_addr; } diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index b83c8d911930..8982a2b78acf 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -218,7 +218,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) #ifdef CONFIG_STACKTRACE -noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, +noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 263d6c1a525f..50a0f1a38e84 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -54,10 +54,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, ret = do_ni_syscall(regs, scno); } - if (is_compat_task()) - ret = lower_32_bits(ret); - - regs->regs[0] = ret; + syscall_set_return_value(current, regs, 0, ret); /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), @@ -115,7 +112,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * syscall. do_notify_resume() will send a signal to userspace * before the syscall is restarted. */ - regs->regs[0] = -ERESTARTNOINTR; + syscall_set_return_value(current, regs, -ERESTARTNOINTR, 0); return; } @@ -136,7 +133,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * anyway. 
*/ if (scno == NO_SYSCALL) - regs->regs[0] = -ENOSYS; + syscall_set_return_value(current, regs, -ENOSYS, 0); scno = syscall_trace_enter(regs); if (scno == NO_SYSCALL) goto trace_exit; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..0ca72f5cda41 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -94,10 +94,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.return_nisv_io_abort_to_user = true; break; case KVM_CAP_ARM_MTE: - if (!system_supports_mte() || kvm->created_vcpus) - return -EINVAL; - r = 0; - kvm->arch.mte_enabled = true; + mutex_lock(&kvm->lock); + if (!system_supports_mte() || kvm->created_vcpus) { + r = -EINVAL; + } else { + r = 0; + kvm->arch.mte_enabled = true; + } + mutex_unlock(&kvm->lock); break; default: r = -EINVAL; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d938ce95d3bd..a6ce991b1467 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -193,7 +193,7 @@ static bool range_is_memory(u64 start, u64 end) { struct kvm_mem_range r1, r2; - if (!find_mem_range(start, &r1) || !find_mem_range(end, &r2)) + if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2)) return false; if (r1.start != r2.start) return false; diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 4e942b7ef022..653befc1b176 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -321,7 +321,7 @@ KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_MIPS CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ - egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \ + egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 4b2567d6b2df..c7925d0e9874 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -58,15 +58,20 @@ do { \ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *pmd = NULL; + pmd_t *pmd; struct page *pg; - pg = alloc_pages(GFP_KERNEL | __GFP_ACCOUNT, PMD_ORDER); - if (pg) { - pgtable_pmd_page_ctor(pg); - pmd = (pmd_t *)page_address(pg); - pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); + pg = alloc_pages(GFP_KERNEL_ACCOUNT, PMD_ORDER); + if (!pg) + return NULL; + + if (!pgtable_pmd_page_ctor(pg)) { + __free_pages(pg, PMD_ORDER); + return NULL; } + + pmd = (pmd_t *)page_address(pg); + pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); return pmd; } diff --git a/arch/mips/mti-malta/malta-platform.c b/arch/mips/mti-malta/malta-platform.c index ee7471984fe7..4ffbcc58c6f6 100644 --- a/arch/mips/mti-malta/malta-platform.c +++ b/arch/mips/mti-malta/malta-platform.c @@ -48,7 +48,8 @@ static struct plat_serial8250_port uart8250_data[] = { .mapbase = 0x1f000900, /* The CBUS UART */ .irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB2, .uartclk = 3686400, /* Twice the usual clk! */ - .iotype = UPIO_MEM32, + .iotype = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) ? 
+ UPIO_MEM32BE : UPIO_MEM32, .flags = CBUS_UART_FLAGS, .regshift = 3, }, diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index d4bdf7d274ac..6b800d3e2681 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -583,6 +583,9 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); +/* irq.c */ +DECLARE_INTERRUPT_HANDLER_ASYNC(do_IRQ); + void __noreturn unrecoverable_exception(struct pt_regs *regs); void replay_system_reset(void); diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 4982f3711fc3..2b3278534bc1 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -52,7 +52,7 @@ extern void *mcheckirq_ctx[NR_CPUS]; extern void *hardirq_ctx[NR_CPUS]; extern void *softirq_ctx[NR_CPUS]; -extern void do_IRQ(struct pt_regs *regs); +void __do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); extern void __do_irq(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 3e5d470a6155..14422e851494 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -70,6 +70,22 @@ struct pt_regs unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */ }; #endif +#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE) + struct { /* Must be a multiple of 16 bytes */ + unsigned long mas0; + unsigned long mas1; + unsigned long mas2; + unsigned long mas3; + unsigned long mas6; + unsigned long mas7; + unsigned long srr0; + unsigned long srr1; + unsigned long csrr0; + unsigned long csrr1; + unsigned long dsrr0; + unsigned long dsrr1; + }; +#endif }; #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a47eefa09bcb..5bee245d832b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -309,24 +309,21 @@ int main(void) STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); #endif -#if defined(CONFIG_PPC32) -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) - DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); - DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); +#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE) + STACK_PT_REGS_OFFSET(MAS0, mas0); /* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */ - DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); - DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1)); - DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2)); - DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3)); - DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6)); - DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7)); - DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0)); - DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1)); - DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0)); - DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1)); - DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0)); - DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1)); -#endif + STACK_PT_REGS_OFFSET(MMUCR, mas0); + STACK_PT_REGS_OFFSET(MAS1, mas1); + STACK_PT_REGS_OFFSET(MAS2, mas2); + STACK_PT_REGS_OFFSET(MAS3, mas3); + STACK_PT_REGS_OFFSET(MAS6, mas6); + STACK_PT_REGS_OFFSET(MAS7, 
mas7); + STACK_PT_REGS_OFFSET(_SRR0, srr0); + STACK_PT_REGS_OFFSET(_SRR1, srr1); + STACK_PT_REGS_OFFSET(_CSRR0, csrr0); + STACK_PT_REGS_OFFSET(_CSRR1, csrr1); + STACK_PT_REGS_OFFSET(_DSRR0, dsrr0); + STACK_PT_REGS_OFFSET(_DSRR1, dsrr1); #endif /* About the CPU features table */ diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 764edd860ed4..68e5c0a7e99d 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -300,7 +300,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1 prepare_transfer_to_handler - lwz r5, _DSISR(r11) + lwz r5, _DSISR(r1) andis. r0, r5, DSISR_DABRMATCH@h bne- 1f bl do_page_fault diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 87b806e8eded..e5503420b6c6 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -168,20 +168,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) /* only on e500mc */ #define DBG_STACK_BASE dbgirq_ctx -#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) - #ifdef CONFIG_SMP #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ mfspr r8,SPRN_PIR; \ slwi r8,r8,2; \ addis r8,r8,level##_STACK_BASE@ha; \ lwz r8,level##_STACK_BASE@l(r8); \ - addi r8,r8,EXC_LVL_FRAME_OVERHEAD; + addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE; #else #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ lis r8,level##_STACK_BASE@ha; \ lwz r8,level##_STACK_BASE@l(r8); \ - addi r8,r8,EXC_LVL_FRAME_OVERHEAD; + addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE; #endif /* @@ -208,7 +206,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mtmsr r11; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\ - addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ + addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE; /* allocate stack frame */\ beq 1f; \ /* COMING FROM USER MODE */ \ stw r9,_CCR(r11); /* save CR */\ @@ -516,24 +514,5 @@ label: bl kernel_fp_unavailable_exception; \ b interrupt_return -#else /* __ASSEMBLY__ */ -struct exception_regs { - unsigned long mas0; - unsigned long mas1; - unsigned long mas2; - unsigned long mas3; - unsigned long mas6; - unsigned long mas7; - unsigned long srr0; - unsigned long srr1; - unsigned long csrr0; - unsigned long csrr1; - unsigned long dsrr0; - unsigned long dsrr1; -}; - -/* ensure this structure is always sized to a multiple of the stack alignment */ -#define STACK_EXC_LVL_FRAME_SIZE ALIGN(sizeof (struct exception_regs), 16) - #endif /* __ASSEMBLY__ */ #endif /* __HEAD_BOOKE_H__ */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 91e63eac4e8f..551b653228c4 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -750,7 +750,7 @@ void __do_irq(struct pt_regs *regs) trace_irq_exit(regs); } -DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) +void __do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); void *cursp, *irqsp, *sirqsp; @@ -774,6 +774,11 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) set_irq_regs(old_regs); } +DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) +{ + __do_IRQ(regs); +} + static void *__init alloc_vm_stack(void) { return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP, diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index cbc28d1a2e1b..7a7cd6bda53e 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -292,7 
+292,8 @@ int kprobe_handler(struct pt_regs *regs) if (user_mode(regs)) return 0; - if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)) + if (!IS_ENABLED(CONFIG_BOOKE) && + (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR))) return 0; /* diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 5ff0e55d0db1..defecb3b1b15 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -1167,7 +1167,7 @@ static int __init topology_init(void) * CPU. For instance, the boot cpu might never be valid * for hotplugging. */ - if (smp_ops->cpu_offline_self) + if (smp_ops && smp_ops->cpu_offline_self) c->hotpluggable = 1; #endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e45ce427bffb..c487ba5a6e11 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -586,7 +586,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) - do_IRQ(regs); + __do_IRQ(regs); #endif old_regs = set_irq_regs(regs); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index dfbce527c98e..d56254f05e17 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1104,7 +1104,7 @@ DEFINE_INTERRUPT_HANDLER(RunModeException) _exception(SIGTRAP, regs, TRAP_UNK, 0); } -DEFINE_INTERRUPT_HANDLER(single_step_exception) +static void __single_step_exception(struct pt_regs *regs) { clear_single_step(regs); clear_br_trace(regs); @@ -1121,6 +1121,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); } +DEFINE_INTERRUPT_HANDLER(single_step_exception) +{ + __single_step_exception(regs); +} + /* * After we have successfully emulated an instruction, we have to * check if the instruction was being single-stepped, and if so, @@ -1130,7 +1135,7 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) static void emulate_single_step(struct pt_regs *regs) { if (single_stepping(regs)) - single_step_exception(regs); + __single_step_exception(regs); } static inline int __parse_fpscr(unsigned long fpscr) diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 2813e3f98db6..3c5baaa6f1e7 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -27,6 +27,13 @@ KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both + +# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true +# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is +# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code +# generation is minimal, it will just use r29 instead. +ccflags-y += $(call cc-option, -ffixed-r30) + asflags-y := -D__VDSO64__ -s targets += vdso64.lds diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 631a0d57b6cd..0dfaa6ab44cc 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -77,7 +77,7 @@ #include "../../../../drivers/pci/pci.h" DEFINE_STATIC_KEY_FALSE(shared_processor); -EXPORT_SYMBOL_GPL(shared_processor); +EXPORT_SYMBOL(shared_processor); int CMO_PrPSP = -1; int CMO_SecPSP = -1; @@ -539,9 +539,10 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if * H_CPU_BEHAV_FAVOUR_SECURITY is. 
*/ - if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) + if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) { security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); - else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) + pseries_security_flavor = 0; + } else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) pseries_security_flavor = 1; else pseries_security_flavor = 2; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index dbdbbc2f1dc5..943fd30095af 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -67,6 +67,7 @@ static struct irq_domain *xive_irq_domain; static struct xive_ipi_desc { unsigned int irq; char name[16]; + atomic_t started; } *xive_ipis; /* @@ -1120,7 +1121,7 @@ static const struct irq_domain_ops xive_ipi_irq_domain_ops = { .alloc = xive_ipi_irq_domain_alloc, }; -static int __init xive_request_ipi(void) +static int __init xive_init_ipis(void) { struct fwnode_handle *fwnode; struct irq_domain *ipi_domain; @@ -1144,10 +1145,6 @@ static int __init xive_request_ipi(void) struct xive_ipi_desc *xid = &xive_ipis[node]; struct xive_ipi_alloc_info info = { node }; - /* Skip nodes without CPUs */ - if (cpumask_empty(cpumask_of_node(node))) - continue; - /* * Map one IPI interrupt per node for all cpus of that node. * Since the HW interrupt number doesn't have any meaning, @@ -1159,11 +1156,6 @@ static int __init xive_request_ipi(void) xid->irq = ret; snprintf(xid->name, sizeof(xid->name), "IPI-%d", node); - - ret = request_irq(xid->irq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL); - - WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); } return ret; @@ -1178,6 +1170,22 @@ out: return ret; } +static int __init xive_request_ipi(unsigned int cpu) +{ + struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)]; + int ret; + + if (atomic_inc_return(&xid->started) > 1) + return 0; + + ret = request_irq(xid->irq, xive_muxed_ipi_action, + IRQF_PERCPU | IRQF_NO_THREAD, + xid->name, NULL); + + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); + return ret; +} + static int xive_setup_cpu_ipi(unsigned int cpu) { unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); @@ -1192,6 +1200,9 @@ static int xive_setup_cpu_ipi(unsigned int cpu) if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; + /* Register the IPI */ + xive_request_ipi(cpu); + /* Grab an IPI from the backend, this will populate xc->hw_ipi */ if (xive_ops->get_ipi(cpu, xc)) return -EIO; @@ -1231,6 +1242,8 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) if (xc->hw_ipi == XIVE_BAD_IRQ) return; + /* TODO: clear IPI mapping */ + /* Mask the IPI */ xive_do_source_set_mask(&xc->ipi_data, true); @@ -1253,7 +1266,7 @@ void __init xive_smp_probe(void) smp_ops->cause_ipi = xive_cause_ipi; /* Register the IPI */ - xive_request_ipi(); + xive_init_ipis(); /* Allocate and setup IPI for the boot CPU */ xive_setup_cpu_ipi(smp_processor_id()); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8fcceb8eda07..4f7b70ae7c31 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -492,10 +492,16 @@ config CC_HAVE_STACKPROTECTOR_TLS config STACKPROTECTOR_PER_TASK def_bool y + depends on !GCC_PLUGIN_RANDSTRUCT depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS +config PHYS_RAM_BASE_FIXED + bool "Explicitly specified physical RAM address" + default n + config PHYS_RAM_BASE hex "Platform Physical RAM address" + depends on PHYS_RAM_BASE_FIXED default "0x80000000" help This is the physical 
address of RAM in the system. It has to be @@ -508,6 +514,7 @@ config XIP_KERNEL # This prevents XIP from being enabled by all{yes,mod}config, which # fail to build since XIP doesn't support large kernels. depends on !COMPILE_TEST + select PHYS_RAM_BASE_FIXED help Execute-In-Place allows the kernel to run from non-volatile storage directly addressable by the CPU, such as NOR flash. This saves RAM diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index b1c3c596578f..2e4ea84f27e7 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -24,7 +24,7 @@ memory@80000000 { device_type = "memory"; - reg = <0x0 0x80000000 0x2 0x00000000>; + reg = <0x0 0x80000000 0x4 0x00000000>; }; soc { diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index cca8764aed83..b0ca5058e7ae 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -103,6 +103,7 @@ struct kernel_mapping { }; extern struct kernel_mapping kernel_map; +extern phys_addr_t phys_ram_base; #ifdef CONFIG_64BIT #define is_kernel_mapping(x) \ @@ -113,9 +114,9 @@ extern struct kernel_mapping kernel_map; #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) #define kernel_mapping_pa_to_va(y) ({ \ unsigned long _y = y; \ - (_y >= CONFIG_PHYS_RAM_BASE) ? \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET) : \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset); \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ + (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset) : \ + (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ }) #define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index d3081e4d9600..3397ddac1a30 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -11,7 +11,7 @@ endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) ifdef CONFIG_KEXEC -AFLAGS_kexec_relocate.o := -mcmodel=medany -mno-relax +AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) endif extra-y += head.o diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index ac7593607fa6..315db3d0229b 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -27,7 +27,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); - } else if (task == current) { + } else if (task == NULL || task == current) { fp = (unsigned long)__builtin_frame_address(1); sp = (unsigned long)__builtin_frame_address(0); pc = (unsigned long)__builtin_return_address(0); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a14bf3910eec..7cb4f391d106 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -36,6 +36,9 @@ EXPORT_SYMBOL(kernel_map); #define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map)) #endif +phys_addr_t phys_ram_base __ro_after_init; +EXPORT_SYMBOL(phys_ram_base); + #ifdef CONFIG_XIP_KERNEL extern char _xiprom[], _exiprom[]; #endif @@ -160,7 +163,7 @@ static void __init setup_bootmem(void) phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); phys_addr_t __maybe_unused max_mapped_addr; - phys_addr_t dram_end; + phys_addr_t 
phys_ram_end; #ifdef CONFIG_XIP_KERNEL vmlinux_start = __pa_symbol(&_sdata); @@ -181,9 +184,12 @@ static void __init setup_bootmem(void) #endif memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - dram_end = memblock_end_of_DRAM(); + phys_ram_end = memblock_end_of_DRAM(); #ifndef CONFIG_64BIT +#ifndef CONFIG_XIP_KERNEL + phys_ram_base = memblock_start_of_DRAM(); +#endif /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE @@ -191,15 +197,15 @@ static void __init setup_bootmem(void) * if end of dram is equal to maximum addressable memory. For 64-bit * kernel, this problem can't happen here as the end of the virtual * address space is occupied by the kernel mapping then this check must - * be done in create_kernel_page_table. + * be done as soon as the kernel mapping base address is determined. */ max_mapped_addr = __pa(~(ulong)0); - if (max_mapped_addr == (dram_end - 1)) + if (max_mapped_addr == (phys_ram_end - 1)) memblock_set_current_limit(max_mapped_addr - 4096); #endif - min_low_pfn = PFN_UP(memblock_start_of_DRAM()); - max_low_pfn = max_pfn = PFN_DOWN(dram_end); + min_low_pfn = PFN_UP(phys_ram_base); + max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); @@ -558,6 +564,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); + phys_ram_base = CONFIG_PHYS_RAM_BASE; kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 660c799d875d..e30d3fdbbc78 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -11,6 +11,7 @@ UBSAN_SANITIZE := n KASAN_SANITIZE := n obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o +obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o obj-all := $(obj-y) piggy.o syms.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 diff --git a/arch/s390/boot/compressed/clz_ctz.c b/arch/s390/boot/compressed/clz_ctz.c new file mode 100644 index 000000000000..c3ebf248596b --- /dev/null +++ b/arch/s390/boot/compressed/clz_ctz.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/clz_ctz.c" diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 7de253f766e8..b88184019af9 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -335,7 +335,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index b671642967ba..1667a3cdcf0a 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -325,7 +325,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index bff50b6acd6d..edf5ff1debe1 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ 
b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index d4fb336d747b..4461ea151e49 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1eb45139fcc6..3092fbf9dbe4 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2489,13 +2489,15 @@ void perf_clear_dirty_counters(void) return; for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) { - /* Metrics and fake events don't have corresponding HW counters. */ - if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR)) - continue; - else if (i >= INTEL_PMC_IDX_FIXED) + if (i >= INTEL_PMC_IDX_FIXED) { + /* Metrics and fake events don't have corresponding HW counters. */ + if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed)) + continue; + wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0); - else + } else { wrmsrl(x86_pmu_event_addr(i), 0); + } } bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fca7a6e2242f..ac6fd2dabf6a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2904,24 +2904,28 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ static int intel_pmu_handle_irq(struct pt_regs *regs) { - struct cpu_hw_events *cpuc; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + bool late_ack = hybrid_bit(cpuc->pmu, late_ack); + bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack); int loops; u64 status; int handled; int pmu_enabled; - cpuc = this_cpu_ptr(&cpu_hw_events); - /* * Save the PMU state. * It needs to be restored when leaving the handler. */ pmu_enabled = cpuc->enabled; /* - * No known reason to not always do late ACK, - * but just in case do it opt-in. + * In general, the early ACK is only applied for old platforms. + * For the big core starts from Haswell, the late ACK should be + * applied. + * For the small core after Tremont, we have to do the ACK right + * before re-enabling counters, which is in the middle of the + * NMI handler. */ - if (!x86_pmu.late_ack) + if (!late_ack && !mid_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); intel_bts_disable_local(); cpuc->enabled = 0; @@ -2958,6 +2962,8 @@ again: goto again; done: + if (mid_ack) + apic_write(APIC_LVTPC, APIC_DM_NMI); /* Only restore PMU state when it's active. See x86_pmu_disable(). */ cpuc->enabled = pmu_enabled; if (pmu_enabled) @@ -2969,7 +2975,7 @@ done: * have been reset. This avoids spurious NMIs on * Haswell CPUs. 
*/ - if (x86_pmu.late_ack) + if (late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); return handled; } @@ -6129,7 +6135,6 @@ __init int intel_pmu_init(void) static_branch_enable(&perf_is_hybrid); x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; - x86_pmu.late_ack = true; x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; @@ -6167,6 +6172,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; pmu->cpu_type = hybrid_big; + pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; @@ -6192,6 +6198,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; pmu->cpu_type = hybrid_small; + pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; pmu->max_pebs_events = x86_pmu.max_pebs_events; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2bf1c7ea2758..e3ac05c97b5e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -656,6 +656,10 @@ struct x86_hybrid_pmu { struct event_constraint *event_constraints; struct event_constraint *pebs_constraints; struct extra_reg *extra_regs; + + unsigned int late_ack :1, + mid_ack :1, + enabled_ack :1; }; static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu) @@ -686,6 +690,16 @@ extern struct static_key_false perf_is_hybrid; __Fp; \ })) +#define hybrid_bit(_pmu, _field) \ +({ \ + bool __Fp = x86_pmu._field; \ + \ + if (is_hybrid() && (_pmu)) \ + __Fp = hybrid_pmu(_pmu)->_field; \ + \ + __Fp; \ +}) + enum hybrid_pmu_type { hybrid_big = 0x40, hybrid_small = 0x20, @@ -755,6 +769,7 @@ struct x86_pmu { /* PMI handler bits */ unsigned int late_ack :1, + mid_ack :1, enabled_ack :1; /* * sysfs attrs @@ -1115,9 +1130,10 @@ void x86_pmu_stop(struct perf_event *event, int flags); static inline void x86_pmu_disable_event(struct perf_event *event) { + u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrl(hwc->config_base, hwc->config & ~disable_mask); if (is_counter_pair(hwc)) wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 974cbfb1eefe..af6ce8d4c86a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1038,6 +1038,13 @@ struct kvm_arch { struct list_head lpage_disallowed_mmu_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; + /* + * Protects marking pages unsync during page faults, as TDP MMU page + * faults only take mmu_lock for read. For simplicity, the unsync + * pages lock is always taken when marking pages unsync regardless of + * whether mmu_lock is held for read or write. 
+ */ + spinlock_t mmu_unsync_pages_lock; struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index e322676039f4..b00dbc5fac2b 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -184,6 +184,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IGN_TPR_SHIFT 20 #define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) +#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) + #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d5c691a3208b..39224e035e47 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1986,7 +1986,8 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static struct irq_chip ioapic_ir_chip __read_mostly = { @@ -1999,7 +2000,8 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static inline void init_IO_APIC_traps(void) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 44ebe25e7703..dbacb9ec8843 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -58,11 +58,13 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) * The quirk bit is not set in this case. 
* - The new vector is the same as the old vector * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) + * - The interrupt is not yet started up * - The new destination CPU is the same as the old destination CPU */ if (!irqd_msi_nomask_quirk(irqd) || cfg->vector == old_cfg.vector || old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || + !irqd_is_started(irqd) || cfg->dest_apicid == old_cfg.dest_apicid) { irq_msi_update_msg(irqd, cfg); return ret; @@ -150,7 +152,8 @@ static struct irq_chip pci_msi_controller = { .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_affinity = msi_set_affinity, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, @@ -219,7 +222,8 @@ static struct irq_chip pci_msi_ir_controller = { .irq_mask = pci_msi_mask_irq, .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static struct msi_domain_info pci_msi_ir_domain_info = { @@ -273,7 +277,8 @@ static struct irq_chip dmar_msi_controller = { .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = dmar_msi_compose_msg, .irq_write_msi_msg = dmar_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static int dmar_msi_init(struct irq_domain *domain, diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index f07c10b87a87..57e4bb695ff9 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -285,15 +285,14 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) return chunks >>= shift; } -static int __mon_event_count(u32 rmid, struct rmid_read *rr) +static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) { struct mbm_state *m; u64 chunks, tval; tval = __rmid_read(rmid, rr->evtid); if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { - rr->val = tval; - return -EINVAL; + return tval; } switch (rr->evtid) { case QOS_L3_OCCUP_EVENT_ID: @@ -305,12 +304,6 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) case QOS_L3_MBM_LOCAL_EVENT_ID: m = &rr->d->mbm_local[rmid]; break; - default: - /* - * Code would never reach here because - * an invalid event id would fail the __rmid_read. - */ - return -EINVAL; } if (rr->first) { @@ -361,23 +354,29 @@ void mon_event_count(void *info) struct rdtgroup *rdtgrp, *entry; struct rmid_read *rr = info; struct list_head *head; + u64 ret_val; rdtgrp = rr->rgrp; - if (__mon_event_count(rdtgrp->mon.rmid, rr)) - return; + ret_val = __mon_event_count(rdtgrp->mon.rmid, rr); /* - * For Ctrl groups read data from child monitor groups. + * For Ctrl groups read data from child monitor groups and + * add them together. Count events which are read successfully. + * Discard the rmid_read's reporting errors. 
*/ head = &rdtgrp->mon.crdtgrp_list; if (rdtgrp->type == RDTCTRL_GROUP) { list_for_each_entry(entry, head, mon.crdtgrp_list) { - if (__mon_event_count(entry->mon.rmid, rr)) - return; + if (__mon_event_count(entry->mon.rmid, rr) == 0) + ret_val = 0; } } + + /* Report error if none of rmid_reads are successful */ + if (ret_val) + rr->val = ret_val; } /* diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 08651a4e6aa0..42fc41dd0e1f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -508,7 +508,7 @@ static struct irq_chip hpet_msi_controller __ro_after_init = { .irq_set_affinity = msi_domain_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_write_msi_msg = hpet_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP, }; static int hpet_msi_init(struct irq_domain *domain, diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 739be5da3bca..fe03bd978761 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -208,30 +208,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_mmu_after_set_cpuid(vcpu); } -static int is_efer_nx(void) -{ - return host_efer & EFER_NX; -} - -static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) -{ - int i; - struct kvm_cpuid_entry2 *e, *entry; - - entry = NULL; - for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { - e = &vcpu->arch.cpuid_entries[i]; - if (e->function == 0x80000001) { - entry = e; - break; - } - } - if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) { - cpuid_entry_clear(entry, X86_FEATURE_NX); - printk(KERN_INFO "kvm: guest NX capability removed\n"); - } -} - int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -302,7 +278,6 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, vcpu->arch.cpuid_entries = e2; vcpu->arch.cpuid_nent = cpuid->nent; - cpuid_fix_nx_cap(vcpu); kvm_update_cpuid_runtime(vcpu); kvm_vcpu_after_set_cpuid(vcpu); @@ -401,7 +376,6 @@ static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) void kvm_set_cpu_caps(void) { - unsigned int f_nx = is_efer_nx() ? 
F(NX) : 0; #ifdef CONFIG_X86_64 unsigned int f_gbpages = F(GBPAGES); unsigned int f_lm = F(LM); @@ -515,7 +489,7 @@ void kvm_set_cpu_caps(void) F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | F(PAT) | F(PSE36) | 0 /* Reserved */ | - f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | + F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW) ); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index b07592ca92f0..41d2a53c5dea 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1933,7 +1933,7 @@ ret_success: void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *entry; - struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + struct kvm_vcpu_hv *hv_vcpu; entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0); if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) { @@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { + trace_kvm_hv_hypercall_done(result); kvm_hv_hypercall_set_result(vcpu, result); ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); @@ -2139,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code) int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_hv_hcall hc; u64 ret = HV_STATUS_SUCCESS; @@ -2173,17 +2175,25 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; hc.rep = !!(hc.rep_cnt || hc.rep_idx); - if (hc.fast && is_xmm_fast_hypercall(&hc)) - kvm_hv_hypercall_read_xmm(&hc); - trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, hc.ingpa, hc.outgpa); - if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) { + if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) { ret = HV_STATUS_ACCESS_DENIED; goto hypercall_complete; } + if (hc.fast && is_xmm_fast_hypercall(&hc)) { + if (unlikely(hv_vcpu->enforce_cpuid && + !(hv_vcpu->cpuid_cache.features_edx & + HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + kvm_hv_hypercall_read_xmm(&hc); + } + switch (hc.code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: if (unlikely(hc.rep)) { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 66f7f5bc3482..47b765270239 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt) * aggregate version in order to make the slab shrinker * faster */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); @@ -2535,6 +2535,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync) { struct kvm_mmu_page *sp; + bool locked = false; /* * Force write-protection if the page is being tracked. Note, the page @@ -2557,9 +2558,34 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync) if (sp->unsync) continue; + /* + * TDP MMU page faults require an additional spinlock as they + * run with mmu_lock held for read, not write, and the unsync + * logic is not thread safe. 
Take the spinklock regardless of + * the MMU type to avoid extra conditionals/parameters, there's + * no meaningful penalty if mmu_lock is held for write. + */ + if (!locked) { + locked = true; + spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock); + + /* + * Recheck after taking the spinlock, a different vCPU + * may have since marked the page unsync. A false + * positive on the unprotected check above is not + * possible as clearing sp->unsync _must_ hold mmu_lock + * for write, i.e. unsync cannot transition from 0->1 + * while this CPU holds mmu_lock for read (or write). + */ + if (READ_ONCE(sp->unsync)) + continue; + } + WARN_ON(sp->role.level != PG_LEVEL_4K); kvm_unsync_page(vcpu, sp); } + if (locked) + spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock); /* * We need to ensure that the marking of unsync pages is visible @@ -5537,6 +5563,8 @@ void kvm_mmu_init_vm(struct kvm *kvm) { struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; + spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); + if (!kvm_mmu_init_tdp_mmu(kvm)) /* * No smp_load/store wrappers needed here as we are in diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 0853370bd811..d80cb122b5f3 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -43,6 +43,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) if (!kvm->arch.tdp_mmu_enabled) return; + WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* @@ -81,8 +82,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head) void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); - kvm_lockdep_assert_mmu_lock_held(kvm, shared); if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) @@ -94,7 +93,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, list_del_rcu(&root->link); spin_unlock(&kvm->arch.tdp_mmu_pages_lock); - zap_gfn_range(kvm, root, 0, max_gfn, false, false, shared); + zap_gfn_range(kvm, root, 0, -1ull, false, false, shared); call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback); } @@ -724,13 +723,29 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, bool can_yield, bool flush, bool shared) { + gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT); + bool zap_all = (start == 0 && end >= max_gfn_host); struct tdp_iter iter; + /* + * No need to try to step down in the iterator when zapping all SPTEs, + * zapping the top-level non-leaf SPTEs will recurse on their children. + */ + int min_level = zap_all ? root->role.level : PG_LEVEL_4K; + + /* + * Bound the walk at host.MAXPHYADDR, guest accesses beyond that will + * hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF, + * and so KVM will never install a SPTE for such addresses. + */ + end = min(end, max_gfn_host); + kvm_lockdep_assert_mmu_lock_held(kvm, shared); rcu_read_lock(); - tdp_root_for_each_pte(iter, root, start, end) { + for_each_tdp_pte_min_level(iter, root->spt, root->role.level, + min_level, start, end) { retry: if (can_yield && tdp_mmu_iter_cond_resched(kvm, &iter, flush, shared)) { @@ -744,9 +759,10 @@ retry: /* * If this is a non-last-level SPTE that covers a larger range * than should be zapped, continue, and zap the mappings at a - * lower level. + * lower level, except when zapping all SPTEs. 
*/ - if ((iter.gfn < start || + if (!zap_all && + (iter.gfn < start || iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) && !is_last_spte(iter.old_spte, iter.level)) continue; @@ -794,12 +810,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start, void kvm_tdp_mmu_zap_all(struct kvm *kvm) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); bool flush = false; int i; for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn, + flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull, flush, false); if (flush) @@ -838,7 +853,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm, */ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); struct kvm_mmu_page *next_root; struct kvm_mmu_page *root; bool flush = false; @@ -854,8 +868,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) rcu_read_unlock(); - flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush, - true); + flush = zap_gfn_range(kvm, root, 0, -1ull, true, flush, true); /* * Put the reference acquired in diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 61738ff8ef33..e5515477c30a 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -158,6 +158,9 @@ void recalc_intercepts(struct vcpu_svm *svm) /* If SMI is not intercepted, ignore guest SMI intercept as well */ if (!intercept_smi) vmcb_clr_intercept(c, INTERCEPT_SMI); + + vmcb_set_intercept(c, INTERCEPT_VMLOAD); + vmcb_set_intercept(c, INTERCEPT_VMSAVE); } static void copy_vmcb_control_area(struct vmcb_control_area *dst, @@ -503,7 +506,11 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) { - const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK; + const u32 int_ctl_vmcb01_bits = + V_INTR_MASKING_MASK | V_GIF_MASK | V_GIF_ENABLE_MASK; + + const u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK; + struct kvm_vcpu *vcpu = &svm->vcpu; /* @@ -535,8 +542,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset; svm->vmcb->control.int_ctl = - (svm->nested.ctl.int_ctl & ~mask) | - (svm->vmcb01.ptr->control.int_ctl & mask); + (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) | + (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits); svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext; svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6710d9ee2e4b..7fbce342eec4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock); unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long sev_me_mask; +static unsigned int nr_asids; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; @@ -78,11 +79,11 @@ struct enc_region { /* Called with the sev_bitmap_lock held, or on shutdown */ static int sev_flush_asids(int min_asid, int max_asid) { - int ret, pos, error = 0; + int ret, asid, error = 0; /* Check if there are any ASIDs to reclaim before performing a flush */ - pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid); - if (pos >= max_asid) + asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); + if (asid > max_asid) return -EBUSY; /* @@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int 
min_asid, int max_asid) /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, - max_sev_asid); - bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); + nr_asids); + bitmap_zero(sev_reclaim_asid_bitmap, nr_asids); return true; } static int sev_asid_new(struct kvm_sev_info *sev) { - int pos, min_asid, max_asid, ret; + int asid, min_asid, max_asid, ret; bool retry = true; enum misc_res_type type; @@ -143,11 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev) * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. */ - min_asid = sev->es_active ? 0 : min_sev_asid - 1; + min_asid = sev->es_active ? 1 : min_sev_asid; max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: - pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid); - if (pos >= max_asid) { + asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); + if (asid > max_asid) { if (retry && __sev_recycle_asids(min_asid, max_asid)) { retry = false; goto again; @@ -157,11 +158,11 @@ again: goto e_uncharge; } - __set_bit(pos, sev_asid_bitmap); + __set_bit(asid, sev_asid_bitmap); mutex_unlock(&sev_bitmap_lock); - return pos + 1; + return asid; e_uncharge: misc_cg_uncharge(type, sev->misc_cg, 1); put_misc_cg(sev->misc_cg); @@ -179,17 +180,16 @@ static int sev_get_asid(struct kvm *kvm) static void sev_asid_free(struct kvm_sev_info *sev) { struct svm_cpu_data *sd; - int cpu, pos; + int cpu; enum misc_res_type type; mutex_lock(&sev_bitmap_lock); - pos = sev->asid - 1; - __set_bit(pos, sev_reclaim_asid_bitmap); + __set_bit(sev->asid, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); - sd->sev_vmcbs[pos] = NULL; + sd->sev_vmcbs[sev->asid] = NULL; } mutex_unlock(&sev_bitmap_lock); @@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void) min_sev_asid = edx; sev_me_mask = 1UL << (ebx & 0x3f); - /* Initialize SEV ASID bitmaps */ - sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + /* + * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap, + * even though it's never used, so that the bitmap is indexed by the + * actual ASID. + */ + nr_asids = max_sev_asid + 1; + sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_asid_bitmap) goto out; - sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_reclaim_asid_bitmap) { bitmap_free(sev_asid_bitmap); sev_asid_bitmap = NULL; @@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void) return; /* No need to take sev_bitmap_lock, all VMs have been destroyed. 
*/ - sev_flush_asids(0, max_sev_asid); + sev_flush_asids(1, max_sev_asid); bitmap_free(sev_asid_bitmap); bitmap_free(sev_reclaim_asid_bitmap); @@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) if (!sev_enabled) return 0; - sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL); + sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL); if (!sd->sev_vmcbs) return -ENOMEM; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e8ccab50ebf6..69639f9624f5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1589,17 +1589,18 @@ static void svm_set_vintr(struct vcpu_svm *svm) static void svm_clear_vintr(struct vcpu_svm *svm) { - const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK; svm_clr_intercept(svm, INTERCEPT_VINTR); /* Drop int_ctl fields related to VINTR injection. */ - svm->vmcb->control.int_ctl &= mask; + svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK; if (is_guest_mode(&svm->vcpu)) { - svm->vmcb01.ptr->control.int_ctl &= mask; + svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK; WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) != (svm->nested.ctl.int_ctl & V_TPR_MASK)); - svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask; + + svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & + V_IRQ_INJECTION_BITS_MASK; } vmcb_mark_dirty(svm->vmcb, VMCB_INTR); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index b484141ea15b..03ebe368333e 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall, __entry->outgpa) ); +TRACE_EVENT(kvm_hv_hypercall_done, + TP_PROTO(u64 result), + TP_ARGS(result), + + TP_STRUCT__entry( + __field(__u64, result) + ), + + TP_fast_assign( + __entry->result = result; + ), + + TP_printk("result 0x%llx", __entry->result) +); + /* * Tracepoint for Xen hypercall. 
*/ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1a52134b0c42..b3f77d18eb5a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -330,6 +330,31 @@ void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu) vcpu_put(vcpu); } +#define EPTP_PA_MASK GENMASK_ULL(51, 12) + +static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) +{ + return VALID_PAGE(root_hpa) && + ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); +} + +static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp, + gpa_t addr) +{ + uint i; + struct kvm_mmu_root_info *cached_root; + + WARN_ON_ONCE(!mmu_is_nested(vcpu)); + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { + cached_root = &vcpu->arch.mmu->prev_roots[i]; + + if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd, + eptp)) + vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa); + } +} + static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { @@ -342,10 +367,22 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, vm_exit_reason = EXIT_REASON_PML_FULL; vmx->nested.pml_full = false; exit_qualification &= INTR_INFO_UNBLOCK_NMI; - } else if (fault->error_code & PFERR_RSVD_MASK) - vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; - else - vm_exit_reason = EXIT_REASON_EPT_VIOLATION; + } else { + if (fault->error_code & PFERR_RSVD_MASK) + vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; + else + vm_exit_reason = EXIT_REASON_EPT_VIOLATION; + + /* + * Although the caller (kvm_inject_emulated_page_fault) would + * have already synced the faulting address in the shadow EPT + * tables for the current EPTP12, we also need to sync it for + * any other cached EPTP02s based on the same EP4TA, since the + * TLB associates mappings to the EP4TA rather than the full EPTP. 
+ */ + nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer, + fault->address); + } nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification); vmcs12->guest_physical_address = fault->address; @@ -5325,14 +5362,6 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) return nested_vmx_succeed(vcpu); } -#define EPTP_PA_MASK GENMASK_ULL(51, 12) - -static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) -{ - return VALID_PAGE(root_hpa) && - ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); -} - /* Emulate the INVEPT instruction */ static int handle_invept(struct kvm_vcpu *vcpu) { @@ -5826,7 +5855,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, if (is_nmi(intr_info)) return true; else if (is_page_fault(intr_info)) - return vcpu->arch.apf.host_apf_flags || !enable_ept; + return vcpu->arch.apf.host_apf_flags || + vmx_need_pf_intercept(vcpu); else if (is_debug(intr_info) && vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index db88ed4f2121..17a1cb4b059d 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -522,7 +522,7 @@ static inline struct vmcs *alloc_vmcs(bool shadow) static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) { - return vmx->secondary_exec_control & + return secondary_exec_controls_get(vmx) & SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4116567f3d44..e5d5c5ed7dd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { - return kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_accept_dm_intr(vcpu); + /* + * Do not cause an interrupt window exit if an exception + * is pending or an event needs reinjection; userspace + * might want to inject the interrupt manually using KVM_SET_REGS + * or KVM_SET_SREGS. For that to work, we must be at an + * instruction boundary and with no events half-injected. 
+ */ + return (kvm_arch_interrupt_allowed(vcpu) && + kvm_cpu_accept_dm_intr(vcpu) && + !kvm_event_needs_reinjection(vcpu) && + !vcpu->arch.exception.pending); } static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index fd1ab80be0de..a4cf678cf5c8 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk @@ -10,6 +10,7 @@ BEGIN { /^GNU objdump/ { verstr = "" + gsub(/\(.*\)/, ""); for (i = 3; i <= NF; i++) if (match($(i), "^[0-9]")) { verstr = $(i); diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 04c5a44b9682..9ba700dc47de 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -57,12 +57,12 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { [S_REL] = "^(__init_(begin|end)|" "__x86_cpu_dev_(start|end)|" - "(__parainstructions|__alt_instructions)(|_end)|" - "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|" + "(__parainstructions|__alt_instructions)(_end)?|" + "(__iommu_table|__apicdrivers|__smp_locks)(_end)?|" "__(start|end)_pci_.*|" "__(start|end)_builtin_fw|" - "__(start|stop)___ksymtab(|_gpl)|" - "__(start|stop)___kcrctab(|_gpl)|" + "__(start|stop)___ksymtab(_gpl)?|" + "__(start|stop)___kcrctab(_gpl)?|" "__(start|stop)___param|" "__(start|stop)___modver|" "__(start|stop)___bug_table|" diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 64053d67a97b..2f2158e05a91 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -9,12 +9,6 @@ config MQ_IOSCHED_DEADLINE help MQ version of the deadline IO scheduler. -config MQ_IOSCHED_DEADLINE_CGROUP - tristate - default y - depends on MQ_IOSCHED_DEADLINE - depends on BLK_CGROUP - config MQ_IOSCHED_KYBER tristate "Kyber I/O scheduler" default y diff --git a/block/Makefile b/block/Makefile index bfbe4e13ca1e..1e1afa10f869 100644 --- a/block/Makefile +++ b/block/Makefile @@ -22,8 +22,6 @@ obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o -mq-deadline-y += mq-deadline-main.o -mq-deadline-$(CONFIG_MQ_IOSCHED_DEADLINE_CGROUP)+= mq-deadline-cgroup.o obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o obj-$(CONFIG_IOSCHED_BFQ) += bfq.o diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 575d7a2e7203..31fe9be179d9 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -790,6 +790,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) struct blkcg_gq *parent = blkg->parent; struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu); struct blkg_iostat cur, delta; + unsigned long flags; unsigned int seq; /* fetch the current per-cpu values */ @@ -799,21 +800,21 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) } while (u64_stats_fetch_retry(&bisc->sync, seq)); /* propagate percpu delta to global */ - u64_stats_update_begin(&blkg->iostat.sync); + flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); blkg_iostat_set(&delta, &cur); blkg_iostat_sub(&delta, &bisc->last); blkg_iostat_add(&blkg->iostat.cur, &delta); blkg_iostat_add(&bisc->last, &delta); - u64_stats_update_end(&blkg->iostat.sync); + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); /* propagate global delta to parent (unless that's root) */ if (parent && parent->parent) { - u64_stats_update_begin(&parent->iostat.sync); + flags = 
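The relocs.c change above rewrites groups such as "(|_gpl)" as "(_gpl)?": an empty branch inside a POSIX extended regular expression is not portable, while an optional group is. A quick standalone check with regcomp()/regexec(), assuming just the two symbol names shown:

#include <regex.h>
#include <stdio.h>

int main(void)
{
	regex_t re;
	const char *syms[] = { "__start___ksymtab", "__start___ksymtab_gpl" };

	/* Optional group instead of an empty alternative. */
	if (regcomp(&re, "^__(start|stop)___ksymtab(_gpl)?$", REG_EXTENDED | REG_NOSUB))
		return 1;
	for (int i = 0; i < 2; i++)
		printf("%s: %s\n", syms[i],
		       regexec(&re, syms[i], 0, NULL, 0) == 0 ? "match" : "no match");
	regfree(&re);
	return 0;
}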
u64_stats_update_begin_irqsave(&parent->iostat.sync); blkg_iostat_set(&delta, &blkg->iostat.cur); blkg_iostat_sub(&delta, &blkg->iostat.last); blkg_iostat_add(&parent->iostat.cur, &delta); blkg_iostat_add(&blkg->iostat.last, &delta); - u64_stats_update_end(&parent->iostat.sync); + u64_stats_update_end_irqrestore(&parent->iostat.sync, flags); } } @@ -848,6 +849,7 @@ static void blkcg_fill_root_iostats(void) memset(&tmp, 0, sizeof(tmp)); for_each_possible_cpu(cpu) { struct disk_stats *cpu_dkstats; + unsigned long flags; cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu); tmp.ios[BLKG_IOSTAT_READ] += @@ -864,9 +866,9 @@ static void blkcg_fill_root_iostats(void) tmp.bytes[BLKG_IOSTAT_DISCARD] += cpu_dkstats->sectors[STAT_DISCARD] << 9; - u64_stats_update_begin(&blkg->iostat.sync); + flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); blkg_iostat_set(&blkg->iostat.cur, &tmp); - u64_stats_update_end(&blkg->iostat.sync); + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } } } diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 5fac3757e6e0..0e56557cacf2 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3061,19 +3061,19 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX) return -EINVAL; - spin_lock(&blkcg->lock); + spin_lock_irq(&blkcg->lock); iocc->dfl_weight = v * WEIGHT_ONE; hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct ioc_gq *iocg = blkg_to_iocg(blkg); if (iocg) { - spin_lock_irq(&iocg->ioc->lock); + spin_lock(&iocg->ioc->lock); ioc_now(iocg->ioc, &now); weight_updated(iocg, &now); - spin_unlock_irq(&iocg->ioc->lock); + spin_unlock(&iocg->ioc->lock); } } - spin_unlock(&blkcg->lock); + spin_unlock_irq(&blkcg->lock); return nbytes; } diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 81be0096411d..d8b0d8bd132b 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -833,7 +833,11 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, enable = iolatency_set_min_lat_nsec(blkg, lat_val); if (enable) { - WARN_ON_ONCE(!blk_get_queue(blkg->q)); + if (!blk_get_queue(blkg->q)) { + ret = -ENODEV; + goto out; + } + blkg_get(blkg); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 2c4ac51e54eb..2fe396385a4a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2994,10 +2994,12 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) int i; queue_for_each_hw_ctx(q, hctx, i) { - if (shared) + if (shared) { hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; - else + } else { + blk_mq_tag_idle(hctx); hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; + } } } diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 81e3279ecd57..15a8be57203d 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -596,13 +596,13 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *head = &kcq->rq_list[sched_domain]; spin_lock(&kcq->lock); + trace_block_rq_insert(rq); if (at_head) list_move(&rq->queuelist, head); else list_move_tail(&rq->queuelist, head); sbitmap_set_bit(&khd->kcq_map[sched_domain], rq->mq_ctx->index_hw[hctx->type]); - trace_block_rq_insert(rq); spin_unlock(&kcq->lock); } } diff --git a/block/mq-deadline-cgroup.c b/block/mq-deadline-cgroup.c deleted file mode 100644 index 3b4bfddec39f..000000000000 --- a/block/mq-deadline-cgroup.c +++ /dev/null @@ -1,126 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <linux/blk-cgroup.h> -#include <linux/ioprio.h> - -#include 
"mq-deadline-cgroup.h" - -static struct blkcg_policy dd_blkcg_policy; - -static struct blkcg_policy_data *dd_cpd_alloc(gfp_t gfp) -{ - struct dd_blkcg *pd; - - pd = kzalloc(sizeof(*pd), gfp); - if (!pd) - return NULL; - pd->stats = alloc_percpu_gfp(typeof(*pd->stats), - GFP_KERNEL | __GFP_ZERO); - if (!pd->stats) { - kfree(pd); - return NULL; - } - return &pd->cpd; -} - -static void dd_cpd_free(struct blkcg_policy_data *cpd) -{ - struct dd_blkcg *dd_blkcg = container_of(cpd, typeof(*dd_blkcg), cpd); - - free_percpu(dd_blkcg->stats); - kfree(dd_blkcg); -} - -static struct dd_blkcg *dd_blkcg_from_pd(struct blkg_policy_data *pd) -{ - return container_of(blkcg_to_cpd(pd->blkg->blkcg, &dd_blkcg_policy), - struct dd_blkcg, cpd); -} - -/* - * Convert an association between a block cgroup and a request queue into a - * pointer to the mq-deadline information associated with a (blkcg, queue) pair. - */ -struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio) -{ - struct blkg_policy_data *pd; - - pd = blkg_to_pd(bio->bi_blkg, &dd_blkcg_policy); - if (!pd) - return NULL; - - return dd_blkcg_from_pd(pd); -} - -static size_t dd_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size) -{ - static const char *const prio_class_name[] = { - [IOPRIO_CLASS_NONE] = "NONE", - [IOPRIO_CLASS_RT] = "RT", - [IOPRIO_CLASS_BE] = "BE", - [IOPRIO_CLASS_IDLE] = "IDLE", - }; - struct dd_blkcg *blkcg = dd_blkcg_from_pd(pd); - int res = 0; - u8 prio; - - for (prio = 0; prio < ARRAY_SIZE(blkcg->stats->stats); prio++) - res += scnprintf(buf + res, size - res, - " [%s] dispatched=%u inserted=%u merged=%u", - prio_class_name[prio], - ddcg_sum(blkcg, dispatched, prio) + - ddcg_sum(blkcg, merged, prio) - - ddcg_sum(blkcg, completed, prio), - ddcg_sum(blkcg, inserted, prio) - - ddcg_sum(blkcg, completed, prio), - ddcg_sum(blkcg, merged, prio)); - - return res; -} - -static struct blkg_policy_data *dd_pd_alloc(gfp_t gfp, struct request_queue *q, - struct blkcg *blkcg) -{ - struct dd_blkg *pd; - - pd = kzalloc(sizeof(*pd), gfp); - if (!pd) - return NULL; - return &pd->pd; -} - -static void dd_pd_free(struct blkg_policy_data *pd) -{ - struct dd_blkg *dd_blkg = container_of(pd, typeof(*dd_blkg), pd); - - kfree(dd_blkg); -} - -static struct blkcg_policy dd_blkcg_policy = { - .cpd_alloc_fn = dd_cpd_alloc, - .cpd_free_fn = dd_cpd_free, - - .pd_alloc_fn = dd_pd_alloc, - .pd_free_fn = dd_pd_free, - .pd_stat_fn = dd_pd_stat, -}; - -int dd_activate_policy(struct request_queue *q) -{ - return blkcg_activate_policy(q, &dd_blkcg_policy); -} - -void dd_deactivate_policy(struct request_queue *q) -{ - blkcg_deactivate_policy(q, &dd_blkcg_policy); -} - -int __init dd_blkcg_init(void) -{ - return blkcg_policy_register(&dd_blkcg_policy); -} - -void __exit dd_blkcg_exit(void) -{ - blkcg_policy_unregister(&dd_blkcg_policy); -} diff --git a/block/mq-deadline-cgroup.h b/block/mq-deadline-cgroup.h deleted file mode 100644 index 0143fd74f3ce..000000000000 --- a/block/mq-deadline-cgroup.h +++ /dev/null @@ -1,114 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#if !defined(_MQ_DEADLINE_CGROUP_H_) -#define _MQ_DEADLINE_CGROUP_H_ - -#include <linux/blk-cgroup.h> - -struct request_queue; - -/** - * struct io_stats_per_prio - I/O statistics per I/O priority class. - * @inserted: Number of inserted requests. - * @merged: Number of merged requests. - * @dispatched: Number of dispatched requests. - * @completed: Number of I/O completions. 
- */ -struct io_stats_per_prio { - local_t inserted; - local_t merged; - local_t dispatched; - local_t completed; -}; - -/* I/O statistics per I/O cgroup per I/O priority class (IOPRIO_CLASS_*). */ -struct blkcg_io_stats { - struct io_stats_per_prio stats[4]; -}; - -/** - * struct dd_blkcg - Per cgroup data. - * @cpd: blkcg_policy_data structure. - * @stats: I/O statistics. - */ -struct dd_blkcg { - struct blkcg_policy_data cpd; /* must be the first member */ - struct blkcg_io_stats __percpu *stats; -}; - -/* - * Count one event of type 'event_type' and with I/O priority class - * 'prio_class'. - */ -#define ddcg_count(ddcg, event_type, prio_class) do { \ -if (ddcg) { \ - struct blkcg_io_stats *io_stats = get_cpu_ptr((ddcg)->stats); \ - \ - BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *)); \ - BUILD_BUG_ON(!__same_type((prio_class), u8)); \ - local_inc(&io_stats->stats[(prio_class)].event_type); \ - put_cpu_ptr(io_stats); \ -} \ -} while (0) - -/* - * Returns the total number of ddcg_count(ddcg, event_type, prio_class) calls - * across all CPUs. No locking or barriers since it is fine if the returned - * sum is slightly outdated. - */ -#define ddcg_sum(ddcg, event_type, prio) ({ \ - unsigned int cpu; \ - u32 sum = 0; \ - \ - BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *)); \ - BUILD_BUG_ON(!__same_type((prio), u8)); \ - for_each_present_cpu(cpu) \ - sum += local_read(&per_cpu_ptr((ddcg)->stats, cpu)-> \ - stats[(prio)].event_type); \ - sum; \ -}) - -#ifdef CONFIG_BLK_CGROUP - -/** - * struct dd_blkg - Per (cgroup, request queue) data. - * @pd: blkg_policy_data structure. - */ -struct dd_blkg { - struct blkg_policy_data pd; /* must be the first member */ -}; - -struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio); -int dd_activate_policy(struct request_queue *q); -void dd_deactivate_policy(struct request_queue *q); -int __init dd_blkcg_init(void); -void __exit dd_blkcg_exit(void); - -#else /* CONFIG_BLK_CGROUP */ - -static inline struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio) -{ - return NULL; -} - -static inline int dd_activate_policy(struct request_queue *q) -{ - return 0; -} - -static inline void dd_deactivate_policy(struct request_queue *q) -{ -} - -static inline int dd_blkcg_init(void) -{ - return 0; -} - -static inline void dd_blkcg_exit(void) -{ -} - -#endif /* CONFIG_BLK_CGROUP */ - -#endif /* _MQ_DEADLINE_CGROUP_H_ */ diff --git a/block/mq-deadline-main.c b/block/mq-deadline.c index 6f612e6dc82b..a09761cbdf12 100644 --- a/block/mq-deadline-main.c +++ b/block/mq-deadline.c @@ -25,7 +25,6 @@ #include "blk-mq-debugfs.h" #include "blk-mq-tag.h" #include "blk-mq-sched.h" -#include "mq-deadline-cgroup.h" /* * See Documentation/block/deadline-iosched.rst @@ -57,6 +56,14 @@ enum dd_prio { enum { DD_PRIO_COUNT = 3 }; +/* I/O statistics per I/O priority. */ +struct io_stats_per_prio { + local_t inserted; + local_t merged; + local_t dispatched; + local_t completed; +}; + /* I/O statistics for all I/O priorities (enum dd_prio). */ struct io_stats { struct io_stats_per_prio stats[DD_PRIO_COUNT]; @@ -79,9 +86,6 @@ struct deadline_data { * run time data */ - /* Request queue that owns this data structure. */ - struct request_queue *queue; - struct dd_per_prio per_prio[DD_PRIO_COUNT]; /* Data direction of latest dispatched request. 
*/ @@ -234,10 +238,8 @@ static void dd_merged_requests(struct request_queue *q, struct request *req, struct deadline_data *dd = q->elevator->elevator_data; const u8 ioprio_class = dd_rq_ioclass(next); const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; - struct dd_blkcg *blkcg = next->elv.priv[0]; dd_count(dd, merged, prio); - ddcg_count(blkcg, merged, ioprio_class); /* * if next expires before rq, assign its expire time to rq @@ -375,7 +377,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd, { struct request *rq, *next_rq; enum dd_data_dir data_dir; - struct dd_blkcg *blkcg; enum dd_prio prio; u8 ioprio_class; @@ -474,8 +475,6 @@ done: ioprio_class = dd_rq_ioclass(rq); prio = ioprio_class_to_prio[ioprio_class]; dd_count(dd, dispatched, prio); - blkcg = rq->elv.priv[0]; - ddcg_count(blkcg, dispatched, ioprio_class); /* * If the request needs its target zone locked, do it. */ @@ -569,8 +568,6 @@ static void dd_exit_sched(struct elevator_queue *e) struct deadline_data *dd = e->elevator_data; enum dd_prio prio; - dd_deactivate_policy(dd->queue); - for (prio = 0; prio <= DD_PRIO_MAX; prio++) { struct dd_per_prio *per_prio = &dd->per_prio[prio]; @@ -584,7 +581,7 @@ static void dd_exit_sched(struct elevator_queue *e) } /* - * Initialize elevator private data (deadline_data) and associate with blkcg. + * initialize elevator private data (deadline_data). */ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) { @@ -593,12 +590,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) enum dd_prio prio; int ret = -ENOMEM; - /* - * Initialization would be very tricky if the queue is not frozen, - * hence the warning statement below. - */ - WARN_ON_ONCE(!percpu_ref_is_zero(&q->q_usage_counter)); - eq = elevator_alloc(q, e); if (!eq) return ret; @@ -614,8 +605,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) if (!dd->stats) goto free_dd; - dd->queue = q; - for (prio = 0; prio <= DD_PRIO_MAX; prio++) { struct dd_per_prio *per_prio = &dd->per_prio[prio]; @@ -635,17 +624,9 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) spin_lock_init(&dd->lock); spin_lock_init(&dd->zone_lock); - ret = dd_activate_policy(q); - if (ret) - goto free_stats; - - ret = 0; q->elevator = eq; return 0; -free_stats: - free_percpu(dd->stats); - free_dd: kfree(dd); @@ -718,7 +699,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio); struct dd_per_prio *per_prio; enum dd_prio prio; - struct dd_blkcg *blkcg; LIST_HEAD(free); lockdep_assert_held(&dd->lock); @@ -729,18 +709,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, */ blk_req_zone_write_unlock(rq); - /* - * If a block cgroup has been associated with the submitter and if an - * I/O priority has been set in the associated block cgroup, use the - * lowest of the cgroup priority and the request priority for the - * request. If no priority has been set in the request, use the cgroup - * priority. - */ prio = ioprio_class_to_prio[ioprio_class]; dd_count(dd, inserted, prio); - blkcg = dd_blkcg_from_bio(rq->bio); - ddcg_count(blkcg, inserted, ioprio_class); - rq->elv.priv[0] = blkcg; if (blk_mq_sched_try_insert_merge(q, rq, &free)) { blk_mq_free_requests(&free); @@ -789,10 +759,12 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, spin_unlock(&dd->lock); } -/* Callback from inside blk_mq_rq_ctx_init(). */ +/* + * Nothing to do here. 
This is defined only to ensure that .finish_request + * method is called upon request completion. + */ static void dd_prepare_request(struct request *rq) { - rq->elv.priv[0] = NULL; } /* @@ -815,13 +787,11 @@ static void dd_finish_request(struct request *rq) { struct request_queue *q = rq->q; struct deadline_data *dd = q->elevator->elevator_data; - struct dd_blkcg *blkcg = rq->elv.priv[0]; const u8 ioprio_class = dd_rq_ioclass(rq); const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; struct dd_per_prio *per_prio = &dd->per_prio[prio]; dd_count(dd, completed, prio); - ddcg_count(blkcg, completed, ioprio_class); if (blk_queue_is_zoned(q)) { unsigned long flags; @@ -1144,26 +1114,11 @@ MODULE_ALIAS("mq-deadline-iosched"); static int __init deadline_init(void) { - int ret; - - ret = elv_register(&mq_deadline); - if (ret) - goto out; - ret = dd_blkcg_init(); - if (ret) - goto unreg; - -out: - return ret; - -unreg: - elv_unregister(&mq_deadline); - goto out; + return elv_register(&mq_deadline); } static void __exit deadline_exit(void) { - dd_blkcg_exit(); elv_unregister(&mq_deadline); } diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index cc86534c80ad..b8b518d7fb77 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/** +/* * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) * * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> diff --git a/crypto/Kconfig b/crypto/Kconfig index ca3b02dcbbfa..64b772c5d1c9 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1768,7 +1768,7 @@ config CRYPTO_DRBG_HMAC bool default y select CRYPTO_HMAC - select CRYPTO_SHA256 + select CRYPTO_SHA512 config CRYPTO_DRBG_HASH bool "Enable Hash DRBG" diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 38e10ab976e6..14b71b41e845 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -379,13 +379,6 @@ acpi_ns_repair_CID(struct acpi_evaluate_info *info, (*element_ptr)->common.reference_count = original_ref_count; - - /* - * The original_element holds a reference from the package object - * that represents _HID. Since a new element was created by _HID, - * remove the reference from the _CID package. 
- */ - acpi_ut_remove_reference(original_element); } element_ptr++; diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 23d9a09d7060..a3ef6cce644c 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3021,6 +3021,9 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; struct nd_mapping_desc *mapping; + /* range index 0 == unmapped in SPA or invalid-SPA */ + if (memdev->range_index == 0 || spa->range_index == 0) + continue; if (memdev->range_index != spa->range_index) continue; if (count >= ND_MAX_MAPPINGS) { diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 4f2951cbe69c..d0e67ec46216 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2167,10 +2167,10 @@ static int hrz_open (struct atm_vcc *atm_vcc) // Part of the job is done by atm_pcr_goal which gives us a PCR // specification which says: EITHER grab the maximum available PCR - // (and perhaps a lower bound which we musn't pass), OR grab this + // (and perhaps a lower bound which we must not pass), OR grab this // amount, rounding down if you have to (and perhaps a lower bound - // which we musn't pass) OR grab this amount, rounding up if you - // have to (and perhaps an upper bound which we musn't pass). If any + // which we must not pass) OR grab this amount, rounding up if you + // have to (and perhaps an upper bound which we must not pass). If any // bounds ARE passed we fail. Note that rounding is only rounding to // match device limitations, we do not round down to satisfy // bandwidth availability even if this would not violate any given diff --git a/drivers/base/core.c b/drivers/base/core.c index f6360490a4a3..6c0ef9d55a34 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2837,6 +2837,7 @@ void device_initialize(struct device *dev) device_pm_init(dev); set_dev_node(dev, -1); #ifdef CONFIG_GENERIC_MSI_IRQ + raw_spin_lock_init(&dev->msi_lock); INIT_LIST_HEAD(&dev->msi_list); #endif INIT_LIST_HEAD(&dev->links.consumers); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index daeb9b5763ae..437cd61343b2 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -653,8 +653,6 @@ dev_groups_failed: else if (drv->remove) drv->remove(dev); probe_failed: - kfree(dev->dma_range_map); - dev->dma_range_map = NULL; if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); @@ -662,6 +660,8 @@ pinctrl_bind_failed: device_links_no_driver(dev); devres_release_all(dev); arch_teardown_dma_ops(dev); + kfree(dev->dma_range_map); + dev->dma_range_map = NULL; driver_sysfs_remove(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index 91899d185e31..d7d63c1aa993 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -89,12 +89,11 @@ static void __fw_load_abort(struct fw_priv *fw_priv) { /* * There is a small window in which user can write to 'loading' - * between loading done and disappearance of 'loading' + * between loading done/aborted and disappearance of 'loading' */ - if (fw_sysfs_done(fw_priv)) + if (fw_state_is_aborted(fw_priv) || fw_sysfs_done(fw_priv)) return; - list_del_init(&fw_priv->pending_list); fw_state_aborted(fw_priv); } @@ -280,7 +279,6 @@ static ssize_t firmware_loading_store(struct device *dev, * Same logic as fw_load_abort, only the DONE bit * is ignored and we set ABORT only on 
failure. */ - list_del_init(&fw_priv->pending_list); if (rc) { fw_state_aborted(fw_priv); written = rc; @@ -513,6 +511,11 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) } mutex_lock(&fw_lock); + if (fw_state_is_aborted(fw_priv)) { + mutex_unlock(&fw_lock); + retval = -EINTR; + goto out; + } list_add(&fw_priv->pending_list, &pending_fw_head); mutex_unlock(&fw_lock); @@ -535,11 +538,10 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) if (fw_state_is_aborted(fw_priv)) { if (retval == -ERESTARTSYS) retval = -EINTR; - else - retval = -EAGAIN; } else if (fw_priv->is_paged_buf && !fw_priv->data) retval = -ENOMEM; +out: device_del(f_dev); err_put_dev: put_device(f_dev); diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h index 63bd29fdcb9c..a3014e9e2c85 100644 --- a/drivers/base/firmware_loader/firmware.h +++ b/drivers/base/firmware_loader/firmware.h @@ -117,8 +117,16 @@ static inline void __fw_state_set(struct fw_priv *fw_priv, WRITE_ONCE(fw_st->status, status); - if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) + if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) { +#ifdef CONFIG_FW_LOADER_USER_HELPER + /* + * Doing this here ensures that the fw_priv is deleted from + * the pending list in all abort/done paths. + */ + list_del_init(&fw_priv->pending_list); +#endif complete_all(&fw_st->completion); + } } static inline void fw_state_aborted(struct fw_priv *fw_priv) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 4fdb8219cd08..68c549d71230 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -783,8 +783,10 @@ static void fw_abort_batch_reqs(struct firmware *fw) return; fw_priv = fw->priv; + mutex_lock(&fw_lock); if (!fw_state_is_aborted(fw_priv)) fw_state_aborted(fw_priv); + mutex_unlock(&fw_lock); } /* called from request_firmware() and request_firmware_work_func() */ diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index 7b4dd10af9ec..c84be0028f63 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -74,7 +74,7 @@ static bool n64cart_do_bvec(struct device *dev, struct bio_vec *bv, u32 pos) n64cart_wait_dma(); - n64cart_write_reg(PI_DRAM_REG, dma_addr + bv->bv_offset); + n64cart_write_reg(PI_DRAM_REG, dma_addr); n64cart_write_reg(PI_CART_REG, (bstart | CART_DOMAIN) & CART_MAX); n64cart_write_reg(PI_WRITE_REG, bv->bv_len - 1); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c38317979f74..19f5d5a8b16a 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -818,6 +818,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); + /* don't abort one completed request */ + if (blk_mq_request_completed(req)) + return true; + mutex_lock(&cmd->lock); cmd->status = BLK_STS_IOERR; mutex_unlock(&cmd->lock); @@ -2004,15 +2008,19 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) { mutex_lock(&nbd->config_lock); nbd_disconnect(nbd); - nbd_clear_sock(nbd); - mutex_unlock(&nbd->config_lock); + sock_shutdown(nbd); /* * Make sure recv thread has finished, so it does not drop the last * config ref and try to destroy the workqueue from inside the work - * queue. + * queue. And this also ensure that we can safely call nbd_clear_que() + * to cancel the inflight I/Os. 
*/ if (nbd->recv_workq) flush_workqueue(nbd->recv_workq); + nbd_clear_que(nbd); + nbd->task_setup = NULL; + mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, &nbd->config->runtime_flags)) nbd_config_put(nbd); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4b49df2dfd23..afb37aac09e8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -692,6 +692,28 @@ static const struct blk_mq_ops virtio_mq_ops = { static unsigned int virtblk_queue_depth; module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); +static int virtblk_validate(struct virtio_device *vdev) +{ + u32 blk_size; + + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + if (!virtio_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE)) + return 0; + + blk_size = virtio_cread32(vdev, + offsetof(struct virtio_blk_config, blk_size)); + + if (blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE) + __virtio_clear_bit(vdev, VIRTIO_BLK_F_BLK_SIZE); + + return 0; +} + static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -703,12 +725,6 @@ static int virtblk_probe(struct virtio_device *vdev) u8 physical_block_exp, alignment_offset; unsigned int queue_depth; - if (!vdev->config->get) { - dev_err(&vdev->dev, "%s failure: config access disabled\n", - __func__); - return -EINVAL; - } - err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS), GFP_KERNEL); if (err < 0) @@ -823,6 +839,14 @@ static int virtblk_probe(struct virtio_device *vdev) else blk_size = queue_logical_block_size(q); + if (unlikely(blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)) { + dev_err(&vdev->dev, + "block size is changed unexpectedly, now is %u\n", + blk_size); + err = -EINVAL; + goto err_cleanup_disk; + } + /* Use topology information if available */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, physical_block_exp, @@ -881,6 +905,8 @@ static int virtblk_probe(struct virtio_device *vdev) device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); return 0; +err_cleanup_disk: + blk_cleanup_disk(vblk->disk); out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); out_free_vq: @@ -983,6 +1009,7 @@ static struct virtio_driver virtio_blk = { .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, + .validate = virtblk_validate, .probe = virtblk_probe, .remove = virtblk_remove, .config_changed = virtblk_config_changed, diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index e5d706ed55ea..e4182acee488 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -387,6 +387,7 @@ struct bcm_subver_table { }; static const struct bcm_subver_table bcm_uart_subver_table[] = { + { 0x1111, "BCM4362A2" }, /* 000.017.017 */ { 0x4103, "BCM4330B1" }, /* 002.001.003 */ { 0x410d, "BCM4334B0" }, /* 002.001.013 */ { 0x410e, "BCM43341B0" }, /* 002.001.014 */ diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index e44b6993cf91..f1705b46fc88 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -131,6 +131,26 @@ int btintel_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) } EXPORT_SYMBOL_GPL(btintel_set_bdaddr); +static int btintel_set_event_mask(struct hci_dev *hdev, bool debug) +{ + u8 mask[8] = { 0x87, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + struct sk_buff *skb; + int err; + + if (debug) + mask[1] |= 0x62; + + skb = __hci_cmd_sync(hdev, 0xfc52, 8, mask, 
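The new virtblk_validate() above clears VIRTIO_BLK_F_BLK_SIZE when the device advertises a block size outside the [SECTOR_SIZE, PAGE_SIZE] range, and virtblk_probe() re-checks the value in case it changed unexpectedly after feature negotiation. A minimal sketch of that sanity check, with SECTOR_SIZE/PAGE_SIZE values assumed for a typical 4 KiB-page build:

#include <stdbool.h>
#include <stdio.h>

#define SECTOR_SIZE 512u
#define PAGE_SIZE   4096u	/* assumed; depends on the architecture */

/* A device-reported block size is usable only if it lies within
 * [SECTOR_SIZE, PAGE_SIZE]; anything else from an untrusted device
 * is ignored or rejected. */
static bool blk_size_valid(unsigned int blk_size)
{
	return blk_size >= SECTOR_SIZE && blk_size <= PAGE_SIZE;
}

int main(void)
{
	unsigned int sizes[] = { 0, 512, 4096, 8192 };

	for (int i = 0; i < 4; i++)
		printf("%u -> %s\n", sizes[i],
		       blk_size_valid(sizes[i]) ? "ok" : "reject");
	return 0;
}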
HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "Setting Intel event mask failed (%d)", err); + return err; + } + kfree_skb(skb); + + return 0; +} + int btintel_set_diag(struct hci_dev *hdev, bool enable) { struct sk_buff *skb; @@ -164,7 +184,7 @@ done: } EXPORT_SYMBOL_GPL(btintel_set_diag); -int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable) +static int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable) { int err, ret; @@ -180,9 +200,25 @@ int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable) return ret; } -EXPORT_SYMBOL_GPL(btintel_set_diag_mfg); -void btintel_hw_error(struct hci_dev *hdev, u8 code) +static int btintel_set_diag_combined(struct hci_dev *hdev, bool enable) +{ + int ret; + + /* Legacy ROM device needs to be in the manufacturer mode to apply + * diagnostic setting + * + * This flag is set after reading the Intel version. + */ + if (btintel_test_flag(hdev, INTEL_ROM_LEGACY)) + ret = btintel_set_diag_mfg(hdev, enable); + else + ret = btintel_set_diag(hdev, enable); + + return ret; +} + +static void btintel_hw_error(struct hci_dev *hdev, u8 code) { struct sk_buff *skb; u8 type = 0x00; @@ -214,7 +250,6 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code) kfree_skb(skb); } -EXPORT_SYMBOL_GPL(btintel_hw_error); int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver) { @@ -236,6 +271,8 @@ int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver) * compatibility options when newer hardware variants come along. */ switch (ver->hw_variant) { + case 0x07: /* WP - Legacy ROM */ + case 0x08: /* StP - Legacy ROM */ case 0x0b: /* SfP */ case 0x0c: /* WsP */ case 0x11: /* JfP */ @@ -250,9 +287,15 @@ int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver) } switch (ver->fw_variant) { + case 0x01: + variant = "Legacy ROM 2.5"; + break; case 0x06: variant = "Bootloader"; break; + case 0x22: + variant = "Legacy ROM 2.x"; + break; case 0x23: variant = "Firmware"; break; @@ -270,8 +313,8 @@ int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver) } EXPORT_SYMBOL_GPL(btintel_version_info); -int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen, - const void *param) +static int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen, + const void *param) { while (plen > 0) { struct sk_buff *skb; @@ -293,7 +336,6 @@ int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen, return 0; } -EXPORT_SYMBOL_GPL(btintel_secure_send); int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name) { @@ -340,27 +382,6 @@ int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name) } EXPORT_SYMBOL_GPL(btintel_load_ddc_config); -int btintel_set_event_mask(struct hci_dev *hdev, bool debug) -{ - u8 mask[8] = { 0x87, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - struct sk_buff *skb; - int err; - - if (debug) - mask[1] |= 0x62; - - skb = __hci_cmd_sync(hdev, 0xfc52, 8, mask, HCI_INIT_TIMEOUT); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - bt_dev_err(hdev, "Setting Intel event mask failed (%d)", err); - return err; - } - kfree_skb(skb); - - return 0; -} -EXPORT_SYMBOL_GPL(btintel_set_event_mask); - int btintel_set_event_mask_mfg(struct hci_dev *hdev, bool debug) { int err, ret; @@ -404,7 +425,8 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver) } EXPORT_SYMBOL_GPL(btintel_read_version); -int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *version) +static int 
btintel_version_info_tlv(struct hci_dev *hdev, + struct intel_version_tlv *version) { const char *variant; @@ -481,30 +503,11 @@ int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver return 0; } -EXPORT_SYMBOL_GPL(btintel_version_info_tlv); -int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *version) +static int btintel_parse_version_tlv(struct hci_dev *hdev, + struct intel_version_tlv *version, + struct sk_buff *skb) { - struct sk_buff *skb; - const u8 param[1] = { 0xFF }; - - if (!version) - return -EINVAL; - - skb = __hci_cmd_sync(hdev, 0xfc05, 1, param, HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) { - bt_dev_err(hdev, "Reading Intel version information failed (%ld)", - PTR_ERR(skb)); - return PTR_ERR(skb); - } - - if (skb->data[0]) { - bt_dev_err(hdev, "Intel Read Version command failed (%02x)", - skb->data[0]); - kfree_skb(skb); - return -EIO; - } - /* Consume Command Complete Status field */ skb_pull(skb, 1); @@ -516,7 +519,16 @@ int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver while (skb->len) { struct intel_tlv *tlv; + /* Make sure skb has a minimum length of the header */ + if (skb->len < sizeof(*tlv)) + return -EINVAL; + tlv = (struct intel_tlv *)skb->data; + + /* Make sure skb has a enough data */ + if (skb->len < tlv->len + sizeof(*tlv)) + return -EINVAL; + switch (tlv->type) { case INTEL_TLV_CNVI_TOP: version->cnvi_top = get_unaligned_le32(tlv->val); @@ -580,7 +592,8 @@ int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver version->sbe_type = tlv->val[0]; break; case INTEL_TLV_OTP_BDADDR: - memcpy(&version->otp_bd_addr, tlv->val, tlv->len); + memcpy(&version->otp_bd_addr, tlv->val, + sizeof(bdaddr_t)); break; default: /* Ignore rest of information */ @@ -590,10 +603,37 @@ int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver skb_pull(skb, tlv->len + sizeof(*tlv)); } + return 0; +} + +static int btintel_read_version_tlv(struct hci_dev *hdev, + struct intel_version_tlv *version) +{ + struct sk_buff *skb; + const u8 param[1] = { 0xFF }; + + if (!version) + return -EINVAL; + + skb = __hci_cmd_sync(hdev, 0xfc05, 1, param, HCI_CMD_TIMEOUT); + if (IS_ERR(skb)) { + bt_dev_err(hdev, "Reading Intel version information failed (%ld)", + PTR_ERR(skb)); + return PTR_ERR(skb); + } + + if (skb->data[0]) { + bt_dev_err(hdev, "Intel Read Version command failed (%02x)", + skb->data[0]); + kfree_skb(skb); + return -EIO; + } + + btintel_parse_version_tlv(hdev, version, skb); + kfree_skb(skb); return 0; } -EXPORT_SYMBOL_GPL(btintel_read_version_tlv); /* ------- REGMAP IBT SUPPORT ------- */ @@ -1066,10 +1106,10 @@ int btintel_download_firmware(struct hci_dev *hdev, } EXPORT_SYMBOL_GPL(btintel_download_firmware); -int btintel_download_firmware_newgen(struct hci_dev *hdev, - struct intel_version_tlv *ver, - const struct firmware *fw, u32 *boot_param, - u8 hw_variant, u8 sbe_type) +static int btintel_download_fw_tlv(struct hci_dev *hdev, + struct intel_version_tlv *ver, + const struct firmware *fw, u32 *boot_param, + u8 hw_variant, u8 sbe_type) { int err; u32 css_header_ver; @@ -1166,9 +1206,8 @@ int btintel_download_firmware_newgen(struct hci_dev *hdev, } return 0; } -EXPORT_SYMBOL_GPL(btintel_download_firmware_newgen); -void btintel_reset_to_bootloader(struct hci_dev *hdev) +static void btintel_reset_to_bootloader(struct hci_dev *hdev) { struct intel_reset params; struct sk_buff *skb; @@ -1211,10 +1250,9 @@ void btintel_reset_to_bootloader(struct hci_dev *hdev) */ 
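btintel_parse_version_tlv() above now verifies, before each iteration, that the remaining buffer holds at least a full TLV header and then at least tlv->len bytes of value, so a truncated or malicious response cannot walk past the end of the skb. A standalone sketch of that bounds-checked walk, assuming the same one-byte type / one-byte length framing:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct tlv {
	uint8_t type;
	uint8_t len;
	uint8_t val[];
};

static int parse_tlvs(const uint8_t *buf, size_t len)
{
	while (len) {
		const struct tlv *tlv;

		/* The header must fit in what is left... */
		if (len < sizeof(*tlv))
			return -1;
		tlv = (const struct tlv *)buf;
		/* ...and so must the advertised value. */
		if (len < sizeof(*tlv) + tlv->len)
			return -1;

		printf("type 0x%02x, len %u\n", tlv->type, (unsigned)tlv->len);
		buf += sizeof(*tlv) + tlv->len;
		len -= sizeof(*tlv) + tlv->len;
	}
	return 0;
}

int main(void)
{
	const uint8_t good[] = { 0x10, 0x04, 0xde, 0xad, 0xbe, 0xef, 0x11, 0x00 };
	const uint8_t bad[]  = { 0x10, 0x20, 0x01 };	/* claims more than it has */

	printf("good: %d\n", parse_tlvs(good, sizeof(good)));
	printf("bad:  %d\n", parse_tlvs(bad, sizeof(bad)));
	return 0;
}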
msleep(150); } -EXPORT_SYMBOL_GPL(btintel_reset_to_bootloader); -int btintel_read_debug_features(struct hci_dev *hdev, - struct intel_debug_features *features) +static int btintel_read_debug_features(struct hci_dev *hdev, + struct intel_debug_features *features) { struct sk_buff *skb; u8 page_no = 1; @@ -1243,9 +1281,8 @@ int btintel_read_debug_features(struct hci_dev *hdev, kfree_skb(skb); return 0; } -EXPORT_SYMBOL_GPL(btintel_read_debug_features); -int btintel_set_debug_features(struct hci_dev *hdev, +static int btintel_set_debug_features(struct hci_dev *hdev, const struct intel_debug_features *features) { u8 mask[11] = { 0x0a, 0x92, 0x02, 0x07, 0x00, 0x00, 0x00, 0x00, @@ -1270,7 +1307,1154 @@ int btintel_set_debug_features(struct hci_dev *hdev, kfree_skb(skb); return 0; } -EXPORT_SYMBOL_GPL(btintel_set_debug_features); + +static const struct firmware *btintel_legacy_rom_get_fw(struct hci_dev *hdev, + struct intel_version *ver) +{ + const struct firmware *fw; + char fwname[64]; + int ret; + + snprintf(fwname, sizeof(fwname), + "intel/ibt-hw-%x.%x.%x-fw-%x.%x.%x.%x.%x.bseq", + ver->hw_platform, ver->hw_variant, ver->hw_revision, + ver->fw_variant, ver->fw_revision, ver->fw_build_num, + ver->fw_build_ww, ver->fw_build_yy); + + ret = request_firmware(&fw, fwname, &hdev->dev); + if (ret < 0) { + if (ret == -EINVAL) { + bt_dev_err(hdev, "Intel firmware file request failed (%d)", + ret); + return NULL; + } + + bt_dev_err(hdev, "failed to open Intel firmware file: %s (%d)", + fwname, ret); + + /* If the correct firmware patch file is not found, use the + * default firmware patch file instead + */ + snprintf(fwname, sizeof(fwname), "intel/ibt-hw-%x.%x.bseq", + ver->hw_platform, ver->hw_variant); + if (request_firmware(&fw, fwname, &hdev->dev) < 0) { + bt_dev_err(hdev, "failed to open default fw file: %s", + fwname); + return NULL; + } + } + + bt_dev_info(hdev, "Intel Bluetooth firmware file: %s", fwname); + + return fw; +} + +static int btintel_legacy_rom_patching(struct hci_dev *hdev, + const struct firmware *fw, + const u8 **fw_ptr, int *disable_patch) +{ + struct sk_buff *skb; + struct hci_command_hdr *cmd; + const u8 *cmd_param; + struct hci_event_hdr *evt = NULL; + const u8 *evt_param = NULL; + int remain = fw->size - (*fw_ptr - fw->data); + + /* The first byte indicates the types of the patch command or event. + * 0x01 means HCI command and 0x02 is HCI event. If the first bytes + * in the current firmware buffer doesn't start with 0x01 or + * the size of remain buffer is smaller than HCI command header, + * the firmware file is corrupted and it should stop the patching + * process. + */ + if (remain > HCI_COMMAND_HDR_SIZE && *fw_ptr[0] != 0x01) { + bt_dev_err(hdev, "Intel fw corrupted: invalid cmd read"); + return -EINVAL; + } + (*fw_ptr)++; + remain--; + + cmd = (struct hci_command_hdr *)(*fw_ptr); + *fw_ptr += sizeof(*cmd); + remain -= sizeof(*cmd); + + /* Ensure that the remain firmware data is long enough than the length + * of command parameter. If not, the firmware file is corrupted. + */ + if (remain < cmd->plen) { + bt_dev_err(hdev, "Intel fw corrupted: invalid cmd len"); + return -EFAULT; + } + + /* If there is a command that loads a patch in the firmware + * file, then enable the patch upon success, otherwise just + * disable the manufacturer mode, for example patch activation + * is not required when the default firmware patch file is used + * because there are no patch data to load. 
+ */ + if (*disable_patch && le16_to_cpu(cmd->opcode) == 0xfc8e) + *disable_patch = 0; + + cmd_param = *fw_ptr; + *fw_ptr += cmd->plen; + remain -= cmd->plen; + + /* This reads the expected events when the above command is sent to the + * device. Some vendor commands expects more than one events, for + * example command status event followed by vendor specific event. + * For this case, it only keeps the last expected event. so the command + * can be sent with __hci_cmd_sync_ev() which returns the sk_buff of + * last expected event. + */ + while (remain > HCI_EVENT_HDR_SIZE && *fw_ptr[0] == 0x02) { + (*fw_ptr)++; + remain--; + + evt = (struct hci_event_hdr *)(*fw_ptr); + *fw_ptr += sizeof(*evt); + remain -= sizeof(*evt); + + if (remain < evt->plen) { + bt_dev_err(hdev, "Intel fw corrupted: invalid evt len"); + return -EFAULT; + } + + evt_param = *fw_ptr; + *fw_ptr += evt->plen; + remain -= evt->plen; + } + + /* Every HCI commands in the firmware file has its correspond event. + * If event is not found or remain is smaller than zero, the firmware + * file is corrupted. + */ + if (!evt || !evt_param || remain < 0) { + bt_dev_err(hdev, "Intel fw corrupted: invalid evt read"); + return -EFAULT; + } + + skb = __hci_cmd_sync_ev(hdev, le16_to_cpu(cmd->opcode), cmd->plen, + cmd_param, evt->evt, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + bt_dev_err(hdev, "sending Intel patch command (0x%4.4x) failed (%ld)", + cmd->opcode, PTR_ERR(skb)); + return PTR_ERR(skb); + } + + /* It ensures that the returned event matches the event data read from + * the firmware file. At fist, it checks the length and then + * the contents of the event. + */ + if (skb->len != evt->plen) { + bt_dev_err(hdev, "mismatch event length (opcode 0x%4.4x)", + le16_to_cpu(cmd->opcode)); + kfree_skb(skb); + return -EFAULT; + } + + if (memcmp(skb->data, evt_param, evt->plen)) { + bt_dev_err(hdev, "mismatch event parameter (opcode 0x%4.4x)", + le16_to_cpu(cmd->opcode)); + kfree_skb(skb); + return -EFAULT; + } + kfree_skb(skb); + + return 0; +} + +static int btintel_legacy_rom_setup(struct hci_dev *hdev, + struct intel_version *ver) +{ + const struct firmware *fw; + const u8 *fw_ptr; + int disable_patch, err; + struct intel_version new_ver; + + BT_DBG("%s", hdev->name); + + /* fw_patch_num indicates the version of patch the device currently + * have. If there is no patch data in the device, it is always 0x00. + * So, if it is other than 0x00, no need to patch the device again. + */ + if (ver->fw_patch_num) { + bt_dev_info(hdev, + "Intel device is already patched. patch num: %02x", + ver->fw_patch_num); + goto complete; + } + + /* Opens the firmware patch file based on the firmware version read + * from the controller. If it fails to open the matching firmware + * patch file, it tries to open the default firmware patch file. + * If no patch file is found, allow the device to operate without + * a patch. + */ + fw = btintel_legacy_rom_get_fw(hdev, ver); + if (!fw) + goto complete; + fw_ptr = fw->data; + + /* Enable the manufacturer mode of the controller. + * Only while this mode is enabled, the driver can download the + * firmware patch data and configuration parameters. + */ + err = btintel_enter_mfg(hdev); + if (err) { + release_firmware(fw); + return err; + } + + disable_patch = 1; + + /* The firmware data file consists of list of Intel specific HCI + * commands and its expected events. The first byte indicates the + * type of the message, either HCI command or HCI event. 
+ * + * It reads the command and its expected event from the firmware file, + * and send to the controller. Once __hci_cmd_sync_ev() returns, + * the returned event is compared with the event read from the firmware + * file and it will continue until all the messages are downloaded to + * the controller. + * + * Once the firmware patching is completed successfully, + * the manufacturer mode is disabled with reset and activating the + * downloaded patch. + * + * If the firmware patching fails, the manufacturer mode is + * disabled with reset and deactivating the patch. + * + * If the default patch file is used, no reset is done when disabling + * the manufacturer. + */ + while (fw->size > fw_ptr - fw->data) { + int ret; + + ret = btintel_legacy_rom_patching(hdev, fw, &fw_ptr, + &disable_patch); + if (ret < 0) + goto exit_mfg_deactivate; + } + + release_firmware(fw); + + if (disable_patch) + goto exit_mfg_disable; + + /* Patching completed successfully and disable the manufacturer mode + * with reset and activate the downloaded firmware patches. + */ + err = btintel_exit_mfg(hdev, true, true); + if (err) + return err; + + /* Need build number for downloaded fw patches in + * every power-on boot + */ + err = btintel_read_version(hdev, &new_ver); + if (err) + return err; + + bt_dev_info(hdev, "Intel BT fw patch 0x%02x completed & activated", + new_ver.fw_patch_num); + + goto complete; + +exit_mfg_disable: + /* Disable the manufacturer mode without reset */ + err = btintel_exit_mfg(hdev, false, false); + if (err) + return err; + + bt_dev_info(hdev, "Intel firmware patch completed"); + + goto complete; + +exit_mfg_deactivate: + release_firmware(fw); + + /* Patching failed. Disable the manufacturer mode with reset and + * deactivate the downloaded firmware patches. + */ + err = btintel_exit_mfg(hdev, true, false); + if (err) + return err; + + bt_dev_info(hdev, "Intel firmware patch completed and deactivated"); + +complete: + /* Set the event mask for Intel specific vendor events. This enables + * a few extra events that are useful during general operation. 
+ */ + btintel_set_event_mask_mfg(hdev, false); + + btintel_check_bdaddr(hdev); + + return 0; +} + +static int btintel_download_wait(struct hci_dev *hdev, ktime_t calltime, int msec) +{ + ktime_t delta, rettime; + unsigned long long duration; + int err; + + btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED); + + bt_dev_info(hdev, "Waiting for firmware download to complete"); + + err = btintel_wait_on_flag_timeout(hdev, INTEL_DOWNLOADING, + TASK_INTERRUPTIBLE, + msecs_to_jiffies(msec)); + if (err == -EINTR) { + bt_dev_err(hdev, "Firmware loading interrupted"); + return err; + } + + if (err) { + bt_dev_err(hdev, "Firmware loading timeout"); + return -ETIMEDOUT; + } + + if (btintel_test_flag(hdev, INTEL_FIRMWARE_FAILED)) { + bt_dev_err(hdev, "Firmware loading failed"); + return -ENOEXEC; + } + + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long)ktime_to_ns(delta) >> 10; + + bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration); + + return 0; +} + +static int btintel_boot_wait(struct hci_dev *hdev, ktime_t calltime, int msec) +{ + ktime_t delta, rettime; + unsigned long long duration; + int err; + + bt_dev_info(hdev, "Waiting for device to boot"); + + err = btintel_wait_on_flag_timeout(hdev, INTEL_BOOTING, + TASK_INTERRUPTIBLE, + msecs_to_jiffies(msec)); + if (err == -EINTR) { + bt_dev_err(hdev, "Device boot interrupted"); + return -EINTR; + } + + if (err) { + bt_dev_err(hdev, "Device boot timeout"); + return -ETIMEDOUT; + } + + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + + bt_dev_info(hdev, "Device booted in %llu usecs", duration); + + return 0; +} + +static int btintel_boot(struct hci_dev *hdev, u32 boot_addr) +{ + ktime_t calltime; + int err; + + calltime = ktime_get(); + + btintel_set_flag(hdev, INTEL_BOOTING); + + err = btintel_send_intel_reset(hdev, boot_addr); + if (err) { + bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err); + btintel_reset_to_bootloader(hdev); + return err; + } + + /* The bootloader will not indicate when the device is ready. This + * is done by the operational firmware sending bootup notification. + * + * Booting into operational firmware should not take longer than + * 1 second. However if that happens, then just fail the setup + * since something went wrong. + */ + err = btintel_boot_wait(hdev, calltime, 1000); + if (err == -ETIMEDOUT) + btintel_reset_to_bootloader(hdev); + + return err; +} + +static int btintel_get_fw_name(struct intel_version *ver, + struct intel_boot_params *params, + char *fw_name, size_t len, + const char *suffix) +{ + switch (ver->hw_variant) { + case 0x0b: /* SfP */ + case 0x0c: /* WsP */ + snprintf(fw_name, len, "intel/ibt-%u-%u.%s", + le16_to_cpu(ver->hw_variant), + le16_to_cpu(params->dev_revid), + suffix); + break; + case 0x11: /* JfP */ + case 0x12: /* ThP */ + case 0x13: /* HrP */ + case 0x14: /* CcP */ + snprintf(fw_name, len, "intel/ibt-%u-%u-%u.%s", + le16_to_cpu(ver->hw_variant), + le16_to_cpu(ver->hw_revision), + le16_to_cpu(ver->fw_revision), + suffix); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int btintel_download_fw(struct hci_dev *hdev, + struct intel_version *ver, + struct intel_boot_params *params, + u32 *boot_param) +{ + const struct firmware *fw; + char fwname[64]; + int err; + ktime_t calltime; + + if (!ver || !params) + return -EINVAL; + + /* The firmware variant determines if the device is in bootloader + * mode or is running operational firmware. 
The value 0x06 identifies + * the bootloader and the value 0x23 identifies the operational + * firmware. + * + * When the operational firmware is already present, then only + * the check for valid Bluetooth device address is needed. This + * determines if the device will be added as configured or + * unconfigured controller. + * + * It is not possible to use the Secure Boot Parameters in this + * case since that command is only available in bootloader mode. + */ + if (ver->fw_variant == 0x23) { + btintel_clear_flag(hdev, INTEL_BOOTLOADER); + btintel_check_bdaddr(hdev); + + /* SfP and WsP don't seem to update the firmware version on file + * so version checking is currently possible. + */ + switch (ver->hw_variant) { + case 0x0b: /* SfP */ + case 0x0c: /* WsP */ + return 0; + } + + /* Proceed to download to check if the version matches */ + goto download; + } + + /* Read the secure boot parameters to identify the operating + * details of the bootloader. + */ + err = btintel_read_boot_params(hdev, params); + if (err) + return err; + + /* It is required that every single firmware fragment is acknowledged + * with a command complete event. If the boot parameters indicate + * that this bootloader does not send them, then abort the setup. + */ + if (params->limited_cce != 0x00) { + bt_dev_err(hdev, "Unsupported Intel firmware loading method (%u)", + params->limited_cce); + return -EINVAL; + } + + /* If the OTP has no valid Bluetooth device address, then there will + * also be no valid address for the operational firmware. + */ + if (!bacmp(¶ms->otp_bdaddr, BDADDR_ANY)) { + bt_dev_info(hdev, "No device address configured"); + set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + } + +download: + /* With this Intel bootloader only the hardware variant and device + * revision information are used to select the right firmware for SfP + * and WsP. + * + * The firmware filename is ibt-<hw_variant>-<dev_revid>.sfi. + * + * Currently the supported hardware variants are: + * 11 (0x0b) for iBT3.0 (LnP/SfP) + * 12 (0x0c) for iBT3.5 (WsP) + * + * For ThP/JfP and for future SKU's, the FW name varies based on HW + * variant, HW revision and FW revision, as these are dependent on CNVi + * and RF Combination. + * + * 17 (0x11) for iBT3.5 (JfP) + * 18 (0x12) for iBT3.5 (ThP) + * + * The firmware file name for these will be + * ibt-<hw_variant>-<hw_revision>-<fw_revision>.sfi. 
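btintel_get_fw_name() above selects the firmware file name from the hardware variant: SfP/WsP use ibt-<hw_variant>-<dev_revid>.<suffix>, while JfP/ThP/HrP/CcP also encode the hardware and firmware revisions. A small sketch of that selection with made-up version numbers (the struct below is a simplified stand-in, not the driver's intel_version/intel_boot_params types):

#include <stdio.h>

/* Hypothetical version fields, mirroring only the naming scheme. */
struct ver {
	unsigned int hw_variant, hw_revision, fw_revision, dev_revid;
};

static int fw_name(const struct ver *v, char *buf, size_t len, const char *suffix)
{
	switch (v->hw_variant) {
	case 0x0b:	/* SfP */
	case 0x0c:	/* WsP */
		snprintf(buf, len, "intel/ibt-%u-%u.%s",
			 v->hw_variant, v->dev_revid, suffix);
		return 0;
	case 0x11:	/* JfP */
	case 0x12:	/* ThP */
	case 0x13:	/* HrP */
	case 0x14:	/* CcP */
		snprintf(buf, len, "intel/ibt-%u-%u-%u.%s",
			 v->hw_variant, v->hw_revision, v->fw_revision, suffix);
		return 0;
	default:
		return -1;
	}
}

int main(void)
{
	char name[64];
	struct ver v = { .hw_variant = 0x12, .hw_revision = 1, .fw_revision = 3 };

	if (!fw_name(&v, name, sizeof(name), "sfi"))
		printf("%s\n", name);	/* e.g. intel/ibt-18-1-3.sfi */
	return 0;
}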
+ * + */ + err = btintel_get_fw_name(ver, params, fwname, sizeof(fwname), "sfi"); + if (err < 0) { + if (!btintel_test_flag(hdev, INTEL_BOOTLOADER)) { + /* Firmware has already been loaded */ + btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED); + return 0; + } + + bt_dev_err(hdev, "Unsupported Intel firmware naming"); + return -EINVAL; + } + + err = firmware_request_nowarn(&fw, fwname, &hdev->dev); + if (err < 0) { + if (!btintel_test_flag(hdev, INTEL_BOOTLOADER)) { + /* Firmware has already been loaded */ + btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED); + return 0; + } + + bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)", + fwname, err); + return err; + } + + bt_dev_info(hdev, "Found device firmware: %s", fwname); + + if (fw->size < 644) { + bt_dev_err(hdev, "Invalid size of firmware file (%zu)", + fw->size); + err = -EBADF; + goto done; + } + + calltime = ktime_get(); + + btintel_set_flag(hdev, INTEL_DOWNLOADING); + + /* Start firmware downloading and get boot parameter */ + err = btintel_download_firmware(hdev, ver, fw, boot_param); + if (err < 0) { + if (err == -EALREADY) { + /* Firmware has already been loaded */ + btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED); + err = 0; + goto done; + } + + /* When FW download fails, send Intel Reset to retry + * FW download. + */ + btintel_reset_to_bootloader(hdev); + goto done; + } + + /* Before switching the device into operational mode and with that + * booting the loaded firmware, wait for the bootloader notification + * that all fragments have been successfully received. + * + * When the event processing receives the notification, then the + * INTEL_DOWNLOADING flag will be cleared. + * + * The firmware loading should not take longer than 5 seconds + * and thus just timeout if that happens and fail the setup + * of this device. + */ + err = btintel_download_wait(hdev, calltime, 5000); + if (err == -ETIMEDOUT) + btintel_reset_to_bootloader(hdev); + +done: + release_firmware(fw); + return err; +} + +static int btintel_bootloader_setup(struct hci_dev *hdev, + struct intel_version *ver) +{ + struct intel_version new_ver; + struct intel_boot_params params; + u32 boot_param; + char ddcname[64]; + int err; + struct intel_debug_features features; + + BT_DBG("%s", hdev->name); + + /* Set the default boot parameter to 0x0 and it is updated to + * SKU specific boot parameter after reading Intel_Write_Boot_Params + * command while downloading the firmware. + */ + boot_param = 0x00000000; + + btintel_set_flag(hdev, INTEL_BOOTLOADER); + + err = btintel_download_fw(hdev, ver, ¶ms, &boot_param); + if (err) + return err; + + /* controller is already having an operational firmware */ + if (ver->fw_variant == 0x23) + goto finish; + + err = btintel_boot(hdev, boot_param); + if (err) + return err; + + btintel_clear_flag(hdev, INTEL_BOOTLOADER); + + err = btintel_get_fw_name(ver, ¶ms, ddcname, + sizeof(ddcname), "ddc"); + + if (err < 0) { + bt_dev_err(hdev, "Unsupported Intel firmware naming"); + } else { + /* Once the device is running in operational mode, it needs to + * apply the device configuration (DDC) parameters. + * + * The device can work without DDC parameters, so even if it + * fails to load the file, no need to fail the setup. + */ + btintel_load_ddc_config(hdev, ddcname); + } + + /* Read the Intel supported features and if new exception formats + * supported, need to load the additional DDC config to enable. 
+ */ + err = btintel_read_debug_features(hdev, &features); + if (!err) { + /* Set DDC mask for available debug features */ + btintel_set_debug_features(hdev, &features); + } + + /* Read the Intel version information after loading the FW */ + err = btintel_read_version(hdev, &new_ver); + if (err) + return err; + + btintel_version_info(hdev, &new_ver); + +finish: + /* Set the event mask for Intel specific vendor events. This enables + * a few extra events that are useful during general operation. It + * does not enable any debugging related events. + * + * The device will function correctly without these events enabled + * and thus no need to fail the setup. + */ + btintel_set_event_mask(hdev, false); + + return 0; +} + +static void btintel_get_fw_name_tlv(const struct intel_version_tlv *ver, + char *fw_name, size_t len, + const char *suffix) +{ + /* The firmware file name for new generation controllers will be + * ibt-<cnvi_top type+cnvi_top step>-<cnvr_top type+cnvr_top step> + */ + snprintf(fw_name, len, "intel/ibt-%04x-%04x.%s", + INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver->cnvi_top), + INTEL_CNVX_TOP_STEP(ver->cnvi_top)), + INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver->cnvr_top), + INTEL_CNVX_TOP_STEP(ver->cnvr_top)), + suffix); +} + +static int btintel_prepare_fw_download_tlv(struct hci_dev *hdev, + struct intel_version_tlv *ver, + u32 *boot_param) +{ + const struct firmware *fw; + char fwname[64]; + int err; + ktime_t calltime; + + if (!ver || !boot_param) + return -EINVAL; + + /* The firmware variant determines if the device is in bootloader + * mode or is running operational firmware. The value 0x03 identifies + * the bootloader and the value 0x23 identifies the operational + * firmware. + * + * When the operational firmware is already present, then only + * the check for valid Bluetooth device address is needed. This + * determines if the device will be added as configured or + * unconfigured controller. + * + * It is not possible to use the Secure Boot Parameters in this + * case since that command is only available in bootloader mode. + */ + if (ver->img_type == 0x03) { + btintel_clear_flag(hdev, INTEL_BOOTLOADER); + btintel_check_bdaddr(hdev); + } + + /* If the OTP has no valid Bluetooth device address, then there will + * also be no valid address for the operational firmware. 
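The TLV-based file name built by btintel_get_fw_name_tlv() above packs a CNVi/CNVR "type" together with a 4-bit "step" into a byte-swapped 16-bit value (see INTEL_CNVX_TOP_PACK_SWAB() in the btintel.h hunk below). A standalone sketch of that arithmetic, with arbitrary example values:

#include <stdio.h>
#include <stdint.h>

/* Illustration only: (type << 4) | step, then swap the two bytes,
 * as INTEL_CNVX_TOP_PACK_SWAB() does. The values are made up.
 */
static uint16_t pack_swab(uint16_t type, uint16_t step)
{
	uint16_t v = (uint16_t)((type << 4) | (step & 0x0f));

	return (uint16_t)((v << 8) | (v >> 8));
}

int main(void)
{
	char name[64];

	snprintf(name, sizeof(name), "intel/ibt-%04x-%04x.sfi",
		 pack_swab(0x410, 0), pack_swab(0x410, 0));
	printf("%s\n", name);		/* intel/ibt-0041-0041.sfi */

	return 0;
}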
+ */ + if (!bacmp(&ver->otp_bd_addr, BDADDR_ANY)) { + bt_dev_info(hdev, "No device address configured"); + set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + } + + btintel_get_fw_name_tlv(ver, fwname, sizeof(fwname), "sfi"); + err = firmware_request_nowarn(&fw, fwname, &hdev->dev); + if (err < 0) { + if (!btintel_test_flag(hdev, INTEL_BOOTLOADER)) { + /* Firmware has already been loaded */ + btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED); + return 0; + } + + bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)", + fwname, err); + + return err; + } + + bt_dev_info(hdev, "Found device firmware: %s", fwname); + + if (fw->size < 644) { + bt_dev_err(hdev, "Invalid size of firmware file (%zu)", + fw->size); + err = -EBADF; + goto done; + } + + calltime = ktime_get(); + + btintel_set_flag(hdev, INTEL_DOWNLOADING); + + /* Start firmware downloading and get boot parameter */ + err = btintel_download_fw_tlv(hdev, ver, fw, boot_param, + INTEL_HW_VARIANT(ver->cnvi_bt), + ver->sbe_type); + if (err < 0) { + if (err == -EALREADY) { + /* Firmware has already been loaded */ + btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED); + err = 0; + goto done; + } + + /* When FW download fails, send Intel Reset to retry + * FW download. + */ + btintel_reset_to_bootloader(hdev); + goto done; + } + + /* Before switching the device into operational mode and with that + * booting the loaded firmware, wait for the bootloader notification + * that all fragments have been successfully received. + * + * When the event processing receives the notification, then the + * BTUSB_DOWNLOADING flag will be cleared. + * + * The firmware loading should not take longer than 5 seconds + * and thus just timeout if that happens and fail the setup + * of this device. + */ + err = btintel_download_wait(hdev, calltime, 5000); + if (err == -ETIMEDOUT) + btintel_reset_to_bootloader(hdev); + +done: + release_firmware(fw); + return err; +} + +static int btintel_bootloader_setup_tlv(struct hci_dev *hdev, + struct intel_version_tlv *ver) +{ + u32 boot_param; + char ddcname[64]; + int err; + struct intel_debug_features features; + struct intel_version_tlv new_ver; + + bt_dev_dbg(hdev, ""); + + /* Set the default boot parameter to 0x0 and it is updated to + * SKU specific boot parameter after reading Intel_Write_Boot_Params + * command while downloading the firmware. + */ + boot_param = 0x00000000; + + btintel_set_flag(hdev, INTEL_BOOTLOADER); + + err = btintel_prepare_fw_download_tlv(hdev, ver, &boot_param); + if (err) + return err; + + /* check if controller is already having an operational firmware */ + if (ver->img_type == 0x03) + goto finish; + + err = btintel_boot(hdev, boot_param); + if (err) + return err; + + btintel_clear_flag(hdev, INTEL_BOOTLOADER); + + btintel_get_fw_name_tlv(ver, ddcname, sizeof(ddcname), "ddc"); + /* Once the device is running in operational mode, it needs to + * apply the device configuration (DDC) parameters. + * + * The device can work without DDC parameters, so even if it + * fails to load the file, no need to fail the setup. + */ + btintel_load_ddc_config(hdev, ddcname); + + /* Read the Intel supported features and if new exception formats + * supported, need to load the additional DDC config to enable. 
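The download handshake described in the comments above (set a DOWNLOADING flag before the transfer, let the vendor-event handler clear it once the controller has acknowledged all fragments, give up after 5 seconds) is implemented in kernel terms with the wait_on_bit_timeout()/wake_up_bit() helpers added to btintel.h below and with btintel_secure_send_result() clearing the flag. A rough userspace analogue of the same pattern, purely for illustration and not the kernel code:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool downloading = true;

/* Stands in for the vendor event handler that clears INTEL_DOWNLOADING. */
static void *event_handler(void *arg)
{
	(void)arg;
	sleep(1);			/* pretend the firmware took 1 s to load */
	pthread_mutex_lock(&lock);
	downloading = false;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	struct timespec deadline;
	pthread_t thr;
	int err = 0;

	pthread_create(&thr, NULL, event_handler, NULL);

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 5;		/* 5 second download timeout */

	pthread_mutex_lock(&lock);
	while (downloading && err == 0)
		err = pthread_cond_timedwait(&cond, &lock, &deadline);
	pthread_mutex_unlock(&lock);

	printf(err ? "download timed out\n" : "download completed\n");
	pthread_join(thr, NULL);
	return 0;
}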
+ */ + err = btintel_read_debug_features(hdev, &features); + if (!err) { + /* Set DDC mask for available debug features */ + btintel_set_debug_features(hdev, &features); + } + + /* Read the Intel version information after loading the FW */ + err = btintel_read_version_tlv(hdev, &new_ver); + if (err) + return err; + + btintel_version_info_tlv(hdev, &new_ver); + +finish: + /* Set the event mask for Intel specific vendor events. This enables + * a few extra events that are useful during general operation. It + * does not enable any debugging related events. + * + * The device will function correctly without these events enabled + * and thus no need to fail the setup. + */ + btintel_set_event_mask(hdev, false); + + return 0; +} + +static void btintel_set_msft_opcode(struct hci_dev *hdev, u8 hw_variant) +{ + switch (hw_variant) { + /* Legacy bootloader devices that supports MSFT Extension */ + case 0x11: /* JfP */ + case 0x12: /* ThP */ + case 0x13: /* HrP */ + case 0x14: /* CcP */ + /* All Intel new genration controllers support the Microsoft vendor + * extension are using 0xFC1E for VsMsftOpCode. + */ + case 0x17: + case 0x18: + case 0x19: + hci_set_msft_opcode(hdev, 0xFC1E); + break; + default: + /* Not supported */ + break; + } +} + +static int btintel_setup_combined(struct hci_dev *hdev) +{ + const u8 param[1] = { 0xFF }; + struct intel_version ver; + struct intel_version_tlv ver_tlv; + struct sk_buff *skb; + int err; + + BT_DBG("%s", hdev->name); + + /* The some controllers have a bug with the first HCI command sent to it + * returning number of completed commands as zero. This would stall the + * command processing in the Bluetooth core. + * + * As a workaround, send HCI Reset command first which will reset the + * number of completed commands and allow normal command processing + * from now on. + */ + if (btintel_test_flag(hdev, INTEL_BROKEN_INITIAL_NCMD)) { + skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, + HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + bt_dev_err(hdev, + "sending initial HCI reset failed (%ld)", + PTR_ERR(skb)); + return PTR_ERR(skb); + } + kfree_skb(skb); + } + + /* Starting from TyP device, the command parameter and response are + * changed even though the OCF for HCI_Intel_Read_Version command + * remains same. The legacy devices can handle even if the + * command has a parameter and returns a correct version information. + * So, it uses new format to support both legacy and new format. + */ + skb = __hci_cmd_sync(hdev, 0xfc05, 1, param, HCI_CMD_TIMEOUT); + if (IS_ERR(skb)) { + bt_dev_err(hdev, "Reading Intel version command failed (%ld)", + PTR_ERR(skb)); + return PTR_ERR(skb); + } + + /* Check the status */ + if (skb->data[0]) { + bt_dev_err(hdev, "Intel Read Version command failed (%02x)", + skb->data[0]); + err = -EIO; + goto exit_error; + } + + /* Apply the common HCI quirks for Intel device */ + set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); + set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks); + + /* For Legacy device, check the HW platform value and size */ + if (skb->len == sizeof(ver) && skb->data[1] == 0x37) { + bt_dev_dbg(hdev, "Read the legacy Intel version information"); + + memcpy(&ver, skb->data, sizeof(ver)); + + /* Display version information */ + btintel_version_info(hdev, &ver); + + /* Check for supported iBT hardware variants of this firmware + * loading method. 
+ * + * This check has been put in place to ensure correct forward + * compatibility options when newer hardware variants come + * along. + */ + switch (ver.hw_variant) { + case 0x07: /* WP */ + case 0x08: /* StP */ + /* Legacy ROM product */ + btintel_set_flag(hdev, INTEL_ROM_LEGACY); + + /* Apply the device specific HCI quirks + * + * WBS for SdP - SdP and Stp have a same hw_varaint but + * different fw_variant + */ + if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22) + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, + &hdev->quirks); + + /* These devices have an issue with LED which doesn't + * go off immediately during shutdown. Set the flag + * here to send the LED OFF command during shutdown. + */ + btintel_set_flag(hdev, INTEL_BROKEN_LED); + + err = btintel_legacy_rom_setup(hdev, &ver); + break; + case 0x0b: /* SfP */ + case 0x0c: /* WsP */ + case 0x11: /* JfP */ + case 0x12: /* ThP */ + case 0x13: /* HrP */ + case 0x14: /* CcP */ + /* Apply the device specific HCI quirks + * + * All Legacy bootloader devices support WBS + */ + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, + &hdev->quirks); + + /* Valid LE States quirk for JfP/ThP familiy */ + if (ver.hw_variant == 0x11 || ver.hw_variant == 0x12) + set_bit(HCI_QUIRK_VALID_LE_STATES, + &hdev->quirks); + + /* Setup MSFT Extension support */ + btintel_set_msft_opcode(hdev, ver.hw_variant); + + err = btintel_bootloader_setup(hdev, &ver); + break; + default: + bt_dev_err(hdev, "Unsupported Intel hw variant (%u)", + ver.hw_variant); + err = -EINVAL; + } + + goto exit_error; + } + + /* For TLV type device, parse the tlv data */ + err = btintel_parse_version_tlv(hdev, &ver_tlv, skb); + if (err) { + bt_dev_err(hdev, "Failed to parse TLV version information"); + goto exit_error; + } + + if (INTEL_HW_PLATFORM(ver_tlv.cnvi_bt) != 0x37) { + bt_dev_err(hdev, "Unsupported Intel hardware platform (0x%2x)", + INTEL_HW_PLATFORM(ver_tlv.cnvi_bt)); + err = -EINVAL; + goto exit_error; + } + + /* Check for supported iBT hardware variants of this firmware + * loading method. + * + * This check has been put in place to ensure correct forward + * compatibility options when newer hardware variants come + * along. + */ + switch (INTEL_HW_VARIANT(ver_tlv.cnvi_bt)) { + case 0x11: /* JfP */ + case 0x12: /* ThP */ + case 0x13: /* HrP */ + case 0x14: /* CcP */ + /* Some legacy bootloader devices from JfP supports both old + * and TLV based HCI_Intel_Read_Version command. But we don't + * want to use the TLV based setup routines for those legacy + * bootloader device. + * + * Also, it is not easy to convert TLV based version from the + * legacy version format. + * + * So, as a workaround for those devices, use the legacy + * HCI_Intel_Read_Version to get the version information and + * run the legacy bootloader setup. 
+ */ + err = btintel_read_version(hdev, &ver); + if (err) + return err; + err = btintel_bootloader_setup(hdev, &ver); + break; + case 0x17: + case 0x18: + case 0x19: + /* Display version information of TLV type */ + btintel_version_info_tlv(hdev, &ver_tlv); + + /* Apply the device specific HCI quirks for TLV based devices + * + * All TLV based devices support WBS + */ + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + + /* Valid LE States quirk for GfP */ + if (INTEL_HW_VARIANT(ver_tlv.cnvi_bt) == 0x18) + set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + + /* Setup MSFT Extension support */ + btintel_set_msft_opcode(hdev, + INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); + + err = btintel_bootloader_setup_tlv(hdev, &ver_tlv); + break; + default: + bt_dev_err(hdev, "Unsupported Intel hw variant (%u)", + INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); + return -EINVAL; + } + +exit_error: + kfree_skb(skb); + + return err; +} + +static int btintel_shutdown_combined(struct hci_dev *hdev) +{ + struct sk_buff *skb; + int ret; + + /* Send HCI Reset to the controller to stop any BT activity which + * were triggered. This will help to save power and maintain the + * sync b/w Host and controller + */ + skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + bt_dev_err(hdev, "HCI reset during shutdown failed"); + return PTR_ERR(skb); + } + kfree_skb(skb); + + + /* Some platforms have an issue with BT LED when the interface is + * down or BT radio is turned off, which takes 5 seconds to BT LED + * goes off. This command turns off the BT LED immediately. + */ + if (btintel_test_flag(hdev, INTEL_BROKEN_LED)) { + skb = __hci_cmd_sync(hdev, 0xfc3f, 0, NULL, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + ret = PTR_ERR(skb); + bt_dev_err(hdev, "turning off Intel device LED failed"); + return ret; + } + kfree_skb(skb); + } + + return 0; +} + +int btintel_configure_setup(struct hci_dev *hdev) +{ + hdev->manufacturer = 2; + hdev->setup = btintel_setup_combined; + hdev->shutdown = btintel_shutdown_combined; + hdev->hw_error = btintel_hw_error; + hdev->set_diag = btintel_set_diag_combined; + hdev->set_bdaddr = btintel_set_bdaddr; + + return 0; +} +EXPORT_SYMBOL_GPL(btintel_configure_setup); + +void btintel_bootup(struct hci_dev *hdev, const void *ptr, unsigned int len) +{ + const struct intel_bootup *evt = ptr; + + if (len != sizeof(*evt)) + return; + + if (btintel_test_and_clear_flag(hdev, INTEL_BOOTING)) + btintel_wake_up_flag(hdev, INTEL_BOOTING); +} +EXPORT_SYMBOL_GPL(btintel_bootup); + +void btintel_secure_send_result(struct hci_dev *hdev, + const void *ptr, unsigned int len) +{ + const struct intel_secure_send_result *evt = ptr; + + if (len != sizeof(*evt)) + return; + + if (evt->result) + btintel_set_flag(hdev, INTEL_FIRMWARE_FAILED); + + if (btintel_test_and_clear_flag(hdev, INTEL_DOWNLOADING) && + btintel_test_flag(hdev, INTEL_FIRMWARE_LOADED)) + btintel_wake_up_flag(hdev, INTEL_DOWNLOADING); +} +EXPORT_SYMBOL_GPL(btintel_secure_send_result); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth support for Intel devices ver " VERSION); diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index d184064a5e7c..aa64072bbe68 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -138,6 +138,49 @@ struct intel_debug_features { #define INTEL_CNVX_TOP_STEP(cnvx_top) (((cnvx_top) & 0x0f000000) >> 24) #define INTEL_CNVX_TOP_PACK_SWAB(t, s) __swab16(((__u16)(((t) << 4) | (s)))) +enum { + INTEL_BOOTLOADER, + 
INTEL_DOWNLOADING, + INTEL_FIRMWARE_LOADED, + INTEL_FIRMWARE_FAILED, + INTEL_BOOTING, + INTEL_BROKEN_INITIAL_NCMD, + INTEL_BROKEN_LED, + INTEL_ROM_LEGACY, + + __INTEL_NUM_FLAGS, +}; + +struct btintel_data { + DECLARE_BITMAP(flags, __INTEL_NUM_FLAGS); +}; + +#define btintel_set_flag(hdev, nr) \ + do { \ + struct btintel_data *intel = hci_get_priv((hdev)); \ + set_bit((nr), intel->flags); \ + } while (0) + +#define btintel_clear_flag(hdev, nr) \ + do { \ + struct btintel_data *intel = hci_get_priv((hdev)); \ + clear_bit((nr), intel->flags); \ + } while (0) + +#define btintel_wake_up_flag(hdev, nr) \ + do { \ + struct btintel_data *intel = hci_get_priv((hdev)); \ + wake_up_bit(intel->flags, (nr)); \ + } while (0) + +#define btintel_get_flag(hdev) \ + (((struct btintel_data *)hci_get_priv(hdev))->flags) + +#define btintel_test_flag(hdev, nr) test_bit((nr), btintel_get_flag(hdev)) +#define btintel_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), btintel_get_flag(hdev)) +#define btintel_wait_on_flag_timeout(hdev, nr, m, to) \ + wait_on_bit_timeout(btintel_get_flag(hdev), (nr), m, to) + #if IS_ENABLED(CONFIG_BT_INTEL) int btintel_check_bdaddr(struct hci_dev *hdev); @@ -145,19 +188,11 @@ int btintel_enter_mfg(struct hci_dev *hdev); int btintel_exit_mfg(struct hci_dev *hdev, bool reset, bool patched); int btintel_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int btintel_set_diag(struct hci_dev *hdev, bool enable); -int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable); -void btintel_hw_error(struct hci_dev *hdev, u8 code); int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver); -int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *version); -int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen, - const void *param); int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name); -int btintel_set_event_mask(struct hci_dev *hdev, bool debug); int btintel_set_event_mask_mfg(struct hci_dev *hdev, bool debug); int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver); -int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver); - struct regmap *btintel_regmap_init(struct hci_dev *hdev, u16 opcode_read, u16 opcode_write); int btintel_send_intel_reset(struct hci_dev *hdev, u32 boot_param); @@ -165,16 +200,10 @@ int btintel_read_boot_params(struct hci_dev *hdev, struct intel_boot_params *params); int btintel_download_firmware(struct hci_dev *dev, struct intel_version *ver, const struct firmware *fw, u32 *boot_param); -int btintel_download_firmware_newgen(struct hci_dev *hdev, - struct intel_version_tlv *ver, - const struct firmware *fw, - u32 *boot_param, u8 hw_variant, - u8 sbe_type); -void btintel_reset_to_bootloader(struct hci_dev *hdev); -int btintel_read_debug_features(struct hci_dev *hdev, - struct intel_debug_features *features); -int btintel_set_debug_features(struct hci_dev *hdev, - const struct intel_debug_features *features); +int btintel_configure_setup(struct hci_dev *hdev); +void btintel_bootup(struct hci_dev *hdev, const void *ptr, unsigned int len); +void btintel_secure_send_result(struct hci_dev *hdev, + const void *ptr, unsigned int len); #else static inline int btintel_check_bdaddr(struct hci_dev *hdev) @@ -202,44 +231,18 @@ static inline int btintel_set_diag(struct hci_dev *hdev, bool enable) return -EOPNOTSUPP; } -static inline int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable) -{ - return -EOPNOTSUPP; -} - -static inline void 
btintel_hw_error(struct hci_dev *hdev, u8 code) -{ -} - static inline int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver) { return -EOPNOTSUPP; } -static inline int btintel_version_info_tlv(struct hci_dev *hdev, - struct intel_version_tlv *version) -{ - return -EOPNOTSUPP; -} - -static inline int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, - u32 plen, const void *param) -{ - return -EOPNOTSUPP; -} - static inline int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name) { return -EOPNOTSUPP; } -static inline int btintel_set_event_mask(struct hci_dev *hdev, bool debug) -{ - return -EOPNOTSUPP; -} - static inline int btintel_set_event_mask_mfg(struct hci_dev *hdev, bool debug) { return -EOPNOTSUPP; @@ -251,12 +254,6 @@ static inline int btintel_read_version(struct hci_dev *hdev, return -EOPNOTSUPP; } -static inline int btintel_read_version_tlv(struct hci_dev *hdev, - struct intel_version_tlv *ver) -{ - return -EOPNOTSUPP; -} - static inline struct regmap *btintel_regmap_init(struct hci_dev *hdev, u16 opcode_read, u16 opcode_write) @@ -283,28 +280,18 @@ static inline int btintel_download_firmware(struct hci_dev *dev, return -EOPNOTSUPP; } -static inline int btintel_download_firmware_newgen(struct hci_dev *hdev, - const struct firmware *fw, - u32 *boot_param, - u8 hw_variant, u8 sbe_type) -{ - return -EOPNOTSUPP; -} - -static inline void btintel_reset_to_bootloader(struct hci_dev *hdev) +static inline int btintel_configure_setup(struct hci_dev *hdev) { + return -ENODEV; } -static inline int btintel_read_debug_features(struct hci_dev *hdev, - struct intel_debug_features *features) +static inline void btintel_bootup(struct hci_dev *hdev, + const void *ptr, unsigned int len) { - return -EOPNOTSUPP; } -static inline int btintel_set_debug_features(struct hci_dev *hdev, - const struct intel_debug_features *features) +static inline void btintel_secure_send_result(struct hci_dev *hdev, + const void *ptr, unsigned int len) { - return -EOPNOTSUPP; } - #endif diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c index cddd350beba3..68378b42ea7f 100644 --- a/drivers/bluetooth/btmrvl_sdio.c +++ b/drivers/bluetooth/btmrvl_sdio.c @@ -1350,6 +1350,7 @@ static void btmrvl_sdio_coredump(struct device *dev) u8 *dbg_ptr, *end_ptr, *fw_dump_data, *fw_dump_ptr; u8 dump_num = 0, idx, i, read_reg, doneflag = 0; u32 memory_size, fw_dump_len = 0; + int size = 0; card = sdio_get_drvdata(func); priv = card->priv; @@ -1478,7 +1479,7 @@ done: if (fw_dump_len == 0) return; - fw_dump_data = vzalloc(fw_dump_len+1); + fw_dump_data = vzalloc(fw_dump_len + 1); if (!fw_dump_data) { BT_ERR("Vzalloc fw_dump_data fail!"); return; @@ -1493,20 +1494,18 @@ done: struct memory_type_mapping *entry = &mem_type_mapping_tbl[idx]; if (entry->mem_ptr) { - strcpy(fw_dump_ptr, "========Start dump "); - fw_dump_ptr += strlen("========Start dump "); - - strcpy(fw_dump_ptr, entry->mem_name); - fw_dump_ptr += strlen(entry->mem_name); - - strcpy(fw_dump_ptr, "========\n"); - fw_dump_ptr += strlen("========\n"); - - memcpy(fw_dump_ptr, entry->mem_ptr, entry->mem_size); - fw_dump_ptr += entry->mem_size; - - strcpy(fw_dump_ptr, "\n========End dump========\n"); - fw_dump_ptr += strlen("\n========End dump========\n"); + size += scnprintf(fw_dump_ptr + size, + fw_dump_len + 1 - size, + "========Start dump %s========\n", + entry->mem_name); + + memcpy(fw_dump_ptr + size, entry->mem_ptr, + entry->mem_size); + size += entry->mem_size; + + size += scnprintf(fw_dump_ptr + size, + 
fw_dump_len + 1 - size, + "\n========End dump========\n"); vfree(mem_type_mapping_tbl[idx].mem_ptr); mem_type_mapping_tbl[idx].mem_ptr = NULL; diff --git a/drivers/bluetooth/btrsi.c b/drivers/bluetooth/btrsi.c index bea1595f6432..8646b6dd11e9 100644 --- a/drivers/bluetooth/btrsi.c +++ b/drivers/bluetooth/btrsi.c @@ -1,4 +1,4 @@ -/** +/* * Copyright (c) 2017 Redpine Signals Inc. * * Permission to use, copy, modify, and/or distribute this software for any diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index cce0125ec4fd..1f8afa0244d8 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -681,11 +681,15 @@ out_free: } } - /* RTL8822CE supports the Microsoft vendor extension and uses 0xFCF0 - * for VsMsftOpCode. + /* The following chips supports the Microsoft vendor extension, + * therefore set the corresponding VsMsftOpCode. */ - if (lmp_subver == RTL_ROM_LMP_8822B) + switch (lmp_subver) { + case RTL_ROM_LMP_8822B: + case RTL_ROM_LMP_8852A: hci_set_msft_opcode(hdev, 0xFCF0); + break; + } return btrtl_dev; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a9855a2dd561..60d2fce59a71 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -43,12 +43,11 @@ static struct usb_driver btusb_driver; #define BTUSB_BROKEN_ISOC 0x20 #define BTUSB_WRONG_SCO_MTU 0x40 #define BTUSB_ATH3012 0x80 -#define BTUSB_INTEL 0x100 +#define BTUSB_INTEL_COMBINED 0x100 #define BTUSB_INTEL_BOOT 0x200 #define BTUSB_BCM_PATCHRAM 0x400 #define BTUSB_MARVELL 0x800 #define BTUSB_SWAVE 0x1000 -#define BTUSB_INTEL_NEW 0x2000 #define BTUSB_AMP 0x4000 #define BTUSB_QCA_ROME 0x8000 #define BTUSB_BCM_APPLE 0x10000 @@ -60,7 +59,7 @@ static struct usb_driver btusb_driver; #define BTUSB_WIDEBAND_SPEECH 0x400000 #define BTUSB_VALID_LE_STATES 0x800000 #define BTUSB_QCA_WCN6855 0x1000000 -#define BTUSB_INTEL_NEWGEN 0x2000000 +#define BTUSB_INTEL_BROKEN_INITIAL_NCMD 0x4000000 static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -119,9 +118,6 @@ static const struct usb_device_id btusb_table[] = { /* Canyon CN-BTU1 with HID interfaces */ { USB_DEVICE(0x0c10, 0x0000) }, - /* Broadcom BCM20702A0 */ - { USB_DEVICE(0x413c, 0x8197) }, - /* Broadcom BCM20702B0 (Dynex/Insignia) */ { USB_DEVICE(0x19ff, 0x0239), .driver_info = BTUSB_BCM_PATCHRAM }, @@ -297,7 +293,8 @@ static const struct usb_device_id blacklist_table[] = { /* QCA WCN6855 chipset */ { USB_DEVICE(0x0cf3, 0xe600), .driver_info = BTUSB_QCA_WCN6855 | - BTUSB_WIDEBAND_SPEECH }, + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, /* Broadcom BCM2035 */ { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 }, @@ -361,27 +358,18 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x1286, 0x204e), .driver_info = BTUSB_MARVELL }, /* Intel Bluetooth devices */ - { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW | - BTUSB_WIDEBAND_SPEECH | - BTUSB_VALID_LE_STATES }, - { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW | - BTUSB_WIDEBAND_SPEECH }, - { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_NEW | - BTUSB_WIDEBAND_SPEECH }, - { USB_DEVICE(0x8087, 0x0032), .driver_info = BTUSB_INTEL_NEWGEN | - BTUSB_WIDEBAND_SPEECH}, - { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_NEWGEN | - BTUSB_WIDEBAND_SPEECH}, + { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_COMBINED }, + { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_COMBINED }, + { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_COMBINED }, + 
{ USB_DEVICE(0x8087, 0x0032), .driver_info = BTUSB_INTEL_COMBINED }, + { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, - { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL }, - { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL }, - { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_NEW | - BTUSB_WIDEBAND_SPEECH }, - { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL | - BTUSB_WIDEBAND_SPEECH }, - { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_NEW | - BTUSB_WIDEBAND_SPEECH | - BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL_COMBINED | + BTUSB_INTEL_BROKEN_INITIAL_NCMD }, + { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL_COMBINED }, + { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_COMBINED }, + { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL_COMBINED }, + { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_COMBINED }, /* Other Intel Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01), @@ -410,10 +398,21 @@ static const struct usb_device_id blacklist_table[] = { /* Additional MediaTek MT7615E Bluetooth devices */ { USB_DEVICE(0x13d3, 0x3560), .driver_info = BTUSB_MEDIATEK}, + /* Additional MediaTek MT7668 Bluetooth devices */ + { USB_DEVICE(0x043e, 0x3109), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, + /* Additional MediaTek MT7921 Bluetooth devices */ { USB_DEVICE(0x04ca, 0x3802), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x13d3, 0x3563), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x0489, 0xe0cd), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, /* Additional Realtek 8723AE Bluetooth devices */ { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK }, @@ -452,6 +451,10 @@ static const struct usb_device_id blacklist_table[] = { /* Additional Realtek 8822CE Bluetooth devices */ { USB_DEVICE(0x04ca, 0x4005), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + /* Bluetooth component of Realtek 8852AE device */ + { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x04c5, 0x161f), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0b05, 0x18ef), .driver_info = BTUSB_REALTEK | @@ -524,7 +527,8 @@ static const struct dmi_system_id btusb_needs_reset_resume_table[] = { #define BTUSB_OOB_WAKE_ENABLED 11 #define BTUSB_HW_RESET_ACTIVE 12 #define BTUSB_TX_WAIT_VND_EVT 13 -#define BTUSB_WAKEUP_DISABLE 14 +#define BTUSB_WAKEUP_AUTOSUSPEND 14 +#define BTUSB_USE_ALT3_FOR_WBS 15 struct btusb_data { struct hci_dev *hdev; @@ -575,6 +579,7 @@ struct btusb_data { int suspend_count; int (*recv_event)(struct hci_dev *hdev, struct sk_buff *skb); + int (*recv_acl)(struct hci_dev *hdev, struct sk_buff *skb); int (*recv_bulk)(struct btusb_data *data, void *buffer, int count); int (*setup_on_usb)(struct hci_dev *hdev); @@ -782,7 +787,7 @@ static int btusb_recv_bulk(struct btusb_data *data, void *buffer, int count) if (!hci_skb_expect(skb)) { /* Complete frame */ - hci_recv_frame(data->hdev, skb); + data->recv_acl(data->hdev, skb); skb = NULL; } } @@ -1345,13 +1350,6 @@ static int btusb_open(struct hci_dev *hdev) data->intf->needs_remote_wakeup = 1; - /* Disable device remote wakeup when host is suspended - * For Realtek chips, global suspend without 
- * SET_FEATURE (DEVICE_REMOTE_WAKEUP) can save more power in device. - */ - if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags)) - device_wakeup_disable(&data->udev->dev); - if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags)) goto done; @@ -1418,7 +1416,7 @@ static int btusb_close(struct hci_dev *hdev) data->intf->needs_remote_wakeup = 0; /* Enable remote wake up for auto-suspend */ - if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags)) + if (test_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags)) data->intf->needs_remote_wakeup = 1; usb_autopm_put_interface(data->intf); @@ -1757,16 +1755,20 @@ static void btusb_work(struct work_struct *work) /* Bluetooth USB spec recommends alt 6 (63 bytes), but * many adapters do not support it. Alt 1 appears to * work for all adapters that do not have alt 6, and - * which work with WBS at all. + * which work with WBS at all. Some devices prefer + * alt 3 (HCI payload >= 60 Bytes let air packet + * data satisfy 60 bytes), requiring + * MTU >= 3 (packets) * 25 (size) - 3 (headers) = 72 + * see also Core spec 5, vol 4, B 2.1.1 & Table 2.1. */ - new_alts = btusb_find_altsetting(data, 6) ? 6 : 1; - /* Because mSBC frames do not need to be aligned to the - * SCO packet boundary. If support the Alt 3, use the - * Alt 3 for HCI payload >= 60 Bytes let air packet - * data satisfy 60 bytes. - */ - if (new_alts == 1 && btusb_find_altsetting(data, 3)) + if (btusb_find_altsetting(data, 6)) + new_alts = 6; + else if (btusb_find_altsetting(data, 3) && + hdev->sco_mtu >= 72 && + test_bit(BTUSB_USE_ALT3_FOR_WBS, &data->flags)) new_alts = 3; + else + new_alts = 1; } if (btusb_switch_alt_setting(hdev, new_alts) < 0) @@ -1890,7 +1892,7 @@ static int btusb_setup_csr(struct hci_dev *hdev) is_fake = true; if (is_fake) { - bt_dev_warn(hdev, "CSR: Unbranded CSR clone detected; adding workarounds..."); + bt_dev_warn(hdev, "CSR: Unbranded CSR clone detected; adding workarounds and force-suspending once..."); /* Generally these clones have big discrepancies between * advertised features and what's actually supported. @@ -1907,361 +1909,53 @@ static int btusb_setup_csr(struct hci_dev *hdev) clear_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); /* - * Special workaround for clones with a Barrot 8041a02 chip, - * these clones are really messed-up: - * 1. Their bulk rx endpoint will never report any data unless - * the device was suspended at least once (yes really). + * Special workaround for these BT 4.0 chip clones, and potentially more: + * + * - 0x0134: a Barrot 8041a02 (HCI rev: 0x1012 sub: 0x0810) + * - 0x7558: IC markings FR3191AHAL 749H15143 (HCI rev/sub-version: 0x0709) + * + * These controllers are really messed-up. + * + * 1. Their bulk RX endpoint will never report any data unless + * the device was suspended at least once (yes, really). * 2. They will not wakeup when autosuspended and receiving data - * on their bulk rx endpoint from e.g. a keyboard or mouse + * on their bulk RX endpoint from e.g. a keyboard or mouse * (IOW remote-wakeup support is broken for the bulk endpoint). * * To fix 1. enable runtime-suspend, force-suspend the - * hci and then wake-it up by disabling runtime-suspend. + * HCI and then wake-it up by disabling runtime-suspend. * - * To fix 2. clear the hci's can_wake flag, this way the hci + * To fix 2. clear the HCI's can_wake flag, this way the HCI * will still be autosuspended when it is not open. 
+ * + * -- + * + * Because these are widespread problems we prefer generic solutions; so + * apply this initialization quirk to every controller that gets here, + * it should be harmless. The alternative is to not work at all. */ - if (bcdDevice == 0x8891 && - le16_to_cpu(rp->lmp_subver) == 0x1012 && - le16_to_cpu(rp->hci_rev) == 0x0810 && - le16_to_cpu(rp->hci_ver) == BLUETOOTH_VER_4_0) { - bt_dev_warn(hdev, "CSR: detected a fake CSR dongle using a Barrot 8041a02 chip, this chip is very buggy and may have issues"); - - pm_runtime_allow(&data->udev->dev); - - ret = pm_runtime_suspend(&data->udev->dev); - if (ret >= 0) - msleep(200); - else - bt_dev_err(hdev, "Failed to suspend the device for Barrot 8041a02 receive-issue workaround"); - - pm_runtime_forbid(&data->udev->dev); - - device_set_wakeup_capable(&data->udev->dev, false); - /* Re-enable autosuspend if this was requested */ - if (enable_autosuspend) - usb_enable_autosuspend(data->udev); - } - } - - kfree_skb(skb); - - return 0; -} - -static const struct firmware *btusb_setup_intel_get_fw(struct hci_dev *hdev, - struct intel_version *ver) -{ - const struct firmware *fw; - char fwname[64]; - int ret; - - snprintf(fwname, sizeof(fwname), - "intel/ibt-hw-%x.%x.%x-fw-%x.%x.%x.%x.%x.bseq", - ver->hw_platform, ver->hw_variant, ver->hw_revision, - ver->fw_variant, ver->fw_revision, ver->fw_build_num, - ver->fw_build_ww, ver->fw_build_yy); - - ret = request_firmware(&fw, fwname, &hdev->dev); - if (ret < 0) { - if (ret == -EINVAL) { - bt_dev_err(hdev, "Intel firmware file request failed (%d)", - ret); - return NULL; - } - - bt_dev_err(hdev, "failed to open Intel firmware file: %s (%d)", - fwname, ret); - - /* If the correct firmware patch file is not found, use the - * default firmware patch file instead - */ - snprintf(fwname, sizeof(fwname), "intel/ibt-hw-%x.%x.bseq", - ver->hw_platform, ver->hw_variant); - if (request_firmware(&fw, fwname, &hdev->dev) < 0) { - bt_dev_err(hdev, "failed to open default fw file: %s", - fwname); - return NULL; - } - } + pm_runtime_allow(&data->udev->dev); - bt_dev_info(hdev, "Intel Bluetooth firmware file: %s", fwname); - - return fw; -} - -static int btusb_setup_intel_patching(struct hci_dev *hdev, - const struct firmware *fw, - const u8 **fw_ptr, int *disable_patch) -{ - struct sk_buff *skb; - struct hci_command_hdr *cmd; - const u8 *cmd_param; - struct hci_event_hdr *evt = NULL; - const u8 *evt_param = NULL; - int remain = fw->size - (*fw_ptr - fw->data); - - /* The first byte indicates the types of the patch command or event. - * 0x01 means HCI command and 0x02 is HCI event. If the first bytes - * in the current firmware buffer doesn't start with 0x01 or - * the size of remain buffer is smaller than HCI command header, - * the firmware file is corrupted and it should stop the patching - * process. - */ - if (remain > HCI_COMMAND_HDR_SIZE && *fw_ptr[0] != 0x01) { - bt_dev_err(hdev, "Intel fw corrupted: invalid cmd read"); - return -EINVAL; - } - (*fw_ptr)++; - remain--; - - cmd = (struct hci_command_hdr *)(*fw_ptr); - *fw_ptr += sizeof(*cmd); - remain -= sizeof(*cmd); - - /* Ensure that the remain firmware data is long enough than the length - * of command parameter. If not, the firmware file is corrupted. 
- */ - if (remain < cmd->plen) { - bt_dev_err(hdev, "Intel fw corrupted: invalid cmd len"); - return -EFAULT; - } - - /* If there is a command that loads a patch in the firmware - * file, then enable the patch upon success, otherwise just - * disable the manufacturer mode, for example patch activation - * is not required when the default firmware patch file is used - * because there are no patch data to load. - */ - if (*disable_patch && le16_to_cpu(cmd->opcode) == 0xfc8e) - *disable_patch = 0; - - cmd_param = *fw_ptr; - *fw_ptr += cmd->plen; - remain -= cmd->plen; - - /* This reads the expected events when the above command is sent to the - * device. Some vendor commands expects more than one events, for - * example command status event followed by vendor specific event. - * For this case, it only keeps the last expected event. so the command - * can be sent with __hci_cmd_sync_ev() which returns the sk_buff of - * last expected event. - */ - while (remain > HCI_EVENT_HDR_SIZE && *fw_ptr[0] == 0x02) { - (*fw_ptr)++; - remain--; - - evt = (struct hci_event_hdr *)(*fw_ptr); - *fw_ptr += sizeof(*evt); - remain -= sizeof(*evt); - - if (remain < evt->plen) { - bt_dev_err(hdev, "Intel fw corrupted: invalid evt len"); - return -EFAULT; - } - - evt_param = *fw_ptr; - *fw_ptr += evt->plen; - remain -= evt->plen; - } + ret = pm_runtime_suspend(&data->udev->dev); + if (ret >= 0) + msleep(200); + else + bt_dev_err(hdev, "CSR: Failed to suspend the device for our Barrot 8041a02 receive-issue workaround"); - /* Every HCI commands in the firmware file has its correspond event. - * If event is not found or remain is smaller than zero, the firmware - * file is corrupted. - */ - if (!evt || !evt_param || remain < 0) { - bt_dev_err(hdev, "Intel fw corrupted: invalid evt read"); - return -EFAULT; - } + pm_runtime_forbid(&data->udev->dev); - skb = __hci_cmd_sync_ev(hdev, le16_to_cpu(cmd->opcode), cmd->plen, - cmd_param, evt->evt, HCI_INIT_TIMEOUT); - if (IS_ERR(skb)) { - bt_dev_err(hdev, "sending Intel patch command (0x%4.4x) failed (%ld)", - cmd->opcode, PTR_ERR(skb)); - return PTR_ERR(skb); - } + device_set_wakeup_capable(&data->udev->dev, false); - /* It ensures that the returned event matches the event data read from - * the firmware file. At fist, it checks the length and then - * the contents of the event. - */ - if (skb->len != evt->plen) { - bt_dev_err(hdev, "mismatch event length (opcode 0x%4.4x)", - le16_to_cpu(cmd->opcode)); - kfree_skb(skb); - return -EFAULT; + /* Re-enable autosuspend if this was requested */ + if (enable_autosuspend) + usb_enable_autosuspend(data->udev); } - if (memcmp(skb->data, evt_param, evt->plen)) { - bt_dev_err(hdev, "mismatch event parameter (opcode 0x%4.4x)", - le16_to_cpu(cmd->opcode)); - kfree_skb(skb); - return -EFAULT; - } kfree_skb(skb); return 0; } -static int btusb_setup_intel(struct hci_dev *hdev) -{ - struct sk_buff *skb; - const struct firmware *fw; - const u8 *fw_ptr; - int disable_patch, err; - struct intel_version ver; - - BT_DBG("%s", hdev->name); - - /* The controller has a bug with the first HCI command sent to it - * returning number of completed commands as zero. This would stall the - * command processing in the Bluetooth core. - * - * As a workaround, send HCI Reset command first which will reset the - * number of completed commands and allow normal command processing - * from now on. 
- */ - skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); - if (IS_ERR(skb)) { - bt_dev_err(hdev, "sending initial HCI reset command failed (%ld)", - PTR_ERR(skb)); - return PTR_ERR(skb); - } - kfree_skb(skb); - - /* Read Intel specific controller version first to allow selection of - * which firmware file to load. - * - * The returned information are hardware variant and revision plus - * firmware variant, revision and build number. - */ - err = btintel_read_version(hdev, &ver); - if (err) - return err; - - bt_dev_info(hdev, "read Intel version: %02x%02x%02x%02x%02x%02x%02x%02x%02x", - ver.hw_platform, ver.hw_variant, ver.hw_revision, - ver.fw_variant, ver.fw_revision, ver.fw_build_num, - ver.fw_build_ww, ver.fw_build_yy, ver.fw_patch_num); - - /* fw_patch_num indicates the version of patch the device currently - * have. If there is no patch data in the device, it is always 0x00. - * So, if it is other than 0x00, no need to patch the device again. - */ - if (ver.fw_patch_num) { - bt_dev_info(hdev, "Intel device is already patched. " - "patch num: %02x", ver.fw_patch_num); - goto complete; - } - - /* Opens the firmware patch file based on the firmware version read - * from the controller. If it fails to open the matching firmware - * patch file, it tries to open the default firmware patch file. - * If no patch file is found, allow the device to operate without - * a patch. - */ - fw = btusb_setup_intel_get_fw(hdev, &ver); - if (!fw) - goto complete; - fw_ptr = fw->data; - - /* Enable the manufacturer mode of the controller. - * Only while this mode is enabled, the driver can download the - * firmware patch data and configuration parameters. - */ - err = btintel_enter_mfg(hdev); - if (err) { - release_firmware(fw); - return err; - } - - disable_patch = 1; - - /* The firmware data file consists of list of Intel specific HCI - * commands and its expected events. The first byte indicates the - * type of the message, either HCI command or HCI event. - * - * It reads the command and its expected event from the firmware file, - * and send to the controller. Once __hci_cmd_sync_ev() returns, - * the returned event is compared with the event read from the firmware - * file and it will continue until all the messages are downloaded to - * the controller. - * - * Once the firmware patching is completed successfully, - * the manufacturer mode is disabled with reset and activating the - * downloaded patch. - * - * If the firmware patching fails, the manufacturer mode is - * disabled with reset and deactivating the patch. - * - * If the default patch file is used, no reset is done when disabling - * the manufacturer. - */ - while (fw->size > fw_ptr - fw->data) { - int ret; - - ret = btusb_setup_intel_patching(hdev, fw, &fw_ptr, - &disable_patch); - if (ret < 0) - goto exit_mfg_deactivate; - } - - release_firmware(fw); - - if (disable_patch) - goto exit_mfg_disable; - - /* Patching completed successfully and disable the manufacturer mode - * with reset and activate the downloaded firmware patches. 
- */ - err = btintel_exit_mfg(hdev, true, true); - if (err) - return err; - - /* Need build number for downloaded fw patches in - * every power-on boot - */ - err = btintel_read_version(hdev, &ver); - if (err) - return err; - bt_dev_info(hdev, "Intel BT fw patch 0x%02x completed & activated", - ver.fw_patch_num); - - goto complete; - -exit_mfg_disable: - /* Disable the manufacturer mode without reset */ - err = btintel_exit_mfg(hdev, false, false); - if (err) - return err; - - bt_dev_info(hdev, "Intel firmware patch completed"); - - goto complete; - -exit_mfg_deactivate: - release_firmware(fw); - - /* Patching failed. Disable the manufacturer mode with reset and - * deactivate the downloaded firmware patches. - */ - err = btintel_exit_mfg(hdev, true, false); - if (err) - return err; - - bt_dev_info(hdev, "Intel firmware patch completed and deactivated"); - -complete: - /* Set the event mask for Intel specific vendor events. This enables - * a few extra events that are useful during general operation. - */ - btintel_set_event_mask_mfg(hdev, false); - - btintel_check_bdaddr(hdev); - return 0; -} - static int inject_cmd_complete(struct hci_dev *hdev, __u16 opcode) { struct sk_buff *skb; @@ -2290,49 +1984,21 @@ static int inject_cmd_complete(struct hci_dev *hdev, __u16 opcode) static int btusb_recv_bulk_intel(struct btusb_data *data, void *buffer, int count) { + struct hci_dev *hdev = data->hdev; + /* When the device is in bootloader mode, then it can send * events via the bulk endpoint. These events are treated the * same way as the ones received from the interrupt endpoint. */ - if (test_bit(BTUSB_BOOTLOADER, &data->flags)) + if (btintel_test_flag(hdev, INTEL_BOOTLOADER)) return btusb_recv_intr(data, buffer, count); return btusb_recv_bulk(data, buffer, count); } -static void btusb_intel_bootup(struct btusb_data *data, const void *ptr, - unsigned int len) -{ - const struct intel_bootup *evt = ptr; - - if (len != sizeof(*evt)) - return; - - if (test_and_clear_bit(BTUSB_BOOTING, &data->flags)) - wake_up_bit(&data->flags, BTUSB_BOOTING); -} - -static void btusb_intel_secure_send_result(struct btusb_data *data, - const void *ptr, unsigned int len) -{ - const struct intel_secure_send_result *evt = ptr; - - if (len != sizeof(*evt)) - return; - - if (evt->result) - set_bit(BTUSB_FIRMWARE_FAILED, &data->flags); - - if (test_and_clear_bit(BTUSB_DOWNLOADING, &data->flags) && - test_bit(BTUSB_FIRMWARE_LOADED, &data->flags)) - wake_up_bit(&data->flags, BTUSB_DOWNLOADING); -} - static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb) { - struct btusb_data *data = hci_get_drvdata(hdev); - - if (test_bit(BTUSB_BOOTLOADER, &data->flags)) { + if (btintel_test_flag(hdev, INTEL_BOOTLOADER)) { struct hci_event_hdr *hdr = (void *)skb->data; if (skb->len > HCI_EVENT_HDR_SIZE && hdr->evt == 0xff && @@ -2346,7 +2012,7 @@ static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb) * the device sends a vendor specific event * indicating that the bootup completed. */ - btusb_intel_bootup(data, ptr, len); + btintel_bootup(hdev, ptr, len); break; case 0x06: /* When the firmware loading completes the @@ -2354,7 +2020,7 @@ static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb) * indicating the result of the firmware * loading. 
*/ - btusb_intel_secure_send_result(data, ptr, len); + btintel_secure_send_result(hdev, ptr, len); break; } } @@ -2365,14 +2031,13 @@ static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb) static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb) { - struct btusb_data *data = hci_get_drvdata(hdev); struct urb *urb; BT_DBG("%s", hdev->name); switch (hci_skb_pkt_type(skb)) { case HCI_COMMAND_PKT: - if (test_bit(BTUSB_BOOTLOADER, &data->flags)) { + if (btintel_test_flag(hdev, INTEL_BOOTLOADER)) { struct hci_command_hdr *cmd = (void *)skb->data; __u16 opcode = le16_to_cpu(cmd->opcode); @@ -2424,663 +2089,17 @@ static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb) return -EILSEQ; } -static int btusb_setup_intel_new_get_fw_name(struct intel_version *ver, - struct intel_boot_params *params, - char *fw_name, size_t len, - const char *suffix) -{ - switch (ver->hw_variant) { - case 0x0b: /* SfP */ - case 0x0c: /* WsP */ - snprintf(fw_name, len, "intel/ibt-%u-%u.%s", - le16_to_cpu(ver->hw_variant), - le16_to_cpu(params->dev_revid), - suffix); - break; - case 0x11: /* JfP */ - case 0x12: /* ThP */ - case 0x13: /* HrP */ - case 0x14: /* CcP */ - snprintf(fw_name, len, "intel/ibt-%u-%u-%u.%s", - le16_to_cpu(ver->hw_variant), - le16_to_cpu(ver->hw_revision), - le16_to_cpu(ver->fw_revision), - suffix); - break; - default: - return -EINVAL; - } - - return 0; -} - -static void btusb_setup_intel_newgen_get_fw_name(const struct intel_version_tlv *ver_tlv, - char *fw_name, size_t len, - const char *suffix) -{ - /* The firmware file name for new generation controllers will be - * ibt-<cnvi_top type+cnvi_top step>-<cnvr_top type+cnvr_top step> - */ - snprintf(fw_name, len, "intel/ibt-%04x-%04x.%s", - INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver_tlv->cnvi_top), - INTEL_CNVX_TOP_STEP(ver_tlv->cnvi_top)), - INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver_tlv->cnvr_top), - INTEL_CNVX_TOP_STEP(ver_tlv->cnvr_top)), - suffix); -} - -static int btusb_download_wait(struct hci_dev *hdev, ktime_t calltime, int msec) -{ - struct btusb_data *data = hci_get_drvdata(hdev); - ktime_t delta, rettime; - unsigned long long duration; - int err; - - set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); - - bt_dev_info(hdev, "Waiting for firmware download to complete"); - - err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING, - TASK_INTERRUPTIBLE, - msecs_to_jiffies(msec)); - if (err == -EINTR) { - bt_dev_err(hdev, "Firmware loading interrupted"); - return err; - } - - if (err) { - bt_dev_err(hdev, "Firmware loading timeout"); - return -ETIMEDOUT; - } - - if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) { - bt_dev_err(hdev, "Firmware loading failed"); - return -ENOEXEC; - } - - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - duration = (unsigned long long)ktime_to_ns(delta) >> 10; - - bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration); - - return 0; -} - -static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev, - struct intel_version_tlv *ver, - u32 *boot_param) -{ - const struct firmware *fw; - char fwname[64]; - int err; - struct btusb_data *data = hci_get_drvdata(hdev); - ktime_t calltime; - - if (!ver || !boot_param) - return -EINVAL; - - /* The firmware variant determines if the device is in bootloader - * mode or is running operational firmware. The value 0x03 identifies - * the bootloader and the value 0x23 identifies the operational - * firmware. 
- * - * When the operational firmware is already present, then only - * the check for valid Bluetooth device address is needed. This - * determines if the device will be added as configured or - * unconfigured controller. - * - * It is not possible to use the Secure Boot Parameters in this - * case since that command is only available in bootloader mode. - */ - if (ver->img_type == 0x03) { - clear_bit(BTUSB_BOOTLOADER, &data->flags); - btintel_check_bdaddr(hdev); - } - - /* If the OTP has no valid Bluetooth device address, then there will - * also be no valid address for the operational firmware. - */ - if (!bacmp(&ver->otp_bd_addr, BDADDR_ANY)) { - bt_dev_info(hdev, "No device address configured"); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); - } - - btusb_setup_intel_newgen_get_fw_name(ver, fwname, sizeof(fwname), "sfi"); - err = firmware_request_nowarn(&fw, fwname, &hdev->dev); - if (err < 0) { - if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) { - /* Firmware has already been loaded */ - set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); - return 0; - } - - bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)", - fwname, err); - - return err; - } - - bt_dev_info(hdev, "Found device firmware: %s", fwname); - - if (fw->size < 644) { - bt_dev_err(hdev, "Invalid size of firmware file (%zu)", - fw->size); - err = -EBADF; - goto done; - } - - calltime = ktime_get(); - - set_bit(BTUSB_DOWNLOADING, &data->flags); - - /* Start firmware downloading and get boot parameter */ - err = btintel_download_firmware_newgen(hdev, ver, fw, boot_param, - INTEL_HW_VARIANT(ver->cnvi_bt), - ver->sbe_type); - if (err < 0) { - if (err == -EALREADY) { - /* Firmware has already been loaded */ - set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); - err = 0; - goto done; - } - - /* When FW download fails, send Intel Reset to retry - * FW download. - */ - btintel_reset_to_bootloader(hdev); - goto done; - } - - /* Before switching the device into operational mode and with that - * booting the loaded firmware, wait for the bootloader notification - * that all fragments have been successfully received. - * - * When the event processing receives the notification, then the - * BTUSB_DOWNLOADING flag will be cleared. - * - * The firmware loading should not take longer than 5 seconds - * and thus just timeout if that happens and fail the setup - * of this device. - */ - err = btusb_download_wait(hdev, calltime, 5000); - if (err == -ETIMEDOUT) - btintel_reset_to_bootloader(hdev); - -done: - release_firmware(fw); - return err; -} - -static int btusb_intel_download_firmware(struct hci_dev *hdev, - struct intel_version *ver, - struct intel_boot_params *params, - u32 *boot_param) -{ - const struct firmware *fw; - char fwname[64]; - int err; - struct btusb_data *data = hci_get_drvdata(hdev); - ktime_t calltime; - - if (!ver || !params) - return -EINVAL; - - /* The firmware variant determines if the device is in bootloader - * mode or is running operational firmware. The value 0x06 identifies - * the bootloader and the value 0x23 identifies the operational - * firmware. - * - * When the operational firmware is already present, then only - * the check for valid Bluetooth device address is needed. This - * determines if the device will be added as configured or - * unconfigured controller. - * - * It is not possible to use the Secure Boot Parameters in this - * case since that command is only available in bootloader mode. 
- */ - if (ver->fw_variant == 0x23) { - clear_bit(BTUSB_BOOTLOADER, &data->flags); - btintel_check_bdaddr(hdev); - - /* SfP and WsP don't seem to update the firmware version on file - * so version checking is currently possible. - */ - switch (ver->hw_variant) { - case 0x0b: /* SfP */ - case 0x0c: /* WsP */ - return 0; - } - - /* Proceed to download to check if the version matches */ - goto download; - } - - /* Read the secure boot parameters to identify the operating - * details of the bootloader. - */ - err = btintel_read_boot_params(hdev, params); - if (err) - return err; - - /* It is required that every single firmware fragment is acknowledged - * with a command complete event. If the boot parameters indicate - * that this bootloader does not send them, then abort the setup. - */ - if (params->limited_cce != 0x00) { - bt_dev_err(hdev, "Unsupported Intel firmware loading method (%u)", - params->limited_cce); - return -EINVAL; - } - - /* If the OTP has no valid Bluetooth device address, then there will - * also be no valid address for the operational firmware. - */ - if (!bacmp(¶ms->otp_bdaddr, BDADDR_ANY)) { - bt_dev_info(hdev, "No device address configured"); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); - } - -download: - /* With this Intel bootloader only the hardware variant and device - * revision information are used to select the right firmware for SfP - * and WsP. - * - * The firmware filename is ibt-<hw_variant>-<dev_revid>.sfi. - * - * Currently the supported hardware variants are: - * 11 (0x0b) for iBT3.0 (LnP/SfP) - * 12 (0x0c) for iBT3.5 (WsP) - * - * For ThP/JfP and for future SKU's, the FW name varies based on HW - * variant, HW revision and FW revision, as these are dependent on CNVi - * and RF Combination. - * - * 17 (0x11) for iBT3.5 (JfP) - * 18 (0x12) for iBT3.5 (ThP) - * - * The firmware file name for these will be - * ibt-<hw_variant>-<hw_revision>-<fw_revision>.sfi. - * - */ - err = btusb_setup_intel_new_get_fw_name(ver, params, fwname, - sizeof(fwname), "sfi"); - if (err < 0) { - if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) { - /* Firmware has already been loaded */ - set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); - return 0; - } - - bt_dev_err(hdev, "Unsupported Intel firmware naming"); - return -EINVAL; - } - - err = firmware_request_nowarn(&fw, fwname, &hdev->dev); - if (err < 0) { - if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) { - /* Firmware has already been loaded */ - set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); - return 0; - } - - bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)", - fwname, err); - return err; - } - - bt_dev_info(hdev, "Found device firmware: %s", fwname); - - if (fw->size < 644) { - bt_dev_err(hdev, "Invalid size of firmware file (%zu)", - fw->size); - err = -EBADF; - goto done; - } - - calltime = ktime_get(); - - set_bit(BTUSB_DOWNLOADING, &data->flags); - - /* Start firmware downloading and get boot parameter */ - err = btintel_download_firmware(hdev, ver, fw, boot_param); - if (err < 0) { - if (err == -EALREADY) { - /* Firmware has already been loaded */ - set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); - err = 0; - goto done; - } - - /* When FW download fails, send Intel Reset to retry - * FW download. - */ - btintel_reset_to_bootloader(hdev); - goto done; - } - - /* Before switching the device into operational mode and with that - * booting the loaded firmware, wait for the bootloader notification - * that all fragments have been successfully received. 
- * - * When the event processing receives the notification, then the - * BTUSB_DOWNLOADING flag will be cleared. - * - * The firmware loading should not take longer than 5 seconds - * and thus just timeout if that happens and fail the setup - * of this device. - */ - err = btusb_download_wait(hdev, calltime, 5000); - if (err == -ETIMEDOUT) - btintel_reset_to_bootloader(hdev); - -done: - release_firmware(fw); - return err; -} - -static int btusb_boot_wait(struct hci_dev *hdev, ktime_t calltime, int msec) -{ - struct btusb_data *data = hci_get_drvdata(hdev); - ktime_t delta, rettime; - unsigned long long duration; - int err; - - bt_dev_info(hdev, "Waiting for device to boot"); - - err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING, - TASK_INTERRUPTIBLE, - msecs_to_jiffies(msec)); - if (err == -EINTR) { - bt_dev_err(hdev, "Device boot interrupted"); - return -EINTR; - } - - if (err) { - bt_dev_err(hdev, "Device boot timeout"); - return -ETIMEDOUT; - } - - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - duration = (unsigned long long) ktime_to_ns(delta) >> 10; - - bt_dev_info(hdev, "Device booted in %llu usecs", duration); - - return 0; -} - -static int btusb_intel_boot(struct hci_dev *hdev, u32 boot_addr) -{ - struct btusb_data *data = hci_get_drvdata(hdev); - ktime_t calltime; - int err; - - calltime = ktime_get(); - - set_bit(BTUSB_BOOTING, &data->flags); - - err = btintel_send_intel_reset(hdev, boot_addr); - if (err) { - bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err); - btintel_reset_to_bootloader(hdev); - return err; - } - - /* The bootloader will not indicate when the device is ready. This - * is done by the operational firmware sending bootup notification. - * - * Booting into operational firmware should not take longer than - * 1 second. However if that happens, then just fail the setup - * since something went wrong. - */ - err = btusb_boot_wait(hdev, calltime, 1000); - if (err == -ETIMEDOUT) - btintel_reset_to_bootloader(hdev); - - return err; -} - -static int btusb_setup_intel_new(struct hci_dev *hdev) -{ - struct btusb_data *data = hci_get_drvdata(hdev); - struct intel_version ver; - struct intel_boot_params params; - u32 boot_param; - char ddcname[64]; - int err; - struct intel_debug_features features; - - BT_DBG("%s", hdev->name); - - /* Set the default boot parameter to 0x0 and it is updated to - * SKU specific boot parameter after reading Intel_Write_Boot_Params - * command while downloading the firmware. - */ - boot_param = 0x00000000; - - /* Read the Intel version information to determine if the device - * is in bootloader mode or if it already has operational firmware - * loaded. - */ - err = btintel_read_version(hdev, &ver); - if (err) { - bt_dev_err(hdev, "Intel Read version failed (%d)", err); - btintel_reset_to_bootloader(hdev); - return err; - } - - err = btintel_version_info(hdev, &ver); - if (err) - return err; - - err = btusb_intel_download_firmware(hdev, &ver, ¶ms, &boot_param); - if (err) - return err; - - /* controller is already having an operational firmware */ - if (ver.fw_variant == 0x23) - goto finish; - - err = btusb_intel_boot(hdev, boot_param); - if (err) - return err; - - clear_bit(BTUSB_BOOTLOADER, &data->flags); - - err = btusb_setup_intel_new_get_fw_name(&ver, ¶ms, ddcname, - sizeof(ddcname), "ddc"); - - if (err < 0) { - bt_dev_err(hdev, "Unsupported Intel firmware naming"); - } else { - /* Once the device is running in operational mode, it needs to - * apply the device configuration (DDC) parameters. 
- * - * The device can work without DDC parameters, so even if it - * fails to load the file, no need to fail the setup. - */ - btintel_load_ddc_config(hdev, ddcname); - } - - /* Read the Intel supported features and if new exception formats - * supported, need to load the additional DDC config to enable. - */ - btintel_read_debug_features(hdev, &features); - - /* Set DDC mask for available debug features */ - btintel_set_debug_features(hdev, &features); - - /* Read the Intel version information after loading the FW */ - err = btintel_read_version(hdev, &ver); - if (err) - return err; - - btintel_version_info(hdev, &ver); - -finish: - /* All Intel controllers that support the Microsoft vendor - * extension are using 0xFC1E for VsMsftOpCode. - */ - switch (ver.hw_variant) { - case 0x11: /* JfP */ - case 0x12: /* ThP */ - case 0x13: /* HrP */ - case 0x14: /* CcP */ - hci_set_msft_opcode(hdev, 0xFC1E); - break; - } - - /* Set the event mask for Intel specific vendor events. This enables - * a few extra events that are useful during general operation. It - * does not enable any debugging related events. - * - * The device will function correctly without these events enabled - * and thus no need to fail the setup. - */ - btintel_set_event_mask(hdev, false); - - return 0; -} - -static int btusb_setup_intel_newgen(struct hci_dev *hdev) -{ - struct btusb_data *data = hci_get_drvdata(hdev); - u32 boot_param; - char ddcname[64]; - int err; - struct intel_debug_features features; - struct intel_version_tlv version; - - bt_dev_dbg(hdev, ""); - - /* Set the default boot parameter to 0x0 and it is updated to - * SKU specific boot parameter after reading Intel_Write_Boot_Params - * command while downloading the firmware. - */ - boot_param = 0x00000000; - - /* Read the Intel version information to determine if the device - * is in bootloader mode or if it already has operational firmware - * loaded. - */ - err = btintel_read_version_tlv(hdev, &version); - if (err) { - bt_dev_err(hdev, "Intel Read version failed (%d)", err); - btintel_reset_to_bootloader(hdev); - return err; - } - - err = btintel_version_info_tlv(hdev, &version); - if (err) - return err; - - err = btusb_intel_download_firmware_newgen(hdev, &version, &boot_param); - if (err) - return err; - - /* check if controller is already having an operational firmware */ - if (version.img_type == 0x03) - goto finish; - - err = btusb_intel_boot(hdev, boot_param); - if (err) - return err; - - clear_bit(BTUSB_BOOTLOADER, &data->flags); - - btusb_setup_intel_newgen_get_fw_name(&version, ddcname, sizeof(ddcname), - "ddc"); - /* Once the device is running in operational mode, it needs to - * apply the device configuration (DDC) parameters. - * - * The device can work without DDC parameters, so even if it - * fails to load the file, no need to fail the setup. - */ - btintel_load_ddc_config(hdev, ddcname); - - /* Read the Intel supported features and if new exception formats - * supported, need to load the additional DDC config to enable. - */ - btintel_read_debug_features(hdev, &features); - - /* Set DDC mask for available debug features */ - btintel_set_debug_features(hdev, &features); - - /* Read the Intel version information after loading the FW */ - err = btintel_read_version_tlv(hdev, &version); - if (err) - return err; - - btintel_version_info_tlv(hdev, &version); - -finish: - /* Set the event mask for Intel specific vendor events. This enables - * a few extra events that are useful during general operation. 
It - * does not enable any debugging related events. - * - * The device will function correctly without these events enabled - * and thus no need to fail the setup. - */ - btintel_set_event_mask(hdev, false); - - return 0; -} -static int btusb_shutdown_intel(struct hci_dev *hdev) -{ - struct sk_buff *skb; - long ret; - - /* In the shutdown sequence where Bluetooth is turned off followed - * by WiFi being turned off, turning WiFi back on causes issue with - * the RF calibration. - * - * To ensure that any RF activity has been stopped, issue HCI Reset - * command to clear all ongoing activity including advertising, - * scanning etc. - */ - skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); - if (IS_ERR(skb)) { - ret = PTR_ERR(skb); - bt_dev_err(hdev, "HCI reset during shutdown failed"); - return ret; - } - kfree_skb(skb); - - /* Some platforms have an issue with BT LED when the interface is - * down or BT radio is turned off, which takes 5 seconds to BT LED - * goes off. This command turns off the BT LED immediately. - */ - skb = __hci_cmd_sync(hdev, 0xfc3f, 0, NULL, HCI_INIT_TIMEOUT); - if (IS_ERR(skb)) { - ret = PTR_ERR(skb); - bt_dev_err(hdev, "turning off Intel device LED failed"); - return ret; - } - kfree_skb(skb); - - return 0; -} - -static int btusb_shutdown_intel_new(struct hci_dev *hdev) -{ - struct sk_buff *skb; - - /* Send HCI Reset to the controller to stop any BT activity which - * were triggered. This will help to save power and maintain the - * sync b/w Host and controller - */ - skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); - if (IS_ERR(skb)) { - bt_dev_err(hdev, "HCI reset during shutdown failed"); - return PTR_ERR(skb); - } - kfree_skb(skb); - - return 0; -} - +/* UHW CR mapping */ +#define MTK_BT_MISC 0x70002510 +#define MTK_BT_SUBSYS_RST 0x70002610 +#define MTK_UDMA_INT_STA_BT 0x74000024 +#define MTK_UDMA_INT_STA_BT1 0x74000308 +#define MTK_BT_WDT_STATUS 0x740003A0 +#define MTK_EP_RST_OPT 0x74011890 +#define MTK_EP_RST_IN_OUT_OPT 0x00010001 +#define MTK_BT_RST_DONE 0x00000100 +#define MTK_BT_RESET_WAIT_MS 100 +#define MTK_BT_RESET_NUM_TRIES 10 #define FIRMWARE_MT7663 "mediatek/mt7663pr2h.bin" #define FIRMWARE_MT7668 "mediatek/mt7668pr2h.bin" @@ -3655,6 +2674,63 @@ static int btusb_mtk_func_query(struct hci_dev *hdev) return status; } +static int btusb_mtk_uhw_reg_write(struct btusb_data *data, u32 reg, u32 val) +{ + struct hci_dev *hdev = data->hdev; + int pipe, err; + void *buf; + + buf = kzalloc(4, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + put_unaligned_le32(val, buf); + + pipe = usb_sndctrlpipe(data->udev, 0); + err = usb_control_msg(data->udev, pipe, 0x02, + 0x5E, + reg >> 16, reg & 0xffff, + buf, 4, USB_CTRL_SET_TIMEOUT); + if (err < 0) { + bt_dev_err(hdev, "Failed to write uhw reg(%d)", err); + goto err_free_buf; + } + +err_free_buf: + kfree(buf); + + return err; +} + +static int btusb_mtk_uhw_reg_read(struct btusb_data *data, u32 reg, u32 *val) +{ + struct hci_dev *hdev = data->hdev; + int pipe, err; + void *buf; + + buf = kzalloc(4, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + pipe = usb_rcvctrlpipe(data->udev, 0); + err = usb_control_msg(data->udev, pipe, 0x01, + 0xDE, + reg >> 16, reg & 0xffff, + buf, 4, USB_CTRL_SET_TIMEOUT); + if (err < 0) { + bt_dev_err(hdev, "Failed to read uhw reg(%d)", err); + goto err_free_buf; + } + + *val = get_unaligned_le32(buf); + bt_dev_dbg(hdev, "reg=%x, value=0x%08x", reg, *val); + +err_free_buf: + kfree(buf); + + return err; +} + static int btusb_mtk_reg_read(struct btusb_data 
*data, u32 reg, u32 *val) { int pipe, err, size = sizeof(u32); @@ -3734,6 +2810,9 @@ static int btusb_mtk_setup(struct hci_dev *hdev) dev_id & 0xffff, (fw_version & 0xff) + 1); err = btusb_mtk_setup_firmware_79xx(hdev, fw_bin_name); + /* It's Device EndPoint Reset Option Register */ + btusb_mtk_uhw_reg_write(data, MTK_EP_RST_OPT, MTK_EP_RST_IN_OUT_OPT); + /* Enable Bluetooth protocol */ param = 1; wmt_params.op = BTMTK_WMT_FUNC_CTRL; @@ -3747,6 +2826,8 @@ static int btusb_mtk_setup(struct hci_dev *hdev) bt_dev_err(hdev, "Failed to send wmt func ctrl (%d)", err); return err; } + + hci_set_msft_opcode(hdev, 0xFD30); goto done; default: bt_dev_err(hdev, "Unsupported hardware variant (%08x)", @@ -3857,6 +2938,83 @@ static int btusb_mtk_shutdown(struct hci_dev *hdev) return 0; } +static void btusb_mtk_cmd_timeout(struct hci_dev *hdev) +{ + struct btusb_data *data = hci_get_drvdata(hdev); + u32 val; + int err, retry = 0; + + /* It's MediaTek specific bluetooth reset mechanism via USB */ + if (test_and_set_bit(BTUSB_HW_RESET_ACTIVE, &data->flags)) { + bt_dev_err(hdev, "last reset failed? Not resetting again"); + return; + } + + err = usb_autopm_get_interface(data->intf); + if (err < 0) + return; + + btusb_stop_traffic(data); + usb_kill_anchored_urbs(&data->tx_anchor); + + /* It's Device EndPoint Reset Option Register */ + bt_dev_dbg(hdev, "Initiating reset mechanism via uhw"); + btusb_mtk_uhw_reg_write(data, MTK_EP_RST_OPT, MTK_EP_RST_IN_OUT_OPT); + btusb_mtk_uhw_reg_read(data, MTK_BT_WDT_STATUS, &val); + + /* Reset the bluetooth chip via USB interface. */ + btusb_mtk_uhw_reg_write(data, MTK_BT_SUBSYS_RST, 1); + btusb_mtk_uhw_reg_write(data, MTK_UDMA_INT_STA_BT, 0x000000FF); + btusb_mtk_uhw_reg_read(data, MTK_UDMA_INT_STA_BT, &val); + btusb_mtk_uhw_reg_write(data, MTK_UDMA_INT_STA_BT1, 0x000000FF); + btusb_mtk_uhw_reg_read(data, MTK_UDMA_INT_STA_BT1, &val); + /* MT7921 need to delay 20ms between toggle reset bit */ + msleep(20); + btusb_mtk_uhw_reg_write(data, MTK_BT_SUBSYS_RST, 0); + btusb_mtk_uhw_reg_read(data, MTK_BT_SUBSYS_RST, &val); + + /* Poll the register until reset is completed */ + do { + btusb_mtk_uhw_reg_read(data, MTK_BT_MISC, &val); + if (val & MTK_BT_RST_DONE) { + bt_dev_dbg(hdev, "Bluetooth Reset Successfully"); + break; + } + + bt_dev_dbg(hdev, "Polling Bluetooth Reset CR"); + retry++; + msleep(MTK_BT_RESET_WAIT_MS); + } while (retry < MTK_BT_RESET_NUM_TRIES); + + btusb_mtk_id_get(data, 0x70010200, &val); + if (!val) + bt_dev_err(hdev, "Can't get device id, subsys reset fail."); + + usb_queue_reset_device(data->intf); + + clear_bit(BTUSB_HW_RESET_ACTIVE, &data->flags); +} + +static int btusb_recv_acl_mtk(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct btusb_data *data = hci_get_drvdata(hdev); + u16 handle = le16_to_cpu(hci_acl_hdr(skb)->handle); + + switch (handle) { + case 0xfc6f: /* Firmware dump from device */ + /* When the firmware hangs, the device can no longer + * suspend and thus disable auto-suspend. 
+ */ + usb_disable_autosuspend(data->udev); + fallthrough; + case 0x05ff: /* Firmware debug logging 1 */ + case 0x05fe: /* Firmware debug logging 2 */ + return hci_recv_diag(hdev, skb); + } + + return hci_recv_frame(hdev, skb); +} + MODULE_FIRMWARE(FIRMWARE_MT7663); MODULE_FIRMWARE(FIRMWARE_MT7668); @@ -4437,9 +3595,6 @@ static bool btusb_prevent_wake(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); - if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags)) - return true; - return !device_may_wakeup(&data->udev->dev); } @@ -4465,7 +3620,7 @@ static int btusb_probe(struct usb_interface *intf, struct btusb_data *data; struct hci_dev *hdev; unsigned ifnum_base; - int i, err; + int i, err, priv_size; BT_DBG("intf %p id %p", intf, id); @@ -4551,16 +3706,23 @@ static int btusb_probe(struct usb_interface *intf, init_usb_anchor(&data->ctrl_anchor); spin_lock_init(&data->rxlock); - if (id->driver_info & BTUSB_INTEL_NEW) { + priv_size = 0; + + data->recv_event = hci_recv_frame; + data->recv_bulk = btusb_recv_bulk; + + if (id->driver_info & BTUSB_INTEL_COMBINED) { + /* Allocate extra space for Intel device */ + priv_size += sizeof(struct btintel_data); + + /* Override the rx handlers */ data->recv_event = btusb_recv_event_intel; data->recv_bulk = btusb_recv_bulk_intel; - set_bit(BTUSB_BOOTLOADER, &data->flags); - } else { - data->recv_event = hci_recv_frame; - data->recv_bulk = btusb_recv_bulk; } - hdev = hci_alloc_dev(); + data->recv_acl = hci_recv_frame; + + hdev = hci_alloc_dev_priv(priv_size); if (!hdev) return -ENOMEM; @@ -4634,48 +3796,18 @@ static int btusb_probe(struct usb_interface *intf, data->diag = usb_ifnum_to_if(data->udev, ifnum_base + 2); } - if (id->driver_info & BTUSB_INTEL) { - hdev->manufacturer = 2; - hdev->setup = btusb_setup_intel; - hdev->shutdown = btusb_shutdown_intel; - hdev->set_diag = btintel_set_diag_mfg; - hdev->set_bdaddr = btintel_set_bdaddr; - hdev->cmd_timeout = btusb_intel_cmd_timeout; - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks); - } - - if (id->driver_info & BTUSB_INTEL_NEW) { - hdev->manufacturer = 2; - hdev->send = btusb_send_frame_intel; - hdev->setup = btusb_setup_intel_new; - hdev->shutdown = btusb_shutdown_intel_new; - hdev->hw_error = btintel_hw_error; - hdev->set_diag = btintel_set_diag; - hdev->set_bdaddr = btintel_set_bdaddr; - hdev->cmd_timeout = btusb_intel_cmd_timeout; - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks); - } + /* Combined Intel Device setup to support multiple setup routine */ + if (id->driver_info & BTUSB_INTEL_COMBINED) { + err = btintel_configure_setup(hdev); + if (err) + goto out_free_dev; - if (id->driver_info & BTUSB_INTEL_NEWGEN) { - hdev->manufacturer = 2; + /* Transport specific configuration */ hdev->send = btusb_send_frame_intel; - hdev->setup = btusb_setup_intel_newgen; - hdev->shutdown = btusb_shutdown_intel_new; - hdev->hw_error = btintel_hw_error; - hdev->set_diag = btintel_set_diag; - hdev->set_bdaddr = btintel_set_bdaddr; hdev->cmd_timeout = btusb_intel_cmd_timeout; - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks); - data->recv_event = btusb_recv_event_intel; - data->recv_bulk = btusb_recv_bulk_intel; - 
set_bit(BTUSB_BOOTLOADER, &data->flags); + if (id->driver_info & BTUSB_INTEL_BROKEN_INITIAL_NCMD) + btintel_set_flag(hdev, INTEL_BROKEN_INITIAL_NCMD); } if (id->driver_info & BTUSB_MARVELL) @@ -4686,7 +3818,9 @@ static int btusb_probe(struct usb_interface *intf, hdev->setup = btusb_mtk_setup; hdev->shutdown = btusb_mtk_shutdown; hdev->manufacturer = 70; + hdev->cmd_timeout = btusb_mtk_cmd_timeout; set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + data->recv_acl = btusb_recv_acl_mtk; } if (id->driver_info & BTUSB_SWAVE) { @@ -4720,6 +3854,7 @@ static int btusb_probe(struct usb_interface *intf, hdev->set_bdaddr = btusb_set_bdaddr_wcn6855; hdev->cmd_timeout = btusb_qca_cmd_timeout; set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_set_msft_opcode(hdev, 0xFD70); } if (id->driver_info & BTUSB_AMP) { @@ -4737,11 +3872,9 @@ static int btusb_probe(struct usb_interface *intf, hdev->shutdown = btrtl_shutdown_realtek; hdev->cmd_timeout = btusb_rtl_cmd_timeout; - /* Realtek devices lose their updated firmware over global - * suspend that means host doesn't send SET_FEATURE - * (DEVICE_REMOTE_WAKEUP) - */ - set_bit(BTUSB_WAKEUP_DISABLE, &data->flags); + /* Realtek devices need to set remote wakeup on auto-suspend */ + set_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags); + set_bit(BTUSB_USE_ALT3_FOR_WBS, &data->flags); } if (!reset) @@ -4916,12 +4049,15 @@ static int btusb_suspend(struct usb_interface *intf, pm_message_t message) * Actually, it depends on whether the usb host sends * set feature (enable wakeup) or not. */ - if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags)) { + if (test_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags)) { if (PMSG_IS_AUTO(message) && device_can_wakeup(&data->udev->dev)) data->udev->do_remote_wakeup = 1; - else if (!PMSG_IS_AUTO(message)) + else if (!PMSG_IS_AUTO(message) && + !device_may_wakeup(&data->udev->dev)) { + data->udev->do_remote_wakeup = 0; data->udev->reset_resume = 1; + } } return 0; diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 3cd57fc56ade..ef54afa29357 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -51,6 +51,7 @@ /** * struct bcm_device_data - device specific data * @no_early_set_baudrate: Disallow set baudrate before driver setup() + * @drive_rts_on_open: drive RTS signal on ->open() when platform requires it */ struct bcm_device_data { bool no_early_set_baudrate; @@ -77,6 +78,8 @@ struct bcm_device_data { * @btlp: Apple ACPI method to toggle BT_WAKE pin ("Bluetooth Low Power") * @btpu: Apple ACPI method to drive BT_REG_ON pin high ("Bluetooth Power Up") * @btpd: Apple ACPI method to drive BT_REG_ON pin low ("Bluetooth Power Down") + * @gpio_count: internal counter for GPIO resources associated with ACPI device + * @gpio_int_idx: index in _CRS for GpioInt() resource * @txco_clk: external reference frequency clock used by Bluetooth device * @lpo_clk: external LPO clock used by Bluetooth device * @supplies: VBAT and VDDIO supplies used by Bluetooth device @@ -88,10 +91,13 @@ struct bcm_device_data { * set to 0 if @init_speed is already the preferred baudrate * @irq: interrupt triggered by HOST_WAKE_BT pin * @irq_active_low: whether @irq is active low + * @irq_acquired: flag to show if IRQ handler has been assigned * @hu: pointer to HCI UART controller struct, * used to disable flow control during runtime suspend and system sleep * @is_suspended: whether flow control is currently disabled * @no_early_set_baudrate: don't set_baudrate before setup() + * @drive_rts_on_open: drive RTS 
signal on ->open() when platform requires it + * @pcm_int_params: keep the initial PCM configuration */ struct bcm_device { /* Must be the first member, hci_serdev.c expects this. */ diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index e0520639f4ba..0c0dedece59c 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/mod_devicetable.h> #include <linux/of_device.h> +#include <linux/pm_runtime.h> #include <linux/serdev.h> #include <linux/skbuff.h> @@ -21,6 +22,8 @@ #include "btrtl.h" #include "hci_uart.h" +#define SUSPEND_TIMEOUT_MS 6000 + #define HCI_3WIRE_ACK_PKT 0 #define HCI_3WIRE_LINK_PKT 15 @@ -51,8 +54,10 @@ /* H5 state flags */ enum { - H5_RX_ESC, /* SLIP escape mode */ - H5_TX_ACK_REQ, /* Pending ack to send */ + H5_RX_ESC, /* SLIP escape mode */ + H5_TX_ACK_REQ, /* Pending ack to send */ + H5_WAKEUP_DISABLE, /* Device cannot wake host */ + H5_HW_FLOW_CONTROL, /* Use HW flow control */ }; struct h5 { @@ -97,6 +102,10 @@ struct h5 { struct gpio_desc *device_wake_gpio; }; +enum h5_driver_info { + H5_INFO_WAKEUP_DISABLE = BIT(0), +}; + struct h5_vnd { int (*setup)(struct h5 *h5); void (*open)(struct h5 *h5); @@ -106,6 +115,11 @@ struct h5_vnd { const struct acpi_gpio_mapping *acpi_gpio_map; }; +struct h5_device_data { + uint32_t driver_info; + struct h5_vnd *vnd; +}; + static void h5_reset_rx(struct h5 *h5); static void h5_link_control(struct hci_uart *hu, const void *data, size_t len) @@ -573,6 +587,10 @@ static int h5_recv(struct hci_uart *hu, const void *data, int count) count -= processed; } + pm_runtime_get(&hu->serdev->dev); + pm_runtime_mark_last_busy(&hu->serdev->dev); + pm_runtime_put_autosuspend(&hu->serdev->dev); + return 0; } @@ -609,6 +627,10 @@ static int h5_enqueue(struct hci_uart *hu, struct sk_buff *skb) break; } + pm_runtime_get_sync(&hu->serdev->dev); + pm_runtime_mark_last_busy(&hu->serdev->dev); + pm_runtime_put_autosuspend(&hu->serdev->dev); + return 0; } @@ -791,6 +813,8 @@ static int h5_serdev_probe(struct serdev_device *serdev) { struct device *dev = &serdev->dev; struct h5 *h5; + const struct h5_device_data *data; + int err; h5 = devm_kzalloc(dev, sizeof(*h5), GFP_KERNEL); if (!h5) @@ -807,20 +831,19 @@ static int h5_serdev_probe(struct serdev_device *serdev) if (!match) return -ENODEV; - h5->vnd = (const struct h5_vnd *)match->driver_data; + data = (const struct h5_device_data *)match->driver_data; + h5->vnd = data->vnd; h5->id = (char *)match->id; if (h5->vnd->acpi_gpio_map) devm_acpi_dev_add_driver_gpios(dev, h5->vnd->acpi_gpio_map); } else { - const void *data; - data = of_device_get_match_data(dev); if (!data) return -ENODEV; - h5->vnd = (const struct h5_vnd *)data; + h5->vnd = data->vnd; } @@ -833,7 +856,14 @@ static int h5_serdev_probe(struct serdev_device *serdev) if (IS_ERR(h5->device_wake_gpio)) return PTR_ERR(h5->device_wake_gpio); - return hci_uart_register_device(&h5->serdev_hu, &h5p); + err = hci_uart_register_device(&h5->serdev_hu, &h5p); + if (err) + return err; + + if (data->driver_info & H5_INFO_WAKEUP_DISABLE) + set_bit(H5_WAKEUP_DISABLE, &h5->flags); + + return 0; } static void h5_serdev_remove(struct serdev_device *serdev) @@ -902,6 +932,9 @@ static int h5_btrtl_setup(struct h5 *h5) serdev_device_set_baudrate(h5->hu->serdev, controller_baudrate); serdev_device_set_flow_control(h5->hu->serdev, flow_control); + if (flow_control) + set_bit(H5_HW_FLOW_CONTROL, &h5->flags); + err = btrtl_download_firmware(h5->hu->hdev, btrtl_dev); /* Give the 
device some time before the hci-core sends it a reset */ usleep_range(10000, 20000); @@ -916,11 +949,25 @@ out_free: static void h5_btrtl_open(struct h5 *h5) { + /* + * Since h5_btrtl_resume() does a device_reprobe() the suspend handling + * done by the hci_suspend_notifier is not necessary; it actually causes + * delays and a bunch of errors to get logged, so disable it. + */ + if (test_bit(H5_WAKEUP_DISABLE, &h5->flags)) + set_bit(HCI_UART_NO_SUSPEND_NOTIFIER, &h5->hu->flags); + /* Devices always start with these fixed parameters */ serdev_device_set_flow_control(h5->hu->serdev, false); serdev_device_set_parity(h5->hu->serdev, SERDEV_PARITY_EVEN); serdev_device_set_baudrate(h5->hu->serdev, 115200); + pm_runtime_set_active(&h5->hu->serdev->dev); + pm_runtime_use_autosuspend(&h5->hu->serdev->dev); + pm_runtime_set_autosuspend_delay(&h5->hu->serdev->dev, + SUSPEND_TIMEOUT_MS); + pm_runtime_enable(&h5->hu->serdev->dev); + /* The controller needs up to 500ms to wakeup */ gpiod_set_value_cansleep(h5->enable_gpio, 1); gpiod_set_value_cansleep(h5->device_wake_gpio, 1); @@ -929,21 +976,26 @@ static void h5_btrtl_open(struct h5 *h5) static void h5_btrtl_close(struct h5 *h5) { + pm_runtime_disable(&h5->hu->serdev->dev); + gpiod_set_value_cansleep(h5->device_wake_gpio, 0); gpiod_set_value_cansleep(h5->enable_gpio, 0); } /* Suspend/resume support. On many devices the RTL BT device loses power during * suspend/resume, causing it to lose its firmware and all state. So we simply - * turn it off on suspend and reprobe on resume. This mirrors how RTL devices - * are handled in the USB driver, where the USB_QUIRK_RESET_RESUME is used which + * turn it off on suspend and reprobe on resume. This mirrors how RTL devices + * are handled in the USB driver, where the BTUSB_WAKEUP_DISABLE is used which * also causes a reprobe on resume. 
*/ static int h5_btrtl_suspend(struct h5 *h5) { serdev_device_set_flow_control(h5->hu->serdev, false); gpiod_set_value_cansleep(h5->device_wake_gpio, 0); - gpiod_set_value_cansleep(h5->enable_gpio, 0); + + if (test_bit(H5_WAKEUP_DISABLE, &h5->flags)) + gpiod_set_value_cansleep(h5->enable_gpio, 0); + return 0; } @@ -969,17 +1021,25 @@ static void h5_btrtl_reprobe_worker(struct work_struct *work) static int h5_btrtl_resume(struct h5 *h5) { - struct h5_btrtl_reprobe *reprobe; + if (test_bit(H5_WAKEUP_DISABLE, &h5->flags)) { + struct h5_btrtl_reprobe *reprobe; - reprobe = kzalloc(sizeof(*reprobe), GFP_KERNEL); - if (!reprobe) - return -ENOMEM; + reprobe = kzalloc(sizeof(*reprobe), GFP_KERNEL); + if (!reprobe) + return -ENOMEM; - __module_get(THIS_MODULE); + __module_get(THIS_MODULE); + + INIT_WORK(&reprobe->work, h5_btrtl_reprobe_worker); + reprobe->dev = get_device(&h5->hu->serdev->dev); + queue_work(system_long_wq, &reprobe->work); + } else { + gpiod_set_value_cansleep(h5->device_wake_gpio, 1); + + if (test_bit(H5_HW_FLOW_CONTROL, &h5->flags)) + serdev_device_set_flow_control(h5->hu->serdev, true); + } - INIT_WORK(&reprobe->work, h5_btrtl_reprobe_worker); - reprobe->dev = get_device(&h5->hu->serdev->dev); - queue_work(system_long_wq, &reprobe->work); return 0; } @@ -1001,13 +1061,22 @@ static struct h5_vnd rtl_vnd = { .resume = h5_btrtl_resume, .acpi_gpio_map = acpi_btrtl_gpios, }; + +static const struct h5_device_data h5_data_rtl8822cs = { + .vnd = &rtl_vnd, +}; + +static const struct h5_device_data h5_data_rtl8723bs = { + .driver_info = H5_INFO_WAKEUP_DISABLE, + .vnd = &rtl_vnd, +}; #endif #ifdef CONFIG_ACPI static const struct acpi_device_id h5_acpi_match[] = { #ifdef CONFIG_BT_HCIUART_RTL - { "OBDA0623", (kernel_ulong_t)&rtl_vnd }, - { "OBDA8723", (kernel_ulong_t)&rtl_vnd }, + { "OBDA0623", (kernel_ulong_t)&h5_data_rtl8723bs }, + { "OBDA8723", (kernel_ulong_t)&h5_data_rtl8723bs }, #endif { }, }; @@ -1016,16 +1085,17 @@ MODULE_DEVICE_TABLE(acpi, h5_acpi_match); static const struct dev_pm_ops h5_serdev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(h5_serdev_suspend, h5_serdev_resume) + SET_RUNTIME_PM_OPS(h5_serdev_suspend, h5_serdev_resume, NULL) }; static const struct of_device_id rtl_bluetooth_of_match[] = { #ifdef CONFIG_BT_HCIUART_RTL { .compatible = "realtek,rtl8822cs-bt", - .data = (const void *)&rtl_vnd }, + .data = (const void *)&h5_data_rtl8822cs }, { .compatible = "realtek,rtl8723bs-bt", - .data = (const void *)&rtl_vnd }, + .data = (const void *)&h5_data_rtl8723bs }, { .compatible = "realtek,rtl8723ds-bt", - .data = (const void *)&rtl_vnd }, + .data = (const void *)&h5_data_rtl8723bs }, #endif { }, }; diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 9e03402ef1b3..3b00d82d36cf 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -343,6 +343,9 @@ int hci_uart_register_device(struct hci_uart *hu, hdev->setup = hci_uart_setup; SET_HCIDEV_DEV(hdev, &hu->serdev->dev); + if (test_bit(HCI_UART_NO_SUSPEND_NOTIFIER, &hu->flags)) + set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); + if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags)) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index 4e039d7a16f8..fb4a2d0d8cc8 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -86,9 +86,10 @@ struct hci_uart { }; /* HCI_UART proto flag bits */ -#define HCI_UART_PROTO_SET 0 -#define HCI_UART_REGISTERED 1 -#define HCI_UART_PROTO_READY 2 +#define 
HCI_UART_PROTO_SET 0 +#define HCI_UART_REGISTERED 1 +#define HCI_UART_PROTO_READY 2 +#define HCI_UART_NO_SUSPEND_NOTIFIER 3 /* TX states */ #define HCI_UART_SENDING 1 diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index 5b9ea66b92dc..bc239a11aa69 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -682,7 +682,7 @@ void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl, struct image_info *img_info); void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl); int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan); + struct mhi_chan *mhi_chan, unsigned int flags); int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan); void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl, diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index fc9196f11cb7..84448233f64c 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -1430,7 +1430,7 @@ exit_unprepare_channel: } int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan) + struct mhi_chan *mhi_chan, unsigned int flags) { int ret = 0; struct device *dev = &mhi_chan->mhi_dev->dev; @@ -1455,6 +1455,9 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, if (ret) goto error_pm_state; + if (mhi_chan->dir == DMA_FROM_DEVICE) + mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS); + /* Pre-allocate buffer for xfer ring */ if (mhi_chan->pre_alloc) { int nr_el = get_nr_avail_ring_elements(mhi_cntrl, @@ -1610,7 +1613,7 @@ void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan) } /* Move channel to start state */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags) { int ret, dir; struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl; @@ -1621,7 +1624,7 @@ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) if (!mhi_chan) continue; - ret = mhi_prepare_channel(mhi_cntrl, mhi_chan); + ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags); if (ret) goto error_open_chan; } diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 38cb116ed433..0ef98e3ba341 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -100,6 +100,7 @@ static const char * const clock_names[SYSC_MAX_CLOCKS] = { * @cookie: data used by legacy platform callbacks * @name: name if available * @revision: interconnect target module revision + * @reserved: target module is reserved and already in use * @enabled: sysc runtime enabled status * @needs_resume: runtime resume needed on resume from suspend * @child_needs_resume: runtime resume needed for child on resume from suspend @@ -130,6 +131,7 @@ struct sysc { struct ti_sysc_cookie cookie; const char *name; u32 revision; + unsigned int reserved:1; unsigned int enabled:1; unsigned int needs_resume:1; unsigned int child_needs_resume:1; @@ -2951,6 +2953,8 @@ static int sysc_init_soc(struct sysc *ddata) case SOC_3430 ... 
SOC_3630: sysc_add_disabled(0x48304000); /* timer12 */ break; + case SOC_AM3: + sysc_add_disabled(0x48310000); /* rng */ default: break; } @@ -3093,8 +3097,8 @@ static int sysc_probe(struct platform_device *pdev) return error; error = sysc_check_active_timer(ddata); - if (error) - return error; + if (error == -EBUSY) + ddata->reserved = true; error = sysc_get_clocks(ddata); if (error) @@ -3130,11 +3134,15 @@ static int sysc_probe(struct platform_device *pdev) sysc_show_registers(ddata); ddata->dev->type = &sysc_device_type; - error = of_platform_populate(ddata->dev->of_node, sysc_match_table, - pdata ? pdata->auxdata : NULL, - ddata->dev); - if (error) - goto err; + + if (!ddata->reserved) { + error = of_platform_populate(ddata->dev->of_node, + sysc_match_table, + pdata ? pdata->auxdata : NULL, + ddata->dev); + if (error) + goto err; + } INIT_DELAYED_WORK(&ddata->idle_work, ti_sysc_idle); diff --git a/drivers/char/tpm/tpm_ftpm_tee.c b/drivers/char/tpm/tpm_ftpm_tee.c index 2ccdf8ac6994..6e3235565a4d 100644 --- a/drivers/char/tpm/tpm_ftpm_tee.c +++ b/drivers/char/tpm/tpm_ftpm_tee.c @@ -254,11 +254,11 @@ static int ftpm_tee_probe(struct device *dev) pvt_data->session = sess_arg.session; /* Allocate dynamic shared memory with fTPM TA */ - pvt_data->shm = tee_shm_alloc(pvt_data->ctx, - MAX_COMMAND_SIZE + MAX_RESPONSE_SIZE, - TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); + pvt_data->shm = tee_shm_alloc_kernel_buf(pvt_data->ctx, + MAX_COMMAND_SIZE + + MAX_RESPONSE_SIZE); if (IS_ERR(pvt_data->shm)) { - dev_err(dev, "%s: tee_shm_alloc failed\n", __func__); + dev_err(dev, "%s: tee_shm_alloc_kernel_buf failed\n", __func__); rc = -ENOMEM; goto out_shm_alloc; } diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index be160764911b..f9d5b7334341 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -92,13 +92,20 @@ int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks, } EXPORT_SYMBOL_GPL(devm_clk_bulk_get_optional); +static void devm_clk_bulk_release_all(struct device *dev, void *res) +{ + struct clk_bulk_devres *devres = res; + + clk_bulk_put_all(devres->num_clks, devres->clks); +} + int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks) { struct clk_bulk_devres *devres; int ret; - devres = devres_alloc(devm_clk_bulk_release, + devres = devres_alloc(devm_clk_bulk_release_all, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index 18117ce5ff85..5c75e3d906c2 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -526,7 +526,7 @@ struct stm32f4_pll { struct stm32f4_pll_post_div_data { int idx; - u8 pll_num; + int pll_idx; const char *name; const char *parent; u8 flag; @@ -557,13 +557,13 @@ static const struct clk_div_table post_divr_table[] = { #define MAX_POST_DIV 3 static const struct stm32f4_pll_post_div_data post_div_data[MAX_POST_DIV] = { - { CLK_I2SQ_PDIV, PLL_I2S, "plli2s-q-div", "plli2s-q", + { CLK_I2SQ_PDIV, PLL_VCO_I2S, "plli2s-q-div", "plli2s-q", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 0, 5, 0, NULL}, - { CLK_SAIQ_PDIV, PLL_SAI, "pllsai-q-div", "pllsai-q", + { CLK_SAIQ_PDIV, PLL_VCO_SAI, "pllsai-q-div", "pllsai-q", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 8, 5, 0, NULL }, - { NO_IDX, PLL_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT, + { NO_IDX, PLL_VCO_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 16, 2, 0, post_divr_table }, }; @@ -1774,7 +1774,7 @@ static void __init 
stm32f4_rcc_init(struct device_node *np) post_div->width, post_div->flag_div, post_div->div_table, - clks[post_div->pll_num], + clks[post_div->pll_idx], &stm32f4_clk_lock); if (post_div->idx != NO_IDX) diff --git a/drivers/clk/hisilicon/Kconfig b/drivers/clk/hisilicon/Kconfig index 5ecc37aaa118..c1ec75aa4ccd 100644 --- a/drivers/clk/hisilicon/Kconfig +++ b/drivers/clk/hisilicon/Kconfig @@ -18,6 +18,7 @@ config COMMON_CLK_HI3519 config COMMON_CLK_HI3559A bool "Hi3559A Clock Driver" depends on ARCH_HISI || COMPILE_TEST + select RESET_HISI default ARCH_HISI help Build the clock driver for hi3559a. diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c index 800b2fef1887..b2c142f3a649 100644 --- a/drivers/clk/qcom/clk-smd-rpm.c +++ b/drivers/clk/qcom/clk-smd-rpm.c @@ -467,7 +467,7 @@ DEFINE_CLK_SMD_RPM(msm8936, sysmmnoc_clk, sysmmnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, static struct clk_smd_rpm *msm8936_clks[] = { [RPM_SMD_PCNOC_CLK] = &msm8916_pcnoc_clk, - [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_clk, + [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_a_clk, [RPM_SMD_SNOC_CLK] = &msm8916_snoc_clk, [RPM_SMD_SNOC_A_CLK] = &msm8916_snoc_a_clk, [RPM_SMD_BIMC_CLK] = &msm8916_bimc_clk, diff --git a/drivers/clk/tegra/clk-sdmmc-mux.c b/drivers/clk/tegra/clk-sdmmc-mux.c index 316912d3b1a4..4f2c3309eea4 100644 --- a/drivers/clk/tegra/clk-sdmmc-mux.c +++ b/drivers/clk/tegra/clk-sdmmc-mux.c @@ -194,6 +194,15 @@ static void clk_sdmmc_mux_disable(struct clk_hw *hw) gate_ops->disable(gate_hw); } +static void clk_sdmmc_mux_disable_unused(struct clk_hw *hw) +{ + struct tegra_sdmmc_mux *sdmmc_mux = to_clk_sdmmc_mux(hw); + const struct clk_ops *gate_ops = sdmmc_mux->gate_ops; + struct clk_hw *gate_hw = &sdmmc_mux->gate.hw; + + gate_ops->disable_unused(gate_hw); +} + static void clk_sdmmc_mux_restore_context(struct clk_hw *hw) { struct clk_hw *parent = clk_hw_get_parent(hw); @@ -218,6 +227,7 @@ static const struct clk_ops tegra_clk_sdmmc_mux_ops = { .is_enabled = clk_sdmmc_mux_is_enabled, .enable = clk_sdmmc_mux_enable, .disable = clk_sdmmc_mux_disable, + .disable_unused = clk_sdmmc_mux_disable_unused, .restore_context = clk_sdmmc_mux_restore_context, }; diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 7b91060e82f6..d9262db79cae 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -382,8 +382,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum; alt_recent = idx_recent_sum > NR_RECENT / 2; if (alt_recent || alt_intercepts) { - s64 last_enabled_span_ns = duration_ns; - int last_enabled_idx = idx; + s64 first_suitable_span_ns = duration_ns; + int first_suitable_idx = idx; /* * Look for the deepest idle state whose target residency had @@ -397,37 +397,51 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, intercept_sum = 0; recent_sum = 0; - for (i = idx - 1; i >= idx0; i--) { + for (i = idx - 1; i >= 0; i--) { struct teo_bin *bin = &cpu_data->state_bins[i]; s64 span_ns; intercept_sum += bin->intercepts; recent_sum += bin->recent; + span_ns = teo_middle_of_bin(i, drv); + + if ((!alt_recent || 2 * recent_sum > idx_recent_sum) && + (!alt_intercepts || + 2 * intercept_sum > idx_intercept_sum)) { + if (teo_time_ok(span_ns) && + !dev->states_usage[i].disable) { + idx = i; + duration_ns = span_ns; + } else { + /* + * The current state is too shallow or + * disabled, so take the first enabled + * deeper state with suitable 
time span. + */ + idx = first_suitable_idx; + duration_ns = first_suitable_span_ns; + } + break; + } + if (dev->states_usage[i].disable) continue; - span_ns = teo_middle_of_bin(i, drv); if (!teo_time_ok(span_ns)) { /* - * The current state is too shallow, so select - * the first enabled deeper state. + * The current state is too shallow, but if an + * alternative candidate state has been found, + * it may still turn out to be a better choice. */ - duration_ns = last_enabled_span_ns; - idx = last_enabled_idx; - break; - } + if (first_suitable_idx != idx) + continue; - if ((!alt_recent || 2 * recent_sum > idx_recent_sum) && - (!alt_intercepts || - 2 * intercept_sum > idx_intercept_sum)) { - idx = i; - duration_ns = span_ns; break; } - last_enabled_span_ns = span_ns; - last_enabled_idx = i; + first_suitable_span_ns = span_ns; + first_suitable_idx = i; } } diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 5fa6ae9dbc8b..44736cbd446e 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -313,7 +313,7 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, return -ENXIO; if (nr_pages < 0) - return nr_pages; + return -EINVAL; avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 26482c7d4c3a..fc708be7ad9a 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -294,6 +294,14 @@ struct idxd_desc { struct idxd_wq *wq; }; +/* + * This is software defined error for the completion status. We overload the error code + * that will never appear in completion status and only SWERR register. + */ +enum idxd_completion_status { + IDXD_COMP_DESC_ABORT = 0xff, +}; + #define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev) #define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev) @@ -482,4 +490,10 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif +static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) +{ + idxd_dma_complete_txd(desc, reason); + idxd_free_desc(desc->wq, desc); +} + #endif diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index c8ae41d36040..c0f4c0422f32 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -102,6 +102,8 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) spin_lock_init(&idxd->irq_entries[i].list_lock); } + idxd_msix_perm_setup(idxd); + irq_entry = &idxd->irq_entries[0]; rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread, 0, "idxd-misc", irq_entry); @@ -148,7 +150,6 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } idxd_unmask_error_interrupts(idxd); - idxd_msix_perm_setup(idxd); return 0; err_wq_irqs: @@ -162,6 +163,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) err_misc_irq: /* Disable error interrupt generation */ idxd_mask_error_interrupts(idxd); + idxd_msix_perm_clear(idxd); err_irq_entries: pci_free_irq_vectors(pdev); dev_err(dev, "No usable interrupts\n"); @@ -758,32 +760,40 @@ static void idxd_shutdown(struct pci_dev *pdev) for (i = 0; i < msixcnt; i++) { irq_entry = &idxd->irq_entries[i]; synchronize_irq(irq_entry->vector); - free_irq(irq_entry->vector, irq_entry); if (i == 0) continue; idxd_flush_pending_llist(irq_entry); idxd_flush_work_list(irq_entry); } - - idxd_msix_perm_clear(idxd); - idxd_release_int_handles(idxd); - pci_free_irq_vectors(pdev); - pci_iounmap(pdev, idxd->reg_base); - pci_disable_device(pdev); - 
destroy_workqueue(idxd->wq); + flush_workqueue(idxd->wq); } static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); + struct idxd_irq_entry *irq_entry; + int msixcnt = pci_msix_vec_count(pdev); + int i; dev_dbg(&pdev->dev, "%s called\n", __func__); idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); idxd_unregister_devices(idxd); - perfmon_pmu_remove(idxd); + + for (i = 0; i < msixcnt; i++) { + irq_entry = &idxd->irq_entries[i]; + free_irq(irq_entry->vector, irq_entry); + } + idxd_msix_perm_clear(idxd); + idxd_release_int_handles(idxd); + pci_free_irq_vectors(pdev); + pci_iounmap(pdev, idxd->reg_base); iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + pci_disable_device(pdev); + destroy_workqueue(idxd->wq); + perfmon_pmu_remove(idxd); + device_unregister(&idxd->conf_dev); } static struct pci_driver idxd_pci_driver = { diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index ae68e1e5487a..4e3a7198c0ca 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -245,12 +245,6 @@ static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr) return false; } -static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) -{ - idxd_dma_complete_txd(desc, reason); - idxd_free_desc(desc->wq, desc); -} - static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, enum irq_work_type wtype, int *processed, u64 data) @@ -272,8 +266,16 @@ static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, reason = IDXD_COMPLETE_DEV_FAIL; llist_for_each_entry_safe(desc, t, head, llnode) { - if (desc->completion->status) { - if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; + + if (status) { + if (unlikely(status == IDXD_COMP_DESC_ABORT)) { + complete_desc(desc, IDXD_COMPLETE_ABORT); + (*processed)++; + continue; + } + + if (unlikely(status != DSA_COMP_SUCCESS)) match_fault(desc, data); complete_desc(desc, reason); (*processed)++; @@ -329,7 +331,14 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry, spin_unlock_irqrestore(&irq_entry->list_lock, flags); list_for_each_entry(desc, &flist, list) { - if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; + + if (unlikely(status == IDXD_COMP_DESC_ABORT)) { + complete_desc(desc, IDXD_COMPLETE_ABORT); + continue; + } + + if (unlikely(status != DSA_COMP_SUCCESS)) match_fault(desc, data); complete_desc(desc, reason); } diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 19afb62abaff..36c9c1a89b7e 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -25,11 +25,10 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) * Descriptor completion vectors are 1...N for MSIX. We will round * robin through the N vectors. */ - wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1; + wq->vec_ptr = desc->vector = (wq->vec_ptr % idxd->num_wq_irqs) + 1; if (!idxd->int_handles) { desc->hw->int_handle = wq->vec_ptr; } else { - desc->vector = wq->vec_ptr; /* * int_handles are only for descriptor completion. However for device * MSIX enumeration, vec 0 is used for misc interrupts. 
Therefore even @@ -88,9 +87,64 @@ void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) sbitmap_queue_clear(&wq->sbq, desc->id, cpu); } +static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *n; + + lockdep_assert_held(&ie->list_lock); + list_for_each_entry_safe(d, n, &ie->work_list, list) { + if (d == desc) { + list_del(&d->list); + return d; + } + } + + /* + * At this point, the desc needs to be aborted is held by the completion + * handler where it has taken it off the pending list but has not added to the + * work list. It will be cleaned up by the interrupt handler when it sees the + * IDXD_COMP_DESC_ABORT for completion status. + */ + return NULL; +} + +static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *t, *found = NULL; + struct llist_node *head; + unsigned long flags; + + desc->completion->status = IDXD_COMP_DESC_ABORT; + /* + * Grab the list lock so it will block the irq thread handler. This allows the + * abort code to locate the descriptor need to be aborted. + */ + spin_lock_irqsave(&ie->list_lock, flags); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) { + if (d == desc) { + found = desc; + continue; + } + list_add_tail(&desc->list, &ie->work_list); + } + } + + if (!found) + found = list_abort_desc(wq, ie, desc); + spin_unlock_irqrestore(&ie->list_lock, flags); + + if (found) + complete_desc(found, IDXD_COMPLETE_ABORT); +} + int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; + struct idxd_irq_entry *ie = NULL; void __iomem *portal; int rc; @@ -108,6 +162,16 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * even on UP because the recipient is a device. */ wmb(); + + /* + * Pending the descriptor to the lockless list for the irq_entry + * that we designated the descriptor to. + */ + if (desc->hw->flags & IDXD_OP_FLAG_RCI) { + ie = &idxd->irq_entries[desc->vector]; + llist_add(&desc->llnode, &ie->pending_llist); + } + if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { @@ -118,29 +182,13 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * device is not accepting descriptor at all. */ rc = enqcmds(portal, desc->hw); - if (rc < 0) + if (rc < 0) { + if (ie) + llist_abort_desc(wq, ie, desc); return rc; + } } percpu_ref_put(&wq->wq_active); - - /* - * Pending the descriptor to the lockless list for the irq_entry - * that we designated the descriptor to. - */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) { - int vec; - - /* - * If the driver is on host kernel, it would be the value - * assigned to interrupt handle, which is index for MSIX - * vector. If it's guest then can't use the int_handle since - * that is the index to IMS for the entire device. The guest - * device local index will be used. - */ - vec = !idxd->int_handles ? 
desc->hw->int_handle : desc->vector; - llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist); - } - return 0; } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 0460d58e3941..bb4df63906a7 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1744,8 +1744,6 @@ void idxd_unregister_devices(struct idxd_device *idxd) device_unregister(&group->conf_dev); } - - device_unregister(&idxd->conf_dev); } int idxd_register_bus_type(void) diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 7f116bbcfad2..2ddc31e64db0 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -812,6 +812,8 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg( dma_length += sg_dma_len(sg); } + imxdma_config_write(chan, &imxdmac->config, direction); + switch (imxdmac->word_size) { case DMA_SLAVE_BUSWIDTH_4_BYTES: if (sg_dma_len(sgl) & 3 || sgl->dma_address & 3) diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index ec00b20ae8e4..ac61ecda2926 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -67,8 +67,12 @@ static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, return NULL; ofdma_target = of_dma_find_controller(&dma_spec_target); - if (!ofdma_target) - return NULL; + if (!ofdma_target) { + ofdma->dma_router->route_free(ofdma->dma_router->dev, + route_data); + chan = ERR_PTR(-EPROBE_DEFER); + goto err; + } chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target); if (IS_ERR_OR_NULL(chan)) { @@ -89,6 +93,7 @@ static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, } } +err: /* * Need to put the node back since the ofdma->of_dma_route_allocate * has taken it for generating the new, translated dma_spec diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index 8f7ceb698226..1cc06900153e 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -855,8 +855,8 @@ static int usb_dmac_probe(struct platform_device *pdev) error: of_dma_controller_free(pdev->dev.of_node); - pm_runtime_put(&pdev->dev); error_pm: + pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); return ret; } diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index f54ecb123a52..7dd1d3d0bf06 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -1200,7 +1200,7 @@ static int stm32_dma_alloc_chan_resources(struct dma_chan *c) chan->config_init = false; - ret = pm_runtime_get_sync(dmadev->ddev.dev); + ret = pm_runtime_resume_and_get(dmadev->ddev.dev); if (ret < 0) return ret; @@ -1470,7 +1470,7 @@ static int stm32_dma_suspend(struct device *dev) struct stm32_dma_device *dmadev = dev_get_drvdata(dev); int id, ret, scr; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c index ef0d0555103d..a42164389ebc 100644 --- a/drivers/dma/stm32-dmamux.c +++ b/drivers/dma/stm32-dmamux.c @@ -137,7 +137,7 @@ static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec, /* Set dma request */ spin_lock_irqsave(&dmamux->lock, flags); - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { spin_unlock_irqrestore(&dmamux->lock, flags); goto error; @@ -336,7 +336,7 @@ static int stm32_dmamux_suspend(struct device *dev) struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev); int i, ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; @@ 
-361,7 +361,7 @@ static int stm32_dmamux_resume(struct device *dev) if (ret < 0) return ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c index 16b19654873d..d6b8a202474f 100644 --- a/drivers/dma/uniphier-xdmac.c +++ b/drivers/dma/uniphier-xdmac.c @@ -209,8 +209,8 @@ static int uniphier_xdmac_chan_stop(struct uniphier_xdmac_chan *xc) writel(0, xc->reg_ch_base + XDMAC_TSS); /* wait until transfer is stopped */ - return readl_poll_timeout(xc->reg_ch_base + XDMAC_STAT, val, - !(val & XDMAC_STAT_TENF), 100, 1000); + return readl_poll_timeout_atomic(xc->reg_ch_base + XDMAC_STAT, val, + !(val & XDMAC_STAT_TENF), 100, 1000); } /* xc->vc.lock must be held by caller */ diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 75c0b8e904e5..4b9530a7bf65 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -394,6 +394,7 @@ struct xilinx_dma_tx_descriptor { * @genlock: Support genlock mode * @err: Channel has errors * @idle: Check for channel idle + * @terminating: Check for channel being synchronized by user * @tasklet: Cleanup work after irq * @config: Device configuration info * @flush_on_fsync: Flush on Frame sync @@ -431,6 +432,7 @@ struct xilinx_dma_chan { bool genlock; bool err; bool idle; + bool terminating; struct tasklet_struct tasklet; struct xilinx_vdma_config config; bool flush_on_fsync; @@ -1049,6 +1051,13 @@ static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan) /* Run any dependencies, then free the descriptor */ dma_run_dependencies(&desc->async_tx); xilinx_dma_free_tx_descriptor(chan, desc); + + /* + * While we ran a callback the user called a terminate function, + * which takes care of cleaning up any remaining descriptors + */ + if (chan->terminating) + break; } spin_unlock_irqrestore(&chan->lock, flags); @@ -1965,6 +1974,8 @@ static dma_cookie_t xilinx_dma_tx_submit(struct dma_async_tx_descriptor *tx) if (desc->cyclic) chan->cyclic = true; + chan->terminating = false; + spin_unlock_irqrestore(&chan->lock, flags); return cookie; @@ -2436,6 +2447,7 @@ static int xilinx_dma_terminate_all(struct dma_chan *dchan) xilinx_dma_chan_reset(chan); /* Remove and free all of the descriptors in the lists */ + chan->terminating = true; xilinx_dma_free_descriptors(chan); chan->idle = true; diff --git a/drivers/firmware/broadcom/tee_bnxt_fw.c b/drivers/firmware/broadcom/tee_bnxt_fw.c index ed10da5313e8..a5bf4c3f6dc7 100644 --- a/drivers/firmware/broadcom/tee_bnxt_fw.c +++ b/drivers/firmware/broadcom/tee_bnxt_fw.c @@ -212,10 +212,9 @@ static int tee_bnxt_fw_probe(struct device *dev) pvt_data.dev = dev; - fw_shm_pool = tee_shm_alloc(pvt_data.ctx, MAX_SHM_MEM_SZ, - TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); + fw_shm_pool = tee_shm_alloc_kernel_buf(pvt_data.ctx, MAX_SHM_MEM_SZ); if (IS_ERR(fw_shm_pool)) { - dev_err(pvt_data.dev, "tee_shm_alloc failed\n"); + dev_err(pvt_data.dev, "tee_shm_alloc_kernel_buf failed\n"); err = PTR_ERR(fw_shm_pool); goto out_sess; } @@ -242,6 +241,14 @@ static int tee_bnxt_fw_remove(struct device *dev) return 0; } +static void tee_bnxt_fw_shutdown(struct device *dev) +{ + tee_shm_free(pvt_data.fw_shm_pool); + tee_client_close_session(pvt_data.ctx, pvt_data.session_id); + tee_client_close_context(pvt_data.ctx); + pvt_data.ctx = NULL; +} + static const struct tee_client_device_id tee_bnxt_fw_id_table[] = { {UUID_INIT(0x6272636D, 0x2019, 0x0716, 0x42, 0x43, 0x4D, 0x5F, 0x53, 0x43, 
0x48, 0x49)}, @@ -257,6 +264,7 @@ static struct tee_client_driver tee_bnxt_fw_driver = { .bus = &tee_bus_type, .probe = tee_bnxt_fw_probe, .remove = tee_bnxt_fw_remove, + .shutdown = tee_bnxt_fw_shutdown, }, }; diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 7bf0a7acae5e..2363fee9211c 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -35,15 +35,48 @@ efi_status_t check_platform_features(void) } /* - * Although relocatable kernels can fix up the misalignment with respect to - * MIN_KIMG_ALIGN, the resulting virtual text addresses are subtly out of - * sync with those recorded in the vmlinux when kaslr is disabled but the - * image required relocation anyway. Therefore retain 2M alignment unless - * KASLR is in use. + * Distro versions of GRUB may ignore the BSS allocation entirely (i.e., fail + * to provide space, and fail to zero it). Check for this condition by double + * checking that the first and the last byte of the image are covered by the + * same EFI memory map entry. */ -static u64 min_kimg_align(void) +static bool check_image_region(u64 base, u64 size) { - return efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN; + unsigned long map_size, desc_size, buff_size; + efi_memory_desc_t *memory_map; + struct efi_boot_memmap map; + efi_status_t status; + bool ret = false; + int map_offset; + + map.map = &memory_map; + map.map_size = &map_size; + map.desc_size = &desc_size; + map.desc_ver = NULL; + map.key_ptr = NULL; + map.buff_size = &buff_size; + + status = efi_get_memory_map(&map); + if (status != EFI_SUCCESS) + return false; + + for (map_offset = 0; map_offset < map_size; map_offset += desc_size) { + efi_memory_desc_t *md = (void *)memory_map + map_offset; + u64 end = md->phys_addr + md->num_pages * EFI_PAGE_SIZE; + + /* + * Find the region that covers base, and return whether + * it covers base+size bytes. + */ + if (base >= md->phys_addr && base < end) { + ret = (base + size) <= end; + break; + } + } + + efi_bs_call(free_pool, memory_map); + + return ret; } efi_status_t handle_kernel_image(unsigned long *image_addr, @@ -56,6 +89,16 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, unsigned long kernel_size, kernel_memsize = 0; u32 phys_seed = 0; + /* + * Although relocatable kernels can fix up the misalignment with + * respect to MIN_KIMG_ALIGN, the resulting virtual text addresses are + * subtly out of sync with those recorded in the vmlinux when kaslr is + * disabled but the image required relocation anyway. Therefore retain + * 2M alignment if KASLR was explicitly disabled, even if it was not + * going to be activated to begin with. + */ + u64 min_kimg_align = efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN; + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { if (!efi_nokaslr) { status = efi_get_random_bytes(sizeof(phys_seed), @@ -76,6 +119,10 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, if (image->image_base != _text) efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n"); + if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN)) + efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n", + EFI_KIMG_ALIGN >> 10); + kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); *reserve_size = kernel_memsize; @@ -85,14 +132,18 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, * If KASLR is enabled, and we have some randomness available, * locate the kernel at a randomized offset in physical memory. 
*/ - status = efi_random_alloc(*reserve_size, min_kimg_align(), + status = efi_random_alloc(*reserve_size, min_kimg_align, reserve_addr, phys_seed); + if (status != EFI_SUCCESS) + efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { status = EFI_OUT_OF_RESOURCES; } if (status != EFI_SUCCESS) { - if (IS_ALIGNED((u64)_text, min_kimg_align())) { + if (!check_image_region((u64)_text, kernel_memsize)) { + efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n"); + } else if (IS_ALIGNED((u64)_text, min_kimg_align)) { /* * Just execute from wherever we were loaded by the * UEFI PE/COFF loader if the alignment is suitable. @@ -103,7 +154,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } status = efi_allocate_pages_aligned(*reserve_size, reserve_addr, - ULONG_MAX, min_kimg_align()); + ULONG_MAX, min_kimg_align); if (status != EFI_SUCCESS) { efi_err("Failed to relocate kernel\n"); diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index a408df474d83..724155b9e10d 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -30,6 +30,8 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, (u64)ULONG_MAX); + if (region_end < size) + return 0; first_slot = round_up(md->phys_addr, align); last_slot = round_down(region_end - size + 1, align); diff --git a/drivers/fpga/dfl-fme-perf.c b/drivers/fpga/dfl-fme-perf.c index 4299145ef347..587c82be12f7 100644 --- a/drivers/fpga/dfl-fme-perf.c +++ b/drivers/fpga/dfl-fme-perf.c @@ -953,6 +953,8 @@ static int fme_perf_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; priv->cpu = target; + perf_pmu_migrate_context(&priv->pmu, cpu, target); + return 0; } diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 4b9157a69fca..50b321a1ab1b 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -405,7 +405,7 @@ static int mpc8xxx_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, mpc8xxx_gc->irqn, mpc8xxx_gpio_irq_cascade, - IRQF_SHARED, "gpio-cascade", + IRQF_NO_THREAD | IRQF_SHARED, "gpio-cascade", mpc8xxx_gc); if (ret) { dev_err(&pdev->dev, diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c index 5022e0ad0fae..0f5d17f343f1 100644 --- a/drivers/gpio/gpio-tqmx86.c +++ b/drivers/gpio/gpio-tqmx86.c @@ -238,8 +238,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) struct resource *res; int ret, irq; - irq = platform_get_irq(pdev, 0); - if (irq < 0) + irq = platform_get_irq_optional(pdev, 0); + if (irq < 0 && irq != -ENXIO) return irq; res = platform_get_resource(pdev, IORESOURCE_IO, 0); @@ -278,7 +278,7 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); - if (irq) { + if (irq > 0) { struct irq_chip *irq_chip = &gpio->irq_chip; u8 irq_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 6cc0d4fa4d0a..4137e848f6a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1040,7 +1040,7 @@ void amdgpu_acpi_detect(void) */ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { -#if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE) +#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_PM_SLEEP) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { if (adev->flags & AMD_IS_APU) return 
pm_suspend_target_state == PM_SUSPEND_TO_IDLE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 3b5d13189073..8f53837d4d3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -468,6 +468,46 @@ bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *ade return (fw_cap & ATOM_FIRMWARE_CAP_DYNAMIC_BOOT_CFG_ENABLE) ? true : false; } +/* + * Helper function to query RAS EEPROM address + * + * @adev: amdgpu_device pointer + * + * Return true if vbios supports ras rom address reporting + */ +bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + u16 data_offset, size; + union firmware_info *firmware_info; + u8 frev, crev; + + if (i2c_address == NULL) + return false; + + *i2c_address = 0; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, + index, &size, &frev, &crev, &data_offset)) { + /* support firmware_info 3.4 + */ + if ((frev == 3 && crev >=4) || (frev > 3)) { + firmware_info = (union firmware_info *) + (mode_info->atom_context->bios + data_offset); + *i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr; + } + } + + if (*i2c_address != 0) + return true; + + return false; +} + + union smu_info { struct atom_smu_info_v3_1 v31; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 1bbbb195015d..751248b253de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -36,6 +36,7 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev); +bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address); bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 43e7b61d1c5c..ada7bc19118a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -299,6 +299,9 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) ip->major, ip->minor, ip->revision); + if (le16_to_cpu(ip->hw_id) == VCN_HWID) + adev->vcn.num_vcn_inst++; + for (k = 0; k < num_base_address; k++) { /* * convert the endianness of base addresses in place, @@ -385,7 +388,7 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) { struct binary_header *bhdr; struct harvest_table *harvest_info; - int i; + int i, vcn_harvest_count = 0; bhdr = (struct binary_header *)adev->mman.discovery_bin; harvest_info = (struct harvest_table *)(adev->mman.discovery_bin + @@ -397,8 +400,7 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) switch (le32_to_cpu(harvest_info->list[i].hw_id)) { case VCN_HWID: - adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; - adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; + vcn_harvest_count++; break; case 
DMU_HWID: adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; @@ -407,6 +409,10 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) break; } } + if (vcn_harvest_count == adev->vcn.num_vcn_inst) { + adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; + adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; + } } int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 361b86b71b56..971c5b8e75dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1213,6 +1213,13 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + /* BEIGE_GOBY */ + {0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0, 0, 0} }; @@ -1564,6 +1571,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; + } else if (amdgpu_device_supports_boco(drm_dev)) { + /* nothing to do */ } else if (amdgpu_device_supports_baco(drm_dev)) { amdgpu_device_baco_enter(drm_dev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index f40c871da0c6..38222de921d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -26,6 +26,7 @@ #include "amdgpu_ras.h" #include <linux/bits.h> #include "atom.h" +#include "amdgpu_atomfirmware.h" #define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 @@ -96,6 +97,9 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, if (!i2c_addr) return false; + if (amdgpu_atomfirmware_ras_rom_addr(adev, (uint8_t*)i2c_addr)) + return true; + switch (adev->asic_type) { case CHIP_VEGA20: *i2c_addr = EEPROM_I2C_TARGET_ADDR_VEGA20; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 59e0fefb15aa..acfa207cf970 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -54,11 +54,12 @@ static inline void amdgpu_res_first(struct ttm_resource *res, { struct drm_mm_node *node; - if (!res) { + if (!res || res->mem_type == TTM_PL_SYSTEM) { cur->start = start; cur->size = size; cur->remaining = size; cur->node = NULL; + WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 044076ec1d03..6a23c6826e12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1295,6 +1295,16 @@ static bool is_raven_kicker(struct amdgpu_device *adev) return false; } +static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) +{ + if ((adev->asic_type == CHIP_RENOIR) && + (adev->gfx.me_fw_version >= 0x000000a5) && + (adev->gfx.me_feature_version >= 52)) + return true; + else + return false; +} + static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) { if 
(gfx_v9_0_should_disable_gfxoff(adev->pdev)) @@ -3675,7 +3685,16 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, + /* If GC has entered CGPG, ringing doorbell > first page + * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to + * workaround this issue. And this change has to align with firmware + * update. + */ + if (check_if_enlarge_doorbell_range(adev)) + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, + (adev->doorbell.size - 4)); + else + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, (adev->doorbell_index.userqueue_end * 2) << 2); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b53f49a23ddc..afa96c8f721b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1548,6 +1548,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) } hdr = (const struct dmcub_firmware_header_v1_0 *)adev->dm.dmub_fw->data; + adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { adev->firmware.ucode[AMDGPU_UCODE_ID_DMCUB].ucode_id = @@ -1561,7 +1562,6 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) adev->dm.dmcub_fw_version); } - adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version); adev->dm.dmub_srv = kzalloc(sizeof(*adev->dm.dmub_srv), GFP_KERNEL); dmub_srv = adev->dm.dmub_srv; @@ -9605,7 +9605,12 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, } else if (amdgpu_freesync_vid_mode && aconnector && is_freesync_video_mode(&new_crtc_state->mode, aconnector)) { - set_freesync_fixed_config(dm_new_crtc_state); + struct drm_display_mode *high_mode; + + high_mode = get_highest_refresh_rate_mode(aconnector, false); + if (!drm_mode_equal(&new_crtc_state->mode, high_mode)) { + set_freesync_fixed_config(dm_new_crtc_state); + } } ret = dm_atomic_get_state(state, &dm_state); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 40f617bbb86f..4aba0e8c84f8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -584,7 +584,7 @@ static void amdgpu_dm_irq_schedule_work(struct amdgpu_device *adev, handler_data = container_of(handler_list->next, struct amdgpu_dm_irq_handler_data, list); /*allocate a new amdgpu_dm_irq_handler_data*/ - handler_data_add = kzalloc(sizeof(*handler_data), GFP_KERNEL); + handler_data_add = kzalloc(sizeof(*handler_data), GFP_ATOMIC); if (!handler_data_add) { DRM_ERROR("DM_IRQ: failed to allocate irq handler!\n"); return; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index c6f494f0dcea..6185f9475fa2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -66,9 +66,11 @@ int rn_get_active_display_cnt_wa( for (i = 0; i < context->stream_count; i++) { const struct dc_stream_state *stream = context->streams[i]; + /* Extend the WA to DP for Linux*/ if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A || stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK || - stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) + stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK || + 
stream->signal == SIGNAL_TYPE_DISPLAY_PORT) tmds_present = true; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 9fb8c46dc606..a6d0fd24fd02 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3602,29 +3602,12 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link) bool dp_retrieve_lttpr_cap(struct dc_link *link) { uint8_t lttpr_dpcd_data[6]; - bool vbios_lttpr_enable = false; - bool vbios_lttpr_interop = false; - struct dc_bios *bios = link->dc->ctx->dc_bios; + bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable; + bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware; enum dc_status status = DC_ERROR_UNEXPECTED; bool is_lttpr_present = false; memset(lttpr_dpcd_data, '\0', sizeof(lttpr_dpcd_data)); - /* Query BIOS to determine if LTTPR functionality is forced on by system */ - if (bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = bios->funcs->get_lttpr_caps(bios, &is_vbios_lttpr_enable); - vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - if (bios->funcs->get_lttpr_interop) { - enum bp_result bp_query_result; - uint8_t is_vbios_interop_enabled = 0; - - bp_query_result = bios->funcs->get_lttpr_interop(bios, &is_vbios_interop_enabled); - vbios_lttpr_interop = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; - } /* * Logic to determine LTTPR mode diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 8dcea8ff5c5a..af7b60108e9d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -183,6 +183,8 @@ struct dc_caps { unsigned int cursor_cache_size; struct dc_plane_cap planes[MAX_PLANES]; struct dc_color_caps color; + bool vbios_lttpr_aware; + bool vbios_lttpr_enable; }; struct dc_bug_wa { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c index 7fa9fc656b0c..f6e747f25ebe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c @@ -464,7 +464,7 @@ void optc2_lock_doublebuffer_enable(struct timing_generator *optc) REG_UPDATE_2(OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_X, - h_blank_start - 200 - 1, + (h_blank_start - 200 - 1) / optc1->opp_count, MASTER_UPDATE_LOCK_DB_Y, v_blank_start - 1); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 596c97dce67e..28e15ebf2f43 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1788,7 +1788,6 @@ static bool dcn30_split_stream_for_mpc_or_odm( } pri_pipe->next_odm_pipe = sec_pipe; sec_pipe->prev_odm_pipe = pri_pipe; - ASSERT(sec_pipe->top_pipe == NULL); if (!sec_pipe->top_pipe) sec_pipe->stream_res.opp = pool->opps[pipe_idx]; @@ -2617,6 +2616,26 @@ static bool dcn30_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if 
(ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, + &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 833ab13fa834..dc7823d23ba8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -146,8 +146,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc = { .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ .num_states = 1, - .sr_exit_time_us = 26.5, - .sr_enter_plus_exit_time_us = 31, + .sr_exit_time_us = 35.5, + .sr_enter_plus_exit_time_us = 40, .urgent_latency_us = 4.0, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 38c010afade1..cd3248dc31d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1968,6 +1968,22 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + /* interop bit is implicit */ + { + dc->caps.vbios_lttpr_aware = true; + } + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 973de346410d..27c7fa3110c8 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -267,11 +267,13 @@ void dmub_dcn31_set_outbox1_rptr(struct dmub_srv *dmub, uint32_t rptr_offset) bool dmub_dcn31_is_hw_init(struct dmub_srv *dmub) { - uint32_t is_hw_init; + union dmub_fw_boot_status status; + uint32_t is_enable; - REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_hw_init); + status.all = REG_READ(DMCUB_SCRATCH0); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enable); - return is_hw_init != 0; + return is_enable != 0 && status.bits.dal_fw; } bool dmub_dcn31_is_supported(struct dmub_srv *dmub) diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 3811e58dd857..44955458fe38 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -590,7 +590,7 @@ struct atom_firmware_info_v3_4 { uint8_t board_i2c_feature_id; // enum of atom_board_i2c_feature_id_def uint8_t board_i2c_feature_gpio_id; // i2c id find in gpio_lut data table gpio_id uint8_t board_i2c_feature_slave_addr; - uint8_t reserved3; + uint8_t ras_rom_i2c_slave_addr; uint16_t bootup_mvddq_mv; uint16_t bootup_mvpp_mv; uint32_t zfbstartaddrin16mb; diff --git 
a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h index 3fea2430dec0..dc91eb608791 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h @@ -26,7 +26,7 @@ #include "amdgpu_smu.h" #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF -#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x03 +#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x07 /* MP Apertures */ diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h index 5627de734246..c5e26d619bf0 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h @@ -111,7 +111,9 @@ typedef struct { uint32_t InWhisperMode : 1; uint32_t spare0 : 1; uint32_t ZstateStatus : 4; - uint32_t spare1 :12; + uint32_t spare1 : 4; + uint32_t DstateFun : 4; + uint32_t DstateDev : 4; // MP1_EXT_SCRATCH2 uint32_t P2JobHandler :24; uint32_t RsmuPmiP2FinishedCnt : 8; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index c751f717a0da..d92dd2c7448e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -353,8 +353,7 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t val; - if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_BACO || - powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_MACO) { + if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_BACO) { val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0); smu_baco->platform_support = (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? 
true : diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 18681dc458da..bcaaa086fc2f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -256,7 +256,7 @@ static int vangogh_tables_init(struct smu_context *smu) return 0; err3_out: - kfree(smu_table->clocks_table); + kfree(smu_table->watermarks_table); err2_out: kfree(smu_table->gpu_metrics_table); err1_out: diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 2d5d21740c25..0a8a2395c8ac 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5746,16 +5746,18 @@ static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state) switch (crtc_state->pipe_bpp) { case 18: - val |= PIPEMISC_DITHER_6_BPC; + val |= PIPEMISC_6_BPC; break; case 24: - val |= PIPEMISC_DITHER_8_BPC; + val |= PIPEMISC_8_BPC; break; case 30: - val |= PIPEMISC_DITHER_10_BPC; + val |= PIPEMISC_10_BPC; break; case 36: - val |= PIPEMISC_DITHER_12_BPC; + /* Port output 12BPC defined for ADLP+ */ + if (DISPLAY_VER(dev_priv) > 12) + val |= PIPEMISC_12_BPC_ADLP; break; default: MISSING_CASE(crtc_state->pipe_bpp); @@ -5808,15 +5810,27 @@ int bdw_get_pipemisc_bpp(struct intel_crtc *crtc) tmp = intel_de_read(dev_priv, PIPEMISC(crtc->pipe)); - switch (tmp & PIPEMISC_DITHER_BPC_MASK) { - case PIPEMISC_DITHER_6_BPC: + switch (tmp & PIPEMISC_BPC_MASK) { + case PIPEMISC_6_BPC: return 18; - case PIPEMISC_DITHER_8_BPC: + case PIPEMISC_8_BPC: return 24; - case PIPEMISC_DITHER_10_BPC: + case PIPEMISC_10_BPC: return 30; - case PIPEMISC_DITHER_12_BPC: - return 36; + /* + * PORT OUTPUT 12 BPC defined for ADLP+. + * + * TODO: + * For previous platforms with DSI interface, bits 5:7 + * are used for storing pipe_bpp irrespective of dithering. + * Since the value of 12 BPC is not defined for these bits + * on older platforms, need to find a workaround for 12 BPC + * MIPI DSI HW readout. 
+ */ + case PIPEMISC_12_BPC_ADLP: + if (DISPLAY_VER(dev_priv) > 12) + return 36; + fallthrough; default: MISSING_CASE(tmp); return 0; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 06024d321a1a..cde0a477fb49 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -3149,6 +3149,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) MMIO_DFH(_MMIO(0xb100), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0xb10c), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(_MMIO(0xb110), D_BDW); + MMIO_D(GEN9_SCRATCH_LNCF1, D_BDW_PLUS); MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS | F_CMD_WRITE_PATCH, 0, 0, D_BDW_PLUS, NULL, force_nonpriv_write); diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index b8ac80765461..f776c470914d 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -105,6 +105,8 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { {RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */ {RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */ {RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */ + {RCS0, GEN9_SCRATCH1, 0, false}, /* 0xb11c */ + {RCS0, GEN9_SCRATCH_LNCF1, 0, false}, /* 0xb008 */ {RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */ {RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */ {RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 77f1911c463b..3acb0b6be284 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -138,7 +138,7 @@ void i915_globals_unpark(void) atomic_inc(&active); } -static void __exit __i915_globals_flush(void) +static void __i915_globals_flush(void) { atomic_inc(&active); /* skip shrinking */ @@ -148,7 +148,7 @@ static void __exit __i915_globals_flush(void) atomic_dec(&active); } -void __exit i915_globals_exit(void) +void i915_globals_exit(void) { GEM_BUG_ON(atomic_read(&active)); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 35c97c39f125..966664610c8c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -727,9 +727,18 @@ static void err_print_gt(struct drm_i915_error_state_buf *m, if (GRAPHICS_VER(m->i915) >= 12) { int i; - for (i = 0; i < GEN12_SFC_DONE_MAX; i++) + for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { + /* + * SFC_DONE resides in the VD forcewake domain, so it + * only exists if the corresponding VCS engine is + * present. + */ + if (!HAS_ENGINE(gt->_gt, _VCS(i * 2))) + continue; + err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i, gt->sfc_done[i]); + } err_printf(m, " GAM_DONE: 0x%08x\n", gt->gam_done); } @@ -1581,6 +1590,14 @@ static void gt_record_regs(struct intel_gt_coredump *gt) if (GRAPHICS_VER(i915) >= 12) { for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { + /* + * SFC_DONE resides in the VD forcewake domain, so it + * only exists if the corresponding VCS engine is + * present. 
+ */ + if (!HAS_ENGINE(gt->_gt, _VCS(i * 2))) + continue; + gt->sfc_done[i] = intel_uncore_read(uncore, GEN12_SFC_DONE(i)); } diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 83b500bb170c..2880ec57c97d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1195,6 +1195,7 @@ static int __init i915_init(void) err = pci_register_driver(&i915_pci_driver); if (err) { i915_pmu_exit(); + i915_globals_exit(); return err; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 94fde5ca26ae..476bb3b9ad11 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -422,7 +422,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_HCP_SFC_LOCK_ACK_BIT REG_BIT(1) #define GEN12_HCP_SFC_USAGE_BIT REG_BIT(0) -#define GEN12_SFC_DONE(n) _MMIO(0x1cc00 + (n) * 0x100) +#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) #define GEN12_SFC_DONE_MAX 4 #define RING_PP_DIR_BASE(base) _MMIO((base) + 0x228) @@ -6163,11 +6163,17 @@ enum { #define PIPEMISC_HDR_MODE_PRECISION (1 << 23) /* icl+ */ #define PIPEMISC_OUTPUT_COLORSPACE_YUV (1 << 11) #define PIPEMISC_PIXEL_ROUNDING_TRUNC REG_BIT(8) /* tgl+ */ -#define PIPEMISC_DITHER_BPC_MASK (7 << 5) -#define PIPEMISC_DITHER_8_BPC (0 << 5) -#define PIPEMISC_DITHER_10_BPC (1 << 5) -#define PIPEMISC_DITHER_6_BPC (2 << 5) -#define PIPEMISC_DITHER_12_BPC (3 << 5) +/* + * For Display < 13, Bits 5-7 of PIPE MISC represent DITHER BPC with + * valid values of: 6, 8, 10 BPC. + * ADLP+, the bits 5-7 represent PORT OUTPUT BPC with valid values of: + * 6, 8, 10, 12 BPC. + */ +#define PIPEMISC_BPC_MASK (7 << 5) +#define PIPEMISC_8_BPC (0 << 5) +#define PIPEMISC_10_BPC (1 << 5) +#define PIPEMISC_6_BPC (2 << 5) +#define PIPEMISC_12_BPC_ADLP (4 << 5) /* adlp+ */ #define PIPEMISC_DITHER_ENABLE (1 << 4) #define PIPEMISC_DITHER_TYPE_MASK (3 << 2) #define PIPEMISC_DITHER_TYPE_SP (0 << 2) diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index 96ea1a2c11dd..f54392ec4fab 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -203,6 +203,7 @@ static irqreturn_t handle_lcd_irq(struct drm_device *dev) unsigned long status, val, val1; int plane_id, dma0_state, dma1_state; struct kmb_drm_private *kmb = to_kmb(dev); + u32 ctrl = 0; status = kmb_read_lcd(kmb, LCD_INT_STATUS); @@ -227,6 +228,19 @@ static irqreturn_t handle_lcd_irq(struct drm_device *dev) kmb_clr_bitmask_lcd(kmb, LCD_CONTROL, kmb->plane_status[plane_id].ctrl); + ctrl = kmb_read_lcd(kmb, LCD_CONTROL); + if (!(ctrl & (LCD_CTRL_VL1_ENABLE | + LCD_CTRL_VL2_ENABLE | + LCD_CTRL_GL1_ENABLE | + LCD_CTRL_GL2_ENABLE))) { + /* If no LCD layers are using DMA, + * then disable DMA pipelined AXI read + * transactions. 
+ */ + kmb_clr_bitmask_lcd(kmb, LCD_CONTROL, + LCD_CTRL_PIPELINE_DMA); + } + kmb->plane_status[plane_id].disable = false; } } @@ -411,10 +425,10 @@ static const struct drm_driver kmb_driver = { .fops = &fops, DRM_GEM_CMA_DRIVER_OPS_VMAP, .name = "kmb-drm", - .desc = "KEEMBAY DISPLAY DRIVER ", - .date = "20201008", - .major = 1, - .minor = 0, + .desc = "KEEMBAY DISPLAY DRIVER", + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, }; static int kmb_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/kmb/kmb_drv.h b/drivers/gpu/drm/kmb/kmb_drv.h index 02e806712a64..ebbaa5f422d5 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.h +++ b/drivers/gpu/drm/kmb/kmb_drv.h @@ -15,6 +15,11 @@ #define KMB_MAX_HEIGHT 1080 /*Max height in pixels */ #define KMB_MIN_WIDTH 1920 /*Max width in pixels */ #define KMB_MIN_HEIGHT 1080 /*Max height in pixels */ + +#define DRIVER_DATE "20210223" +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 1 + #define KMB_LCD_DEFAULT_CLK 200000000 #define KMB_SYS_CLK_MHZ 500 diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c index d5b6195856d1..ecee6782612d 100644 --- a/drivers/gpu/drm/kmb/kmb_plane.c +++ b/drivers/gpu/drm/kmb/kmb_plane.c @@ -427,8 +427,14 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, kmb_set_bitmask_lcd(kmb, LCD_CONTROL, ctrl); - /* FIXME no doc on how to set output format,these values are - * taken from the Myriadx tests + /* Enable pipeline AXI read transactions for the DMA + * after setting graphics layers. This must be done + * in a separate write cycle. + */ + kmb_set_bitmask_lcd(kmb, LCD_CONTROL, LCD_CTRL_PIPELINE_DMA); + + /* FIXME no doc on how to set output format, these values are taken + * from the Myriadx tests */ out_format |= LCD_OUTF_FORMAT_RGB888; @@ -526,6 +532,11 @@ struct kmb_plane *kmb_plane_init(struct drm_device *drm) plane->id = i; } + /* Disable pipeline AXI read transactions for the DMA + * prior to setting graphics layers + */ + kmb_clr_bitmask_lcd(kmb, LCD_CONTROL, LCD_CTRL_PIPELINE_DMA); + return primary; cleanup: drmm_kfree(drm, plane); diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index bced555648b0..e94738fe4db8 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -605,11 +605,15 @@ static int mtk_dpi_bridge_atomic_check(struct drm_bridge *bridge, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct mtk_dpi *dpi = bridge->driver_private; + struct mtk_dpi *dpi = bridge_to_dpi(bridge); unsigned int out_bus_format; out_bus_format = bridge_state->output_bus_cfg.format; + if (out_bus_format == MEDIA_BUS_FMT_FIXED) + if (dpi->conf->num_output_fmts) + out_bus_format = dpi->conf->output_fmts[0]; + dev_dbg(dpi->dev, "input format 0x%04x, output format 0x%04x\n", bridge_state->input_bus_cfg.format, bridge_state->output_bus_cfg.format); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 474efb844249..735efe79f075 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -532,13 +532,10 @@ void mtk_drm_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane, struct drm_atomic_state *state) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - const struct drm_plane_helper_funcs *plane_helper_funcs = - plane->helper_private; if (!mtk_crtc->enabled) return; - plane_helper_funcs->atomic_update(plane, state); mtk_drm_crtc_update_config(mtk_crtc, false); } diff --git 
a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index b5582dcf564c..e6dcb34d3052 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -110,6 +110,35 @@ static int mtk_plane_atomic_async_check(struct drm_plane *plane, true, true); } +static void mtk_plane_update_new_state(struct drm_plane_state *new_state, + struct mtk_plane_state *mtk_plane_state) +{ + struct drm_framebuffer *fb = new_state->fb; + struct drm_gem_object *gem; + struct mtk_drm_gem_obj *mtk_gem; + unsigned int pitch, format; + dma_addr_t addr; + + gem = fb->obj[0]; + mtk_gem = to_mtk_gem_obj(gem); + addr = mtk_gem->dma_addr; + pitch = fb->pitches[0]; + format = fb->format->format; + + addr += (new_state->src.x1 >> 16) * fb->format->cpp[0]; + addr += (new_state->src.y1 >> 16) * pitch; + + mtk_plane_state->pending.enable = true; + mtk_plane_state->pending.pitch = pitch; + mtk_plane_state->pending.format = format; + mtk_plane_state->pending.addr = addr; + mtk_plane_state->pending.x = new_state->dst.x1; + mtk_plane_state->pending.y = new_state->dst.y1; + mtk_plane_state->pending.width = drm_rect_width(&new_state->dst); + mtk_plane_state->pending.height = drm_rect_height(&new_state->dst); + mtk_plane_state->pending.rotation = new_state->rotation; +} + static void mtk_plane_atomic_async_update(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -126,8 +155,10 @@ static void mtk_plane_atomic_async_update(struct drm_plane *plane, plane->state->src_h = new_state->src_h; plane->state->src_w = new_state->src_w; swap(plane->state->fb, new_state->fb); - new_plane_state->pending.async_dirty = true; + mtk_plane_update_new_state(new_state, new_plane_state); + wmb(); /* Make sure the above parameters are set before update */ + new_plane_state->pending.async_dirty = true; mtk_drm_crtc_async_update(new_state->crtc, plane, state); } @@ -189,14 +220,8 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(new_state); - struct drm_crtc *crtc = new_state->crtc; - struct drm_framebuffer *fb = new_state->fb; - struct drm_gem_object *gem; - struct mtk_drm_gem_obj *mtk_gem; - unsigned int pitch, format; - dma_addr_t addr; - if (!crtc || WARN_ON(!fb)) + if (!new_state->crtc || WARN_ON(!new_state->fb)) return; if (!new_state->visible) { @@ -204,24 +229,7 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, return; } - gem = fb->obj[0]; - mtk_gem = to_mtk_gem_obj(gem); - addr = mtk_gem->dma_addr; - pitch = fb->pitches[0]; - format = fb->format->format; - - addr += (new_state->src.x1 >> 16) * fb->format->cpp[0]; - addr += (new_state->src.y1 >> 16) * pitch; - - mtk_plane_state->pending.enable = true; - mtk_plane_state->pending.pitch = pitch; - mtk_plane_state->pending.format = format; - mtk_plane_state->pending.addr = addr; - mtk_plane_state->pending.x = new_state->dst.x1; - mtk_plane_state->pending.y = new_state->dst.y1; - mtk_plane_state->pending.width = drm_rect_width(&new_state->dst); - mtk_plane_state->pending.height = drm_rect_height(&new_state->dst); - mtk_plane_state->pending.rotation = new_state->rotation; + mtk_plane_update_new_state(new_state, mtk_plane_state); wmb(); /* Make sure the above parameters are set before update */ mtk_plane_state->pending.dirty = true; } diff --git a/drivers/gpu/drm/meson/meson_registers.h b/drivers/gpu/drm/meson/meson_registers.h index 
446e7961da48..0f3cafab8860 100644 --- a/drivers/gpu/drm/meson/meson_registers.h +++ b/drivers/gpu/drm/meson/meson_registers.h @@ -634,6 +634,11 @@ #define VPP_WRAP_OSD3_MATRIX_PRE_OFFSET2 0x3dbc #define VPP_WRAP_OSD3_MATRIX_EN_CTRL 0x3dbd +/* osd1 HDR */ +#define OSD1_HDR2_CTRL 0x38a0 +#define OSD1_HDR2_CTRL_VDIN0_HDR2_TOP_EN BIT(13) +#define OSD1_HDR2_CTRL_REG_ONLY_MAT BIT(16) + /* osd2 scaler */ #define OSD2_VSC_PHASE_STEP 0x3d00 #define OSD2_VSC_INI_PHASE 0x3d01 diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c index aede0c67a57f..259f3e6bec90 100644 --- a/drivers/gpu/drm/meson/meson_viu.c +++ b/drivers/gpu/drm/meson/meson_viu.c @@ -425,9 +425,14 @@ void meson_viu_init(struct meson_drm *priv) if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) || meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL)) meson_viu_load_matrix(priv); - else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) + else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) { meson_viu_set_g12a_osd1_matrix(priv, RGB709_to_YUV709l_coeff, true); + /* fix green/pink color distortion from vendor u-boot */ + writel_bits_relaxed(OSD1_HDR2_CTRL_REG_ONLY_MAT | + OSD1_HDR2_CTRL_VDIN0_HDR2_TOP_EN, 0, + priv->io_base + _REG(OSD1_HDR2_CTRL)); + } /* Initialize OSD1 fifo control register */ reg = VIU_OSD_DDR_PRIORITY_URGENT | diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d1cef3b69e9d..5652d982b1ce 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -492,7 +492,7 @@ struct vmw_private { resource_size_t vram_start; resource_size_t vram_size; resource_size_t prim_bb_mem; - void __iomem *rmmio; + u32 __iomem *rmmio; u32 *fifo_mem; resource_size_t fifo_mem_size; uint32_t fb_max_width; diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index cceaf69279a9..6304d1dd2dd6 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -1224,14 +1224,14 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) disable_irq(iproc_i2c->irq); + tasklet_kill(&iproc_i2c->slave_rx_tasklet); + /* disable all slave interrupts */ tmp = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); tmp &= ~(IE_S_ALL_INTERRUPT_MASK << IE_S_ALL_INTERRUPT_SHIFT); iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, tmp); - tasklet_kill(&iproc_i2c->slave_rx_tasklet); - /* Erase the slave address programmed */ tmp = iproc_i2c_rd_reg(iproc_i2c, S_CFG_SMBUS_ADDR_OFFSET); tmp &= ~BIT(S_CFG_EN_NIC_SMB_ADDR3_SHIFT); diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index cb64fe649390..77f576e51652 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -141,7 +141,7 @@ static ssize_t i2cdev_read(struct file *file, char __user *buf, size_t count, if (count > 8192) count = 8192; - tmp = kmalloc(count, GFP_KERNEL); + tmp = kzalloc(count, GFP_KERNEL); if (tmp == NULL) return -ENOMEM; @@ -150,7 +150,8 @@ static ssize_t i2cdev_read(struct file *file, char __user *buf, size_t count, ret = i2c_master_recv(client, tmp, count); if (ret >= 0) - ret = copy_to_user(buf, tmp, count) ? 
-EFAULT : ret; + if (copy_to_user(buf, tmp, ret)) + ret = -EFAULT; kfree(tmp); return ret; } diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index 0e56ace61103..8d8b1ba42ff8 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -231,6 +231,7 @@ config DMARD10 config FXLS8962AF tristate + depends on I2C || !I2C # cannot be built-in for modular I2C config FXLS8962AF_I2C tristate "NXP FXLS8962AF/FXLS8964AF Accelerometer I2C Driver" @@ -247,6 +248,7 @@ config FXLS8962AF_I2C config FXLS8962AF_SPI tristate "NXP FXLS8962AF/FXLS8964AF Accelerometer SPI Driver" depends on SPI + depends on I2C || !I2C select FXLS8962AF select REGMAP_SPI help diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c index 078d87865fde..0019f1ea7df2 100644 --- a/drivers/iio/accel/fxls8962af-core.c +++ b/drivers/iio/accel/fxls8962af-core.c @@ -637,7 +637,7 @@ static int fxls8962af_i2c_raw_read_errata3(struct fxls8962af_data *data, return ret; } - return ret; + return 0; } static int fxls8962af_fifo_transfer(struct fxls8962af_data *data, diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c index 6ef09609be9f..f9c8385c72d3 100644 --- a/drivers/iio/adc/palmas_gpadc.c +++ b/drivers/iio/adc/palmas_gpadc.c @@ -664,8 +664,8 @@ static int palmas_adc_wakeup_configure(struct palmas_gpadc *adc) adc_period = adc->auto_conversion_period; for (i = 0; i < 16; ++i) { - if (((1000 * (1 << i)) / 32) < adc_period) - continue; + if (((1000 * (1 << i)) / 32) >= adc_period) + break; } if (i > 0) i--; diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c index 2383eacada87..a2b83f0bd526 100644 --- a/drivers/iio/adc/ti-ads7950.c +++ b/drivers/iio/adc/ti-ads7950.c @@ -568,7 +568,6 @@ static int ti_ads7950_probe(struct spi_device *spi) st->ring_xfer.tx_buf = &st->tx_buf[0]; st->ring_xfer.rx_buf = &st->rx_buf[0]; /* len will be set later */ - st->ring_xfer.cs_change = true; spi_message_add_tail(&st->ring_xfer, &st->ring_msg); diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c index 2a957f19048e..9e0fce917ce4 100644 --- a/drivers/iio/humidity/hdc100x.c +++ b/drivers/iio/humidity/hdc100x.c @@ -25,6 +25,8 @@ #include <linux/iio/trigger_consumer.h> #include <linux/iio/triggered_buffer.h> +#include <linux/time.h> + #define HDC100X_REG_TEMP 0x00 #define HDC100X_REG_HUMIDITY 0x01 @@ -166,7 +168,7 @@ static int hdc100x_get_measurement(struct hdc100x_data *data, struct iio_chan_spec const *chan) { struct i2c_client *client = data->client; - int delay = data->adc_int_us[chan->address]; + int delay = data->adc_int_us[chan->address] + 1*USEC_PER_MSEC; int ret; __be16 val; @@ -316,7 +318,7 @@ static irqreturn_t hdc100x_trigger_handler(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct hdc100x_data *data = iio_priv(indio_dev); struct i2c_client *client = data->client; - int delay = data->adc_int_us[0] + data->adc_int_us[1]; + int delay = data->adc_int_us[0] + data->adc_int_us[1] + 2*USEC_PER_MSEC; int ret; /* dual read starts at temp register */ diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index a5b421f42287..b9a06ca29bee 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c @@ -411,12 +411,11 @@ int __adis_initial_startup(struct adis *adis) int ret; /* check if the device has rst pin low */ - gpio = devm_gpiod_get_optional(&adis->spi->dev, "reset", GPIOD_ASIS); + gpio = devm_gpiod_get_optional(&adis->spi->dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(gpio)) return PTR_ERR(gpio); if 
(gpio) { - gpiod_set_value_cansleep(gpio, 1); msleep(10); /* bring device out of reset */ gpiod_set_value_cansleep(gpio, 0); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 515a7e95a421..5d3b8b8d163d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -926,12 +926,25 @@ static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) return ret; } +static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(qp, &qp_attr, qp_attr_mask); +} + int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { struct rdma_id_private *id_priv; struct ib_qp *qp; - int ret = 0; + int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id->device != pd->device) { @@ -948,6 +961,8 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); + else + ret = cma_init_conn_qp(id_priv, qp); if (ret) goto out_destroy; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 6c8c910f4e86..c7e8d7b3baa1 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -967,6 +967,12 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return !err || err == -ENODATA ? npolled : err; } +void c4iw_cq_rem_ref(struct c4iw_cq *chp) +{ + if (refcount_dec_and_test(&chp->refcnt)) + complete(&chp->cq_rel_comp); +} + int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct c4iw_cq *chp; @@ -976,8 +982,8 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) chp = to_c4iw_cq(ib_cq); xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); - refcount_dec(&chp->refcnt); - wait_event(chp->wait, !refcount_read(&chp->refcnt)); + c4iw_cq_rem_ref(chp); + wait_for_completion(&chp->cq_rel_comp); ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); @@ -1081,7 +1087,7 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, spin_lock_init(&chp->lock); spin_lock_init(&chp->comp_handler_lock); refcount_set(&chp->refcnt, 1); - init_waitqueue_head(&chp->wait); + init_completion(&chp->cq_rel_comp); ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL); if (ret) goto err_destroy_cq; diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 7798d090888b..34211a533d5c 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -213,8 +213,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) break; } done: - if (refcount_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); + c4iw_cq_rem_ref(chp); c4iw_qp_rem_ref(&qhp->ibqp); out: return; @@ -234,8 +233,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - if (refcount_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); + c4iw_cq_rem_ref(chp); } else { pr_debug("unknown cqid 0x%x\n", qid); xa_unlock_irqrestore(&dev->cqs, flag); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 3883af3d2312..ac5f581aff4c 100644 --- 
a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -428,7 +428,7 @@ struct c4iw_cq { spinlock_t lock; spinlock_t comp_handler_lock; refcount_t refcnt; - wait_queue_head_t wait; + struct completion cq_rel_comp; struct c4iw_wr_wait *wr_waitp; }; @@ -979,6 +979,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +void c4iw_cq_rem_ref(struct c4iw_cq *chp); int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 8f68cc3ff193..84f3f2b5f097 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -213,8 +213,10 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) hr_cmd->context = kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL); - if (!hr_cmd->context) + if (!hr_cmd->context) { + hr_dev->cmd_mod = 0; return -ENOMEM; + } for (i = 0; i < hr_cmd->max_cmds; ++i) { hr_cmd->context[i].token = i; @@ -228,7 +230,6 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) spin_lock_init(&hr_cmd->context_lock); hr_cmd->use_events = 1; - down(&hr_cmd->poll_sem); return 0; } @@ -239,8 +240,6 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) kfree(hr_cmd->context); hr_cmd->use_events = 0; - - up(&hr_cmd->poll_sem); } struct hns_roce_cmd_mailbox * diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 078a97193f0e..cc6eab14a222 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -873,11 +873,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) if (hr_dev->cmd_mod) { ret = hns_roce_cmd_use_events(hr_dev); - if (ret) { + if (ret) dev_warn(dev, "Cmd event mode failed, set back to poll!\n"); - hns_roce_cmd_use_polling(hr_dev); - } } ret = hns_roce_init_hem(hr_dev); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 7abeb576b3c5..a190fb581591 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -945,7 +945,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, u32 *cqb = NULL; void *cqc; int cqe_size; - unsigned int irqn; int eqn; int err; @@ -984,7 +983,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } - err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); + err = mlx5_vector2eqn(dev->mdev, vector, &eqn); if (err) goto err_cqb; @@ -997,7 +996,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); MLX5_SET(cqc, cqc, uar_page, index); - MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN) MLX5_SET(cqc, cqc, oi, 1); @@ -1007,7 +1006,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_cqb; mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); - cq->mcq.irqn = irqn; if (udata) cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; else diff --git 
a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index eb9b0a2707f8..e95967aefe78 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -975,7 +975,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( struct mlx5_ib_dev *dev; int user_vector; int dev_eqn; - unsigned int irqn; int err; if (uverbs_copy_from(&user_vector, attrs, @@ -987,7 +986,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); - err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn); + err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn); if (err < 0) return err; @@ -1437,11 +1436,10 @@ out: rcu_read_unlock(); } -static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in) +static bool is_apu_cq(struct mlx5_ib_dev *dev, const void *in) { if (!MLX5_CAP_GEN(dev->mdev, apu) || - !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), - apu_thread_cq)) + !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), apu_cq)) return false; return true; @@ -1501,7 +1499,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in, cmd_in_len, cmd_out, cmd_out_len); } else if (opcode == MLX5_CMD_OP_CREATE_CQ && - !is_apu_thread_cq(dev, cmd_in)) { + !is_apu_cq(dev, cmd_in)) { obj->flags |= DEVX_OBJ_FLAGS_CQ; obj->core_cq.comp = devx_cq_comp; err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index b25e0b33a11a..52821485371a 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -8,13 +8,15 @@ #include "srq.h" static int -mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, + struct mlx5_eswitch_rep *rep, + int vport_index) { struct mlx5_ib_dev *ibdev; - int vport_index; ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB); - vport_index = rep->vport_index; + if (!ibdev) + return -EINVAL; ibdev->port[vport_index].rep = rep; rep->rep_data[REP_IB].priv = ibdev; @@ -26,19 +28,39 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) return 0; } +static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev); + static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { u32 num_ports = mlx5_eswitch_get_total_vports(dev); const struct mlx5_ib_profile *profile; + struct mlx5_core_dev *peer_dev; struct mlx5_ib_dev *ibdev; + u32 peer_num_ports; int vport_index; int ret; + vport_index = rep->vport_index; + + if (mlx5_lag_is_shared_fdb(dev)) { + peer_dev = mlx5_lag_get_peer_mdev(dev); + peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev); + if (mlx5_lag_is_master(dev)) { + /* Only 1 ib port is the representor for both uplinks */ + num_ports += peer_num_ports - 1; + } else { + if (rep->vport == MLX5_VPORT_UPLINK) + return 0; + vport_index += peer_num_ports; + dev = peer_dev; + } + } + if (rep->vport == MLX5_VPORT_UPLINK) profile = &raw_eth_profile; else - return mlx5_ib_set_vport_rep(dev, rep); + return mlx5_ib_set_vport_rep(dev, rep, vport_index); ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) @@ -64,6 +86,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto fail_add; rep->rep_data[REP_IB].priv = ibdev; + if (mlx5_lag_is_shared_fdb(dev)) + mlx5_ib_register_peer_vport_reps(dev); return 0; @@ -82,18 
+106,45 @@ static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) static void mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) { + struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw); struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep); + int vport_index = rep->vport_index; struct mlx5_ib_port *port; - port = &dev->port[rep->vport_index]; + if (WARN_ON(!mdev)) + return; + + if (mlx5_lag_is_shared_fdb(mdev) && + !mlx5_lag_is_master(mdev)) { + struct mlx5_core_dev *peer_mdev; + + if (rep->vport == MLX5_VPORT_UPLINK) + return; + peer_mdev = mlx5_lag_get_peer_mdev(mdev); + vport_index += mlx5_eswitch_get_total_vports(peer_mdev); + } + + if (!dev) + return; + + port = &dev->port[vport_index]; write_lock(&port->roce.netdev_lock); port->roce.netdev = NULL; write_unlock(&port->roce.netdev_lock); rep->rep_data[REP_IB].priv = NULL; port->rep = NULL; - if (rep->vport == MLX5_VPORT_UPLINK) + if (rep->vport == MLX5_VPORT_UPLINK) { + struct mlx5_core_dev *peer_mdev; + struct mlx5_eswitch *esw; + + if (mlx5_lag_is_shared_fdb(mdev)) { + peer_mdev = mlx5_lag_get_peer_mdev(mdev); + esw = peer_mdev->priv.eswitch; + mlx5_eswitch_unregister_vport_reps(esw, REP_IB); + } __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + } } static const struct mlx5_eswitch_rep_ops rep_ops = { @@ -102,6 +153,18 @@ static const struct mlx5_eswitch_rep_ops rep_ops = { .get_proto_dev = mlx5_ib_rep_to_dev, }; +static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev); + struct mlx5_eswitch *esw; + + if (!peer_mdev) + return; + + esw = peer_mdev->priv.eswitch; + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); +} + struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, u16 vport_num) { @@ -123,7 +186,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, rep = dev->port[port - 1].rep; - return mlx5_eswitch_add_send_to_vport_rule(esw, rep, sq->base.mqp.qpn); + return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn); } static int mlx5r_rep_probe(struct auxiliary_device *adev, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 094c976b1eed..ae05e143401c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -126,6 +126,7 @@ static int get_port_state(struct ib_device *ibdev, static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, struct net_device *ndev, + struct net_device *upper, u32 *port_num) { struct net_device *rep_ndev; @@ -137,6 +138,14 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, if (!port->rep) continue; + if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) { + *port_num = i + 1; + return &port->roce; + } + + if (upper && port->rep->vport == MLX5_VPORT_UPLINK) + continue; + read_lock(&port->roce.netdev_lock); rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw, port->rep->vport); @@ -196,11 +205,12 @@ static int mlx5_netdev_event(struct notifier_block *this, } if (ibdev->is_rep) - roce = mlx5_get_rep_roce(ibdev, ndev, &port_num); + roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num); if (!roce) return NOTIFY_DONE; - if ((upper == ndev || (!upper && ndev == roce->netdev)) - && ibdev->ib_active) { + if ((upper == ndev || + ((!upper || ibdev->is_rep) && ndev == roce->netdev)) && + ibdev->ib_active) { struct ib_event ibev = { }; enum ib_port_state port_state; @@ -3012,7 +3022,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) 
struct mlx5_flow_table *ft; int err; - if (!ns || !mlx5_lag_is_roce(mdev)) + if (!ns || !mlx5_lag_is_active(mdev)) return 0; err = mlx5_cmd_create_vport_lag(mdev); @@ -3074,9 +3084,11 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; - err = mlx5_nic_vport_enable_roce(dev->mdev); - if (err) - return err; + if (!dev->is_rep && dev->profile != &raw_eth_profile) { + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + return err; + } err = mlx5_eth_lag_init(dev); if (err) @@ -3085,7 +3097,8 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) return 0; err_disable_roce: - mlx5_nic_vport_disable_roce(dev->mdev); + if (!dev->is_rep && dev->profile != &raw_eth_profile) + mlx5_nic_vport_disable_roce(dev->mdev); return err; } @@ -3093,7 +3106,8 @@ err_disable_roce: static void mlx5_disable_eth(struct mlx5_ib_dev *dev) { mlx5_eth_lag_cleanup(dev); - mlx5_nic_vport_disable_roce(dev->mdev); + if (!dev->is_rep && dev->profile != &raw_eth_profile) + mlx5_nic_vport_disable_roce(dev->mdev); } static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num, @@ -3950,12 +3964,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) /* Register only for native ports */ err = mlx5_add_netdev_notifier(dev, port_num); - if (err || dev->is_rep || !mlx5_is_roce_init_enabled(mdev)) - /* - * We don't enable ETH interface for - * 1. IB representors - * 2. User disabled ROCE through devlink interface - */ + if (err) return err; err = mlx5_enable_eth(dev); @@ -3980,8 +3989,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev) ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - if (!dev->is_rep) - mlx5_disable_eth(dev); + mlx5_disable_eth(dev); port_num = mlx5_core_native_port_num(dev->mdev) - 1; mlx5_remove_netdev_notifier(dev, port_num); @@ -4037,7 +4045,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; - if (!mlx5_lag_is_roce(dev->mdev)) + if (!mlx5_lag_is_active(dev->mdev)) name = "mlx5_%d"; else name = "mlx5_bond_%d"; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3263851ea574..3f1c5a4f158b 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -531,8 +531,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) */ spin_unlock_irq(&ent->lock); need_delay = need_resched() || someone_adding(cache) || - time_after(jiffies, - READ_ONCE(cache->last_add) + 300 * HZ); + !time_after(jiffies, + READ_ONCE(cache->last_add) + 300 * HZ); spin_lock_irq(&ent->lock); if (ent->disabled) goto out; diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c index c0ddf7b3c6e2..bbfcce3bdc84 100644 --- a/drivers/infiniband/hw/mlx5/std_types.c +++ b/drivers/infiniband/hw/mlx5/std_types.c @@ -114,14 +114,18 @@ out: static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num, struct mlx5_ib_uapi_query_port *info) { - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_eswitch_rep *rep; + struct mlx5_core_dev *mdev; int err; rep = dev->port[port_num - 1].rep; if (!rep) return -EOPNOTSUPP; + mdev = mlx5_eswitch_get_core_dev(rep->esw); + if (!mdev) + return -EINVAL; + info->vport = rep->vport; info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT; @@ -138,9 +142,9 @@ static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num, if (err) return err; - if (mlx5_eswitch_vport_match_metadata_enabled(mdev->priv.eswitch)) { + if (mlx5_eswitch_vport_match_metadata_enabled(rep->esw)) { info->reg_c0.value = 
mlx5_eswitch_get_vport_metadata_for_match( - mdev->priv.eswitch, rep->vport); + rep->esw, rep->vport); info->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask(); info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_REG_C0; } diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index dec92928a1cd..5ac27f28ace1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -259,6 +259,7 @@ static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, iph->version = IPVERSION; iph->ihl = sizeof(struct iphdr) >> 2; + iph->tot_len = htons(skb->len); iph->frag_off = df; iph->protocol = proto; iph->tos = tos; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 3743dc39b60c..360ec67cb9e1 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -318,7 +318,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) pr_warn("%s: invalid num_sge in SRQ entry\n", __func__); return RESPST_ERR_MALFORMED_WQE; } - size = sizeof(wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); + size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); memcpy(&qp->resp.srq_wqe, wqe, size); qp->resp.wqe = &qp->resp.srq_wqe.wqe; diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 8a1e70e00876..7887941730db 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -403,7 +403,7 @@ struct icc_path *devm_of_icc_get(struct device *dev, const char *name) { struct icc_path **ptr, *path; - ptr = devres_alloc(devm_icc_release, sizeof(**ptr), GFP_KERNEL); + ptr = devres_alloc(devm_icc_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); @@ -973,9 +973,14 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) } node->avg_bw = node->init_avg; node->peak_bw = node->init_peak; + + if (provider->pre_aggregate) + provider->pre_aggregate(node); + if (provider->aggregate) provider->aggregate(node, 0, node->init_avg, node->init_peak, &node->avg_bw, &node->peak_bw); + provider->set(node, node); node->avg_bw = 0; node->peak_bw = 0; @@ -1106,6 +1111,8 @@ void icc_sync_state(struct device *dev) dev_dbg(p->dev, "interconnect provider is in synced state\n"); list_for_each_entry(n, &p->nodes, node_list) { if (n->init_avg || n->init_peak) { + n->init_avg = 0; + n->init_peak = 0; aggregate_requests(n); p->set(n, n); } diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c index bf01d09dba6c..27cc5f03611c 100644 --- a/drivers/interconnect/qcom/icc-rpmh.c +++ b/drivers/interconnect/qcom/icc-rpmh.c @@ -20,13 +20,18 @@ void qcom_icc_pre_aggregate(struct icc_node *node) { size_t i; struct qcom_icc_node *qn; + struct qcom_icc_provider *qp; qn = node->data; + qp = to_qcom_provider(node->provider); for (i = 0; i < QCOM_ICC_NUM_BUCKETS; i++) { qn->sum_avg[i] = 0; qn->max_peak[i] = 0; } + + for (i = 0; i < qn->num_bcms; i++) + qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]); } EXPORT_SYMBOL_GPL(qcom_icc_pre_aggregate); @@ -44,10 +49,8 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, { size_t i; struct qcom_icc_node *qn; - struct qcom_icc_provider *qp; qn = node->data; - qp = to_qcom_provider(node->provider); if (!tag) tag = QCOM_ICC_TAG_ALWAYS; @@ -57,14 +60,16 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, qn->sum_avg[i] += avg_bw; qn->max_peak[i] = max_t(u32, qn->max_peak[i], peak_bw); } + + if (node->init_avg || node->init_peak) { + qn->sum_avg[i] = 
max_t(u64, qn->sum_avg[i], node->init_avg); + qn->max_peak[i] = max_t(u64, qn->max_peak[i], node->init_peak); + } } *agg_avg += avg_bw; *agg_peak = max_t(u32, *agg_peak, peak_bw); - for (i = 0; i < qn->num_bcms; i++) - qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]); - return 0; } EXPORT_SYMBOL_GPL(qcom_icc_aggregate); @@ -79,7 +84,6 @@ EXPORT_SYMBOL_GPL(qcom_icc_aggregate); int qcom_icc_set(struct icc_node *src, struct icc_node *dst) { struct qcom_icc_provider *qp; - struct qcom_icc_node *qn; struct icc_node *node; if (!src) @@ -88,12 +92,6 @@ int qcom_icc_set(struct icc_node *src, struct icc_node *dst) node = src; qp = to_qcom_provider(node->provider); - qn = node->data; - - qn->sum_avg[QCOM_ICC_BUCKET_AMC] = max_t(u64, qn->sum_avg[QCOM_ICC_BUCKET_AMC], - node->avg_bw); - qn->max_peak[QCOM_ICC_BUCKET_AMC] = max_t(u64, qn->max_peak[QCOM_ICC_BUCKET_AMC], - node->peak_bw); qcom_icc_bcm_voter_commit(qp->voter); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 51f2547c2007..3c44c4bb40fc 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -474,8 +474,6 @@ static void raid1_end_write_request(struct bio *bio) /* * When the device is faulty, it is not necessary to * handle write error. - * For failfast, this is the only remaining device, - * We need to retry the write without FailFast. */ if (!test_bit(Faulty, &rdev->flags)) set_bit(R1BIO_WriteError, &r1_bio->state); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 16977e8e075d..07119d7e0fdf 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -471,12 +471,12 @@ static void raid10_end_write_request(struct bio *bio) /* * When the device is faulty, it is not necessary to * handle write error. - * For failfast, this is the only remaining device, - * We need to retry the write without FailFast. */ if (!test_bit(Faulty, &rdev->flags)) set_bit(R10BIO_WriteError, &r10_bio->state); else { + /* Fail the request */ + set_bit(R10BIO_Degraded, &r10_bio->state); r10_bio->devs[slot].bio = NULL; to_put = bio; dec_rdev = 1; diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 02281d13505f..508ac295eb06 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -1573,6 +1573,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, struct media_request *req) { struct vb2_buffer *vb; + enum vb2_buffer_state orig_state; int ret; if (q->error) { @@ -1673,6 +1674,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, * Add to the queued buffers list, a buffer will stay on it until * dequeued in dqbuf. */ + orig_state = vb->state; list_add_tail(&vb->queued_entry, &q->queued_list); q->queued_count++; q->waiting_for_buffers = false; @@ -1703,8 +1705,17 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, if (q->streaming && !q->start_streaming_called && q->queued_count >= q->min_buffers_needed) { ret = vb2_start_streaming(q); - if (ret) + if (ret) { + /* + * Since vb2_core_qbuf will return with an error, + * we should return it to state DEQUEUED since + * the error indicates that the buffer wasn't queued. 
+ */ + list_del(&vb->queued_entry); + q->queued_count--; + vb->state = orig_state; return ret; + } } dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index); diff --git a/drivers/media/platform/atmel/Kconfig b/drivers/media/platform/atmel/Kconfig index 99b51213f871..dda2f27da317 100644 --- a/drivers/media/platform/atmel/Kconfig +++ b/drivers/media/platform/atmel/Kconfig @@ -8,6 +8,7 @@ config VIDEO_ATMEL_ISC select VIDEOBUF2_DMA_CONTIG select REGMAP_MMIO select V4L2_FWNODE + select VIDEO_ATMEL_ISC_BASE help This module makes the ATMEL Image Sensor Controller available as a v4l2 device. @@ -19,10 +20,17 @@ config VIDEO_ATMEL_XISC select VIDEOBUF2_DMA_CONTIG select REGMAP_MMIO select V4L2_FWNODE + select VIDEO_ATMEL_ISC_BASE help This module makes the ATMEL eXtended Image Sensor Controller available as a v4l2 device. +config VIDEO_ATMEL_ISC_BASE + tristate + default n + help + ATMEL ISC and XISC common code base. + config VIDEO_ATMEL_ISI tristate "ATMEL Image Sensor Interface (ISI) support" depends on VIDEO_V4L2 && OF diff --git a/drivers/media/platform/atmel/Makefile b/drivers/media/platform/atmel/Makefile index c5c01556c653..46d264ab7948 100644 --- a/drivers/media/platform/atmel/Makefile +++ b/drivers/media/platform/atmel/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only -atmel-isc-objs = atmel-sama5d2-isc.o atmel-isc-base.o -atmel-xisc-objs = atmel-sama7g5-isc.o atmel-isc-base.o +atmel-isc-objs = atmel-sama5d2-isc.o +atmel-xisc-objs = atmel-sama7g5-isc.o obj-$(CONFIG_VIDEO_ATMEL_ISI) += atmel-isi.o +obj-$(CONFIG_VIDEO_ATMEL_ISC_BASE) += atmel-isc-base.o obj-$(CONFIG_VIDEO_ATMEL_ISC) += atmel-isc.o obj-$(CONFIG_VIDEO_ATMEL_XISC) += atmel-xisc.o diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c index 19daa49bf604..136ab7cf36ed 100644 --- a/drivers/media/platform/atmel/atmel-isc-base.c +++ b/drivers/media/platform/atmel/atmel-isc-base.c @@ -378,6 +378,7 @@ int isc_clk_init(struct isc_device *isc) return 0; } +EXPORT_SYMBOL_GPL(isc_clk_init); void isc_clk_cleanup(struct isc_device *isc) { @@ -392,6 +393,7 @@ void isc_clk_cleanup(struct isc_device *isc) clk_unregister(isc_clk->clk); } } +EXPORT_SYMBOL_GPL(isc_clk_cleanup); static int isc_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, @@ -1578,6 +1580,7 @@ irqreturn_t isc_interrupt(int irq, void *dev_id) return ret; } +EXPORT_SYMBOL_GPL(isc_interrupt); static void isc_hist_count(struct isc_device *isc, u32 *min, u32 *max) { @@ -2212,6 +2215,7 @@ const struct v4l2_async_notifier_operations isc_async_ops = { .unbind = isc_async_unbind, .complete = isc_async_complete, }; +EXPORT_SYMBOL_GPL(isc_async_ops); void isc_subdev_cleanup(struct isc_device *isc) { @@ -2224,6 +2228,7 @@ void isc_subdev_cleanup(struct isc_device *isc) INIT_LIST_HEAD(&isc->subdev_entities); } +EXPORT_SYMBOL_GPL(isc_subdev_cleanup); int isc_pipeline_init(struct isc_device *isc) { @@ -2264,6 +2269,7 @@ int isc_pipeline_init(struct isc_device *isc) return 0; } +EXPORT_SYMBOL_GPL(isc_pipeline_init); /* regmap configuration */ #define ATMEL_ISC_REG_MAX 0xd5c @@ -2273,4 +2279,9 @@ const struct regmap_config isc_regmap_config = { .val_bits = 32, .max_register = ATMEL_ISC_REG_MAX, }; +EXPORT_SYMBOL_GPL(isc_regmap_config); +MODULE_AUTHOR("Songjun Wu"); +MODULE_AUTHOR("Eugen Hristev"); +MODULE_DESCRIPTION("Atmel ISC common code base"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c index 
83705730e37e..795a012d4020 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c @@ -37,7 +37,16 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req) } else { /* read */ requesttype = (USB_TYPE_VENDOR | USB_DIR_IN); - pipe = usb_rcvctrlpipe(d->udev, 0); + + /* + * Zero-length transfers must use usb_sndctrlpipe() and + * rtl28xxu_identify_state() uses a zero-length i2c read + * command to determine the chip type. + */ + if (req->size) + pipe = usb_rcvctrlpipe(d->udev, 0); + else + pipe = usb_sndctrlpipe(d->udev, 0); } ret = usb_control_msg(d->udev, pipe, 0, requesttype, req->value, @@ -612,9 +621,8 @@ static int rtl28xxu_read_config(struct dvb_usb_device *d) static int rtl28xxu_identify_state(struct dvb_usb_device *d, const char **name) { struct rtl28xxu_dev *dev = d_to_priv(d); - u8 buf[1]; int ret; - struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 1, buf}; + struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 0, NULL}; dev_dbg(&d->intf->dev, "\n"); diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 3097e93787f7..a761134fd3be 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -119,7 +119,7 @@ static int cfi_use_status_reg(struct cfi_private *cfi) struct cfi_pri_amdstd *extp = cfi->cmdset_priv; u8 poll_mask = CFI_POLL_STATUS_REG | CFI_POLL_DQ; - return extp->MinorVersion >= '5' && + return extp && extp->MinorVersion >= '5' && (extp->SoftwareFeatures & poll_mask) == CFI_POLL_STATUS_REG; } diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c index efc2003bd13a..99400d0fb8c1 100644 --- a/drivers/mtd/devices/mchp48l640.c +++ b/drivers/mtd/devices/mchp48l640.c @@ -229,7 +229,7 @@ static int mchp48l640_write(struct mtd_info *mtd, loff_t to, size_t len, woff += ws; } - return ret; + return 0; } static int mchp48l640_read_page(struct mtd_info *mtd, loff_t from, size_t len, @@ -255,6 +255,7 @@ static int mchp48l640_read_page(struct mtd_info *mtd, loff_t from, size_t len, if (!ret) *retlen += len; + kfree(cmd); return ret; fail: @@ -286,7 +287,7 @@ static int mchp48l640_read(struct mtd_info *mtd, loff_t from, size_t len, woff += ws; } - return ret; + return 0; }; static const struct mchp48_caps mchp48l640_caps = { diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 6ce4bc57f919..44bea3f65060 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -419,6 +419,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) if (tr->discard) { blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq); blk_queue_max_discard_sectors(new->rq, UINT_MAX); + new->rq->limits.discard_granularity = tr->blksize; } gd->queue = new->rq; @@ -525,14 +526,10 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) if (!blktrans_notifier.list.next) register_mtd_user(&blktrans_notifier); - - mutex_lock(&mtd_table_mutex); - ret = register_blkdev(tr->major, tr->name); if (ret < 0) { printk(KERN_WARNING "Unable to register %s block device on major %d: %d\n", tr->name, tr->major, ret); - mutex_unlock(&mtd_table_mutex); return ret; } @@ -542,12 +539,12 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) tr->blkshift = ffs(tr->blksize) - 1; INIT_LIST_HEAD(&tr->devs); - list_add(&tr->list, &blktrans_majors); + mutex_lock(&mtd_table_mutex); + list_add(&tr->list, &blktrans_majors); mtd_for_each_device(mtd) if (mtd->type != MTD_ABSENT) tr->add_mtd(tr, mtd); - mutex_unlock(&mtd_table_mutex); 
return 0; } @@ -564,8 +561,8 @@ int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr) list_for_each_entry_safe(dev, next, &tr->devs, list) tr->remove_dev(dev); - unregister_blkdev(tr->major, tr->name); mutex_unlock(&mtd_table_mutex); + unregister_blkdev(tr->major, tr->name); BUG_ON(!list_empty(&tr->devs)); return 0; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index b5ccd3037788..c8fd7f758938 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -806,7 +806,9 @@ static ssize_t mtd_otp_size(struct mtd_info *mtd, bool is_user) err: kfree(info); - return ret; + + /* ENODATA means there is no OTP region. */ + return ret == -ENODATA ? 0 : ret; } static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd, diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 57a583149cc0..3d6c6e880520 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5228,12 +5228,18 @@ static bool of_get_nand_on_flash_bbt(struct device_node *np) static int of_get_nand_secure_regions(struct nand_chip *chip) { struct device_node *dn = nand_get_flash_node(chip); + struct property *prop; int nr_elem, i, j; - nr_elem = of_property_count_elems_of_size(dn, "secure-regions", sizeof(u64)); - if (!nr_elem) + /* Only proceed if the "secure-regions" property is present in DT */ + prop = of_find_property(dn, "secure-regions", NULL); + if (!prop) return 0; + nr_elem = of_property_count_elems_of_size(dn, "secure-regions", sizeof(u64)); + if (nr_elem <= 0) + return nr_elem; + chip->nr_secure_regions = nr_elem / 2; chip->secure_regions = kcalloc(chip->nr_secure_regions, sizeof(*chip->secure_regions), GFP_KERNEL); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 995c613086aa..f37b1c56f7c4 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -551,8 +551,8 @@ config VMXNET3 tristate "VMware VMXNET3 ethernet driver" depends on PCI && INET depends on !(PAGE_SIZE_64KB || ARM64_64K_PAGES || \ - IA64_PAGE_SIZE_64KB || MICROBLAZE_64K_PAGES || \ - PARISC_PAGE_SIZE_64KB || PPC_64K_PAGES) + IA64_PAGE_SIZE_64KB || PARISC_PAGE_SIZE_64KB || \ + PPC_64K_PAGES) help This driver supports VMware's vmxnet3 virtual ethernet NIC. 
To compile this driver as a module, choose M here: the diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index a7ee0af1af90..54e321a695ce 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -71,12 +71,18 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) family = AF_INET6; if (bareudp->ethertype == htons(ETH_P_IP)) { - struct iphdr *iphdr; + __u8 ipversion; - iphdr = (struct iphdr *)(skb->data + BAREUDP_BASE_HLEN); - if (iphdr->version == 4) { - proto = bareudp->ethertype; - } else if (bareudp->multi_proto_mode && (iphdr->version == 6)) { + if (skb_copy_bits(skb, BAREUDP_BASE_HLEN, &ipversion, + sizeof(ipversion))) { + bareudp->dev->stats.rx_dropped++; + goto drop; + } + ipversion >>= 4; + + if (ipversion == 4) { + proto = htons(ETH_P_IP); + } else if (ipversion == 6 && bareudp->multi_proto_mode) { proto = htons(ETH_P_IPV6); } else { bareudp->dev->stats.rx_dropped++; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 22e5632089ac..7d3752cbf761 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -17,7 +17,6 @@ #include <linux/if_bonding.h> #include <linux/if_vlan.h> #include <linux/in.h> -#include <net/ipx.h> #include <net/arp.h> #include <net/ipv6.h> #include <asm/byteorder.h> @@ -1351,8 +1350,6 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, if (!is_multicast_ether_addr(eth_data->h_dest)) { switch (skb->protocol) { case htons(ETH_P_IP): - case htons(ETH_P_IPX): - /* In case of IPX, it will falback to L2 hash */ case htons(ETH_P_IPV6): hash_index = bond_xmit_hash(bond, skb); if (bond->params.tlb_dynamic_lb) { @@ -1454,35 +1451,6 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond, hash_size = sizeof(ip6hdr->daddr); break; } - case ETH_P_IPX: { - const struct ipxhdr *ipxhdr; - - if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) { - do_tx_balance = false; - break; - } - ipxhdr = (struct ipxhdr *)skb_network_header(skb); - - if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) { - /* something is wrong with this packet */ - do_tx_balance = false; - break; - } - - if (ipxhdr->ipx_type != IPX_TYPE_NCP) { - /* The only protocol worth balancing in - * this family since it has an "ARP" like - * mechanism - */ - do_tx_balance = false; - break; - } - - eth_data = eth_hdr(skb); - hash_start = (char *)eth_data->h_dest; - hash_size = ETH_ALEN; - break; - } case ETH_P_ARP: do_tx_balance = false; if (bond_info->rlb_enabled) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3ba5f4871162..b0966e733926 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -317,6 +317,25 @@ bool bond_sk_check(struct bonding *bond) } } +static bool bond_xdp_check(struct bonding *bond) +{ + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + case BOND_MODE_ACTIVEBACKUP: + return true; + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + /* vlan+srcmac is not supported with XDP as in most cases the 802.1q + * payload is not in the packet due to hardware offload. 
+ */ + if (bond->params.xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC) + return true; + fallthrough; + default: + return false; + } +} + /*---------------------------------- VLAN -----------------------------------*/ /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, @@ -1712,6 +1731,20 @@ void bond_lower_state_changed(struct slave *slave) netdev_lower_state_changed(slave->dev, &info); } +#define BOND_NL_ERR(bond_dev, extack, errmsg) do { \ + if (extack) \ + NL_SET_ERR_MSG(extack, errmsg); \ + else \ + netdev_err(bond_dev, "Error: %s\n", errmsg); \ +} while (0) + +#define SLAVE_NL_ERR(bond_dev, slave_dev, extack, errmsg) do { \ + if (extack) \ + NL_SET_ERR_MSG(extack, errmsg); \ + else \ + slave_err(bond_dev, slave_dev, "Error: %s\n", errmsg); \ +} while (0) + /* enslave device <slave> to bond device <master> */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, struct netlink_ext_ack *extack) @@ -1725,9 +1758,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, if (slave_dev->flags & IFF_MASTER && !netif_is_bond_master(slave_dev)) { - NL_SET_ERR_MSG(extack, "Device with IFF_MASTER cannot be enslaved"); - netdev_err(bond_dev, - "Error: Device with IFF_MASTER cannot be enslaved\n"); + BOND_NL_ERR(bond_dev, extack, + "Device type (master device) cannot be enslaved"); return -EPERM; } @@ -1739,15 +1771,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, /* already in-use? */ if (netdev_is_rx_handler_busy(slave_dev)) { - NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved"); - slave_err(bond_dev, slave_dev, - "Error: Device is in use and cannot be enslaved\n"); + SLAVE_NL_ERR(bond_dev, slave_dev, extack, + "Device is in use and cannot be enslaved"); return -EBUSY; } if (bond_dev == slave_dev) { - NL_SET_ERR_MSG(extack, "Cannot enslave bond to itself."); - netdev_err(bond_dev, "cannot enslave bond to itself.\n"); + BOND_NL_ERR(bond_dev, extack, "Cannot enslave bond to itself."); return -EPERM; } @@ -1756,8 +1786,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { slave_dbg(bond_dev, slave_dev, "is NETIF_F_VLAN_CHALLENGED\n"); if (vlan_uses_dev(bond_dev)) { - NL_SET_ERR_MSG(extack, "Can not enslave VLAN challenged device to VLAN enabled bond"); - slave_err(bond_dev, slave_dev, "Error: cannot enslave VLAN challenged slave on VLAN enabled bond\n"); + SLAVE_NL_ERR(bond_dev, slave_dev, extack, + "Can not enslave VLAN challenged device to VLAN enabled bond"); return -EPERM; } else { slave_warn(bond_dev, slave_dev, "enslaved VLAN challenged slave. Adding VLANs will be blocked as long as it is part of bond.\n"); @@ -1775,8 +1805,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, * enslaving it; the old ifenslave will not. 
*/ if (slave_dev->flags & IFF_UP) { - NL_SET_ERR_MSG(extack, "Device can not be enslaved while up"); - slave_err(bond_dev, slave_dev, "slave is up - this may be due to an out of date ifenslave\n"); + SLAVE_NL_ERR(bond_dev, slave_dev, extack, + "Device can not be enslaved while up"); return -EPERM; } @@ -1815,17 +1845,15 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bond_dev); } } else if (bond_dev->type != slave_dev->type) { - NL_SET_ERR_MSG(extack, "Device type is different from other slaves"); - slave_err(bond_dev, slave_dev, "ether type (%d) is different from other slaves (%d), can not enslave it\n", - slave_dev->type, bond_dev->type); + SLAVE_NL_ERR(bond_dev, slave_dev, extack, + "Device type is different from other slaves"); return -EINVAL; } if (slave_dev->type == ARPHRD_INFINIBAND && BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { - NL_SET_ERR_MSG(extack, "Only active-backup mode is supported for infiniband slaves"); - slave_warn(bond_dev, slave_dev, "Type (%d) supports only active-backup mode\n", - slave_dev->type); + SLAVE_NL_ERR(bond_dev, slave_dev, extack, + "Only active-backup mode is supported for infiniband slaves"); res = -EOPNOTSUPP; goto err_undo_flags; } @@ -1839,8 +1867,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bond->params.fail_over_mac = BOND_FOM_ACTIVE; slave_warn(bond_dev, slave_dev, "Setting fail_over_mac to active for active-backup mode\n"); } else { - NL_SET_ERR_MSG(extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active"); - slave_err(bond_dev, slave_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n"); + SLAVE_NL_ERR(bond_dev, slave_dev, extack, + "Slave device does not support setting the MAC address, but fail_over_mac is not set to active"); res = -EOPNOTSUPP; goto err_undo_flags; } @@ -2133,6 +2161,39 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bond_update_slave_arr(bond, NULL); + if (!slave_dev->netdev_ops->ndo_bpf || + !slave_dev->netdev_ops->ndo_xdp_xmit) { + if (bond->xdp_prog) { + SLAVE_NL_ERR(bond_dev, slave_dev, extack, + "Slave does not support XDP"); + res = -EOPNOTSUPP; + goto err_sysfs_del; + } + } else { + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = bond->xdp_prog, + .extack = extack, + }; + + if (dev_xdp_prog_count(slave_dev) > 0) { + SLAVE_NL_ERR(bond_dev, slave_dev, extack, + "Slave has XDP program loaded, please unload before enslaving"); + res = -EOPNOTSUPP; + goto err_sysfs_del; + } + + res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (res < 0) { + /* ndo_bpf() sets extack error message */ + slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res); + goto err_sysfs_del; + } + if (bond->xdp_prog) + bpf_prog_inc(bond->xdp_prog); + } + slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n", bond_is_active_slave(new_slave) ? "an active" : "a backup", new_slave->link != BOND_LINK_DOWN ? 
"an up" : "a down"); @@ -2252,6 +2313,17 @@ static int __bond_release_one(struct net_device *bond_dev, /* recompute stats just before removing the slave */ bond_get_stats(bond->dev, &bond->bond_stats); + if (bond->xdp_prog) { + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = NULL, + .extack = NULL, + }; + if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp)) + slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n"); + } + /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. */ @@ -3614,90 +3686,112 @@ static struct notifier_block bond_netdev_notifier = { /*---------------------------- Hashing Policies -----------------------------*/ +/* Helper to access data in a packet, with or without a backing skb. + * If skb is given the data is linearized if necessary via pskb_may_pull. + */ +static inline const void *bond_pull_data(struct sk_buff *skb, + const void *data, int hlen, int n) +{ + if (likely(n <= hlen)) + return data; + else if (skb && likely(pskb_may_pull(skb, n))) + return skb->head; + + return NULL; +} + /* L2 hash helper */ -static inline u32 bond_eth_hash(struct sk_buff *skb) +static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen) { - struct ethhdr *ep, hdr_tmp; + struct ethhdr *ep; - ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp); - if (ep) - return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto; - return 0; + data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr)); + if (!data) + return 0; + + ep = (struct ethhdr *)(data + mhoff); + return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto); } -static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, - int *noff, int *proto, bool l34) +static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data, + int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34) { const struct ipv6hdr *iph6; const struct iphdr *iph; - if (skb->protocol == htons(ETH_P_IP)) { - if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph)))) + if (l2_proto == htons(ETH_P_IP)) { + data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph)); + if (!data) return false; - iph = (const struct iphdr *)(skb->data + *noff); + + iph = (const struct iphdr *)(data + *nhoff); iph_to_flow_copy_v4addrs(fk, iph); - *noff += iph->ihl << 2; + *nhoff += iph->ihl << 2; if (!ip_is_fragment(iph)) - *proto = iph->protocol; - } else if (skb->protocol == htons(ETH_P_IPV6)) { - if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6)))) + *ip_proto = iph->protocol; + } else if (l2_proto == htons(ETH_P_IPV6)) { + data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6)); + if (!data) return false; - iph6 = (const struct ipv6hdr *)(skb->data + *noff); + + iph6 = (const struct ipv6hdr *)(data + *nhoff); iph_to_flow_copy_v6addrs(fk, iph6); - *noff += sizeof(*iph6); - *proto = iph6->nexthdr; + *nhoff += sizeof(*iph6); + *ip_proto = iph6->nexthdr; } else { return false; } - if (l34 && *proto >= 0) - fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto); + if (l34 && *ip_proto >= 0) + fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen); return true; } -static u32 bond_vlan_srcmac_hash(struct sk_buff *skb) +static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen) { - struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); u32 srcmac_vendor = 0, srcmac_dev = 0; - u16 vlan; + struct ethhdr *mac_hdr; + u16 vlan = 0; int i; + data = 
bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr)); + if (!data) + return 0; + mac_hdr = (struct ethhdr *)(data + mhoff); + for (i = 0; i < 3; i++) srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i]; for (i = 3; i < ETH_ALEN; i++) srcmac_dev = (srcmac_dev << 8) | mac_hdr->h_source[i]; - if (!skb_vlan_tag_present(skb)) - return srcmac_vendor ^ srcmac_dev; - - vlan = skb_vlan_tag_get(skb); + if (skb && skb_vlan_tag_present(skb)) + vlan = skb_vlan_tag_get(skb); return vlan ^ srcmac_vendor ^ srcmac_dev; } /* Extract the appropriate headers based on bond's xmit policy */ -static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, - struct flow_keys *fk) +static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data, + __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk) { bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34; - int noff, proto = -1; + int ip_proto = -1; switch (bond->params.xmit_policy) { case BOND_XMIT_POLICY_ENCAP23: case BOND_XMIT_POLICY_ENCAP34: memset(fk, 0, sizeof(*fk)); return __skb_flow_dissect(NULL, skb, &flow_keys_bonding, - fk, NULL, 0, 0, 0, 0); + fk, data, l2_proto, nhoff, hlen, 0); default: break; } fk->ports.ports = 0; memset(&fk->icmp, 0, sizeof(fk->icmp)); - noff = skb_network_offset(skb); - if (!bond_flow_ip(skb, fk, &noff, &proto, l34)) + if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34)) return false; /* ICMP error packets contains at least 8 bytes of the header @@ -3705,22 +3799,20 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, * to correlate ICMP error packets within the same flow which * generated the error. */ - if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) { - skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data, - skb_transport_offset(skb), - skb_headlen(skb)); - if (proto == IPPROTO_ICMP) { + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { + skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen); + if (ip_proto == IPPROTO_ICMP) { if (!icmp_is_err(fk->icmp.type)) return true; - noff += sizeof(struct icmphdr); - } else if (proto == IPPROTO_ICMPV6) { + nhoff += sizeof(struct icmphdr); + } else if (ip_proto == IPPROTO_ICMPV6) { if (!icmpv6_is_err(fk->icmp.type)) return true; - noff += sizeof(struct icmp6hdr); + nhoff += sizeof(struct icmp6hdr); } - return bond_flow_ip(skb, fk, &noff, &proto, l34); + return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34); } return true; @@ -3736,33 +3828,26 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow) return hash >> 1; } -/** - * bond_xmit_hash - generate a hash value based on the xmit policy - * @bond: bonding device - * @skb: buffer to use for headers - * - * This function will extract the necessary headers from the skb buffer and use - * them to generate a hash based on the xmit_policy set in the bonding device +/* Generate hash based on xmit policy. If @skb is given it is used to linearize + * the data as required, but this function can be used without it if the data is + * known to be linear (e.g. with xdp_buff). 
*/ -u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) +static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data, + __be16 l2_proto, int mhoff, int nhoff, int hlen) { struct flow_keys flow; u32 hash; - if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 && - skb->l4_hash) - return skb->hash; - if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC) - return bond_vlan_srcmac_hash(skb); + return bond_vlan_srcmac_hash(skb, data, mhoff, hlen); if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || - !bond_flow_dissect(bond, skb, &flow)) - return bond_eth_hash(skb); + !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow)) + return bond_eth_hash(skb, data, mhoff, hlen); if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) { - hash = bond_eth_hash(skb); + hash = bond_eth_hash(skb, data, mhoff, hlen); } else { if (flow.icmp.id) memcpy(&hash, &flow.icmp, sizeof(hash)); @@ -3773,6 +3858,45 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) return bond_ip_hash(hash, &flow); } +/** + * bond_xmit_hash - generate a hash value based on the xmit policy + * @bond: bonding device + * @skb: buffer to use for headers + * + * This function will extract the necessary headers from the skb buffer and use + * them to generate a hash based on the xmit_policy set in the bonding device + */ +u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) +{ + if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 && + skb->l4_hash) + return skb->hash; + + return __bond_xmit_hash(bond, skb, skb->head, skb->protocol, + skb->mac_header, skb->network_header, + skb_headlen(skb)); +} + +/** + * bond_xmit_hash_xdp - generate a hash value based on the xmit policy + * @bond: bonding device + * @xdp: buffer to use for headers + * + * The XDP variant of bond_xmit_hash. 
+ */ +static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp) +{ + struct ethhdr *eth; + + if (xdp->data + sizeof(struct ethhdr) > xdp->data_end) + return 0; + + eth = (struct ethhdr *)xdp->data; + + return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0, + sizeof(struct ethhdr), xdp->data_end - xdp->data); +} + /*-------------------------- Device entry points ----------------------------*/ void bond_work_init_all(struct bonding *bond) @@ -4421,6 +4545,47 @@ non_igmp: return NULL; } +static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond, + struct xdp_buff *xdp) +{ + struct slave *slave; + int slave_cnt; + u32 slave_id; + const struct ethhdr *eth; + void *data = xdp->data; + + if (data + sizeof(struct ethhdr) > xdp->data_end) + goto non_igmp; + + eth = (struct ethhdr *)data; + data += sizeof(struct ethhdr); + + /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */ + if (eth->h_proto == htons(ETH_P_IP)) { + const struct iphdr *iph; + + if (data + sizeof(struct iphdr) > xdp->data_end) + goto non_igmp; + + iph = (struct iphdr *)data; + + if (iph->protocol == IPPROTO_IGMP) { + slave = rcu_dereference(bond->curr_active_slave); + if (slave) + return slave; + return bond_get_slave_by_id(bond, 0); + } + } + +non_igmp: + slave_cnt = READ_ONCE(bond->slave_cnt); + if (likely(slave_cnt)) { + slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; + return bond_get_slave_by_id(bond, slave_id); + } + return NULL; +} + static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) { @@ -4434,8 +4599,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, return bond_tx_drop(bond_dev, skb); } -static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond, - struct sk_buff *skb) +static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond) { return rcu_dereference(bond->curr_active_slave); } @@ -4449,7 +4613,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb, struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; - slave = bond_xmit_activebackup_slave_get(bond, skb); + slave = bond_xmit_activebackup_slave_get(bond); if (slave) return bond_dev_queue_xmit(bond, skb, slave->dev); @@ -4637,6 +4801,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond, return slave; } +static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond, + struct xdp_buff *xdp) +{ + struct bond_up_slave *slaves; + unsigned int count; + u32 hash; + + hash = bond_xmit_hash_xdp(bond, xdp); + slaves = rcu_dereference(bond->usable_slaves); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (unlikely(!count)) + return NULL; + + return slaves->arr[hash % count]; +} + /* Use this Xmit function for 3AD as well as XOR modes. The current * usable slave array is formed in the control path. The xmit function * just calculates hash and sends the packet out. 
@@ -4747,7 +4927,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev, slave = bond_xmit_roundrobin_slave_get(bond, skb); break; case BOND_MODE_ACTIVEBACKUP: - slave = bond_xmit_activebackup_slave_get(bond, skb); + slave = bond_xmit_activebackup_slave_get(bond); break; case BOND_MODE_8023AD: case BOND_MODE_XOR: @@ -4921,6 +5101,172 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } +static struct net_device * +bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + + /* Caller needs to hold rcu_read_lock() */ + + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp); + break; + + case BOND_MODE_ACTIVEBACKUP: + slave = bond_xmit_activebackup_slave_get(bond); + break; + + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp); + break; + + default: + /* Should never happen. Mode guarded by bond_xdp_check() */ + netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond)); + WARN_ON_ONCE(1); + return NULL; + } + + if (slave) + return slave->dev; + + return NULL; +} + +static int bond_xdp_xmit(struct net_device *bond_dev, + int n, struct xdp_frame **frames, u32 flags) +{ + int nxmit, err = -ENXIO; + + rcu_read_lock(); + + for (nxmit = 0; nxmit < n; nxmit++) { + struct xdp_frame *frame = frames[nxmit]; + struct xdp_frame *frames1[] = {frame}; + struct net_device *slave_dev; + struct xdp_buff xdp; + + xdp_convert_frame_to_buff(frame, &xdp); + + slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp); + if (!slave_dev) { + err = -ENXIO; + break; + } + + err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags); + if (err < 1) + break; + } + + rcu_read_unlock(); + + /* If error happened on the first frame then we can pass the error up, otherwise + * report the number of frames that were xmitted. + */ + if (err < 0) + return (nxmit == 0 ? 
err : nxmit); + + return nxmit; +} + +static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct bonding *bond = netdev_priv(dev); + struct list_head *iter; + struct slave *slave, *rollback_slave; + struct bpf_prog *old_prog; + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = prog, + .extack = extack, + }; + int err; + + ASSERT_RTNL(); + + if (!bond_xdp_check(bond)) + return -EOPNOTSUPP; + + old_prog = bond->xdp_prog; + bond->xdp_prog = prog; + + bond_for_each_slave(bond, slave, iter) { + struct net_device *slave_dev = slave->dev; + + if (!slave_dev->netdev_ops->ndo_bpf || + !slave_dev->netdev_ops->ndo_xdp_xmit) { + SLAVE_NL_ERR(dev, slave_dev, extack, + "Slave device does not support XDP"); + err = -EOPNOTSUPP; + goto err; + } + + if (dev_xdp_prog_count(slave_dev) > 0) { + SLAVE_NL_ERR(dev, slave_dev, extack, + "Slave has XDP program loaded, please unload before enslaving"); + err = -EOPNOTSUPP; + goto err; + } + + err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (err < 0) { + /* ndo_bpf() sets extack error message */ + slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err); + goto err; + } + if (prog) + bpf_prog_inc(prog); + } + + if (old_prog) + bpf_prog_put(old_prog); + + if (prog) + static_branch_inc(&bpf_master_redirect_enabled_key); + else + static_branch_dec(&bpf_master_redirect_enabled_key); + + return 0; + +err: + /* unwind the program changes */ + bond->xdp_prog = old_prog; + xdp.prog = old_prog; + xdp.extack = NULL; /* do not overwrite original error */ + + bond_for_each_slave(bond, rollback_slave, iter) { + struct net_device *slave_dev = rollback_slave->dev; + int err_unwind; + + if (slave == rollback_slave) + break; + + err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (err_unwind < 0) + slave_err(dev, slave_dev, + "Error %d when unwinding XDP program change\n", err_unwind); + else if (xdp.prog) + bpf_prog_inc(xdp.prog); + } + return err; +} + +static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return bond_xdp_set(dev, xdp->prog, xdp->extack); + default: + return -EINVAL; + } +} + static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed) { if (speed == 0 || speed == SPEED_UNKNOWN) @@ -5009,6 +5355,9 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_features_check = passthru_features_check, .ndo_get_xmit_slave = bond_xmit_get_slave, .ndo_sk_get_lower_dev = bond_sk_get_lower_dev, + .ndo_bpf = bond_xdp, + .ndo_xdp_xmit = bond_xdp_xmit, + .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave, }; static const struct device_type bond_type = { diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h index 4247ff80a29c..08b6efa7a1a7 100644 --- a/drivers/net/can/c_can/c_can.h +++ b/drivers/net/can/c_can/c_can.h @@ -176,6 +176,13 @@ struct c_can_raminit { bool needs_pulse; }; +/* c_can tx ring structure */ +struct c_can_tx_ring { + unsigned int head; + unsigned int tail; + unsigned int obj_num; +}; + /* c_can private data structure */ struct c_can_priv { struct can_priv can; /* must be the first member */ @@ -190,17 +197,16 @@ struct c_can_priv { unsigned int msg_obj_tx_first; unsigned int msg_obj_tx_last; u32 msg_obj_rx_mask; - atomic_t tx_active; atomic_t sie_pending; unsigned long tx_dir; int last_status; + struct c_can_tx_ring tx; u16 (*read_reg)(const struct c_can_priv *priv, enum reg index); void (*write_reg)(const struct c_can_priv *priv, enum reg index, 
u16 val); u32 (*read_reg32)(const struct c_can_priv *priv, enum reg index); void (*write_reg32)(const struct c_can_priv *priv, enum reg index, u32 val); void __iomem *base; const u16 *regs; - void *priv; /* for board-specific data */ enum c_can_dev_id type; struct c_can_raminit raminit_sys; /* RAMINIT via syscon regmap */ void (*raminit)(const struct c_can_priv *priv, bool enable); @@ -220,4 +226,19 @@ int c_can_power_down(struct net_device *dev); void c_can_set_ethtool_ops(struct net_device *dev); +static inline u8 c_can_get_tx_head(const struct c_can_tx_ring *ring) +{ + return ring->head & (ring->obj_num - 1); +} + +static inline u8 c_can_get_tx_tail(const struct c_can_tx_ring *ring) +{ + return ring->tail & (ring->obj_num - 1); +} + +static inline u8 c_can_get_tx_free(const struct c_can_tx_ring *ring) +{ + return ring->obj_num - (ring->head - ring->tail); +} + #endif /* C_CAN_H */ diff --git a/drivers/net/can/c_can/c_can_main.c b/drivers/net/can/c_can/c_can_main.c index 7588f70ca0fe..52671d1ea17d 100644 --- a/drivers/net/can/c_can/c_can_main.c +++ b/drivers/net/can/c_can/c_can_main.c @@ -160,8 +160,8 @@ #define IF_MCONT_TX (IF_MCONT_TXIE | IF_MCONT_EOB) -/* Use IF1 for RX and IF2 for TX */ -#define IF_RX 0 +/* Use IF1 in NAPI path and IF2 in TX path */ +#define IF_NAPI 0 #define IF_TX 1 /* minimum timeout for checking BUSY status */ @@ -427,24 +427,51 @@ static void c_can_setup_receive_object(struct net_device *dev, int iface, c_can_object_put(dev, iface, obj, IF_COMM_RCV_SETUP); } +static bool c_can_tx_busy(const struct c_can_priv *priv, + const struct c_can_tx_ring *tx_ring) +{ + if (c_can_get_tx_free(tx_ring) > 0) + return false; + + netif_stop_queue(priv->dev); + + /* Memory barrier before checking tx_free (head and tail) */ + smp_mb(); + + if (c_can_get_tx_free(tx_ring) == 0) { + netdev_dbg(priv->dev, + "Stopping tx-queue (tx_head=0x%08x, tx_tail=0x%08x, len=%d).\n", + tx_ring->head, tx_ring->tail, + tx_ring->head - tx_ring->tail); + return true; + } + + netif_start_queue(priv->dev); + return false; +} + static netdev_tx_t c_can_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct can_frame *frame = (struct can_frame *)skb->data; struct c_can_priv *priv = netdev_priv(dev); - u32 idx, obj; + struct c_can_tx_ring *tx_ring = &priv->tx; + u32 idx, obj, cmd = IF_COMM_TX; if (can_dropped_invalid_skb(dev, skb)) return NETDEV_TX_OK; - /* This is not a FIFO. C/D_CAN sends out the buffers - * prioritized. The lowest buffer number wins. - */ - idx = fls(atomic_read(&priv->tx_active)); - obj = idx + priv->msg_obj_tx_first; - /* If this is the last buffer, stop the xmit queue */ - if (idx == priv->msg_obj_tx_num - 1) + if (c_can_tx_busy(priv, tx_ring)) + return NETDEV_TX_BUSY; + + idx = c_can_get_tx_head(tx_ring); + tx_ring->head++; + if (c_can_get_tx_free(tx_ring) == 0) netif_stop_queue(dev); + + if (idx < c_can_get_tx_tail(tx_ring)) + cmd &= ~IF_COMM_TXRQST; /* Cache the message */ + /* Store the message in the interface so we can call * can_put_echo_skb(). We must do this before we enable * transmit as we might race against do_tx(). 
@@ -452,11 +479,8 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb, c_can_setup_tx_object(dev, IF_TX, frame, idx); priv->dlc[idx] = frame->len; can_put_echo_skb(skb, dev, idx, 0); - - /* Update the active bits */ - atomic_add(BIT(idx), &priv->tx_active); - /* Start transmission */ - c_can_object_put(dev, IF_TX, obj, IF_COMM_TX); + obj = idx + priv->msg_obj_tx_first; + c_can_object_put(dev, IF_TX, obj, cmd); return NETDEV_TX_OK; } @@ -529,13 +553,13 @@ static void c_can_configure_msg_objects(struct net_device *dev) /* first invalidate all message objects */ for (i = priv->msg_obj_rx_first; i <= priv->msg_obj_num; i++) - c_can_inval_msg_object(dev, IF_RX, i); + c_can_inval_msg_object(dev, IF_NAPI, i); /* setup receive message objects */ for (i = priv->msg_obj_rx_first; i < priv->msg_obj_rx_last; i++) - c_can_setup_receive_object(dev, IF_RX, i, 0, 0, IF_MCONT_RCV); + c_can_setup_receive_object(dev, IF_NAPI, i, 0, 0, IF_MCONT_RCV); - c_can_setup_receive_object(dev, IF_RX, priv->msg_obj_rx_last, 0, 0, + c_can_setup_receive_object(dev, IF_NAPI, priv->msg_obj_rx_last, 0, 0, IF_MCONT_RCV_EOB); } @@ -567,6 +591,7 @@ static int c_can_software_reset(struct net_device *dev) static int c_can_chip_config(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); + struct c_can_tx_ring *tx_ring = &priv->tx; int err; err = c_can_software_reset(dev); @@ -598,7 +623,8 @@ static int c_can_chip_config(struct net_device *dev) priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED); /* Clear all internal status */ - atomic_set(&priv->tx_active, 0); + tx_ring->head = 0; + tx_ring->tail = 0; priv->tx_dir = 0; /* set bittiming params */ @@ -696,40 +722,57 @@ static int c_can_get_berr_counter(const struct net_device *dev, static void c_can_do_tx(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); + struct c_can_tx_ring *tx_ring = &priv->tx; struct net_device_stats *stats = &dev->stats; - u32 idx, obj, pkts = 0, bytes = 0, pend, clr; + u32 idx, obj, pkts = 0, bytes = 0, pend; + u8 tail; if (priv->msg_obj_tx_last > 32) pend = priv->read_reg32(priv, C_CAN_INTPND3_REG); else pend = priv->read_reg(priv, C_CAN_INTPND2_REG); - clr = pend; while ((idx = ffs(pend))) { idx--; pend &= ~BIT(idx); obj = idx + priv->msg_obj_tx_first; - /* We use IF_RX interface instead of IF_TX because we + /* We use IF_NAPI interface instead of IF_TX because we * are called from c_can_poll(), which runs inside - * NAPI. We are not trasmitting. + * NAPI. We are not transmitting. */ - c_can_inval_tx_object(dev, IF_RX, obj); + c_can_inval_tx_object(dev, IF_NAPI, obj); can_get_echo_skb(dev, idx, NULL); bytes += priv->dlc[idx]; pkts++; } - /* Clear the bits in the tx_active mask */ - atomic_sub(clr, &priv->tx_active); + if (!pkts) + return; - if (clr & BIT(priv->msg_obj_tx_num - 1)) - netif_wake_queue(dev); + tx_ring->tail += pkts; + if (c_can_get_tx_free(tx_ring)) { + /* Make sure that anybody stopping the queue after + * this sees the new tx_ring->tail. 
+ */ + smp_mb(); + netif_wake_queue(priv->dev); + } - if (pkts) { - stats->tx_bytes += bytes; - stats->tx_packets += pkts; - can_led_event(dev, CAN_LED_EVENT_TX); + stats->tx_bytes += bytes; + stats->tx_packets += pkts; + can_led_event(dev, CAN_LED_EVENT_TX); + + tail = c_can_get_tx_tail(tx_ring); + + if (tail == 0) { + u8 head = c_can_get_tx_head(tx_ring); + + /* Start transmission for all cached messages */ + for (idx = tail; idx < head; idx++) { + obj = idx + priv->msg_obj_tx_first; + c_can_object_put(dev, IF_NAPI, obj, IF_COMM_TXRQST); + } } } @@ -766,14 +809,14 @@ static u32 c_can_adjust_pending(u32 pend, u32 rx_mask) static inline void c_can_rx_object_get(struct net_device *dev, struct c_can_priv *priv, u32 obj) { - c_can_object_get(dev, IF_RX, obj, priv->comm_rcv_high); + c_can_object_get(dev, IF_NAPI, obj, priv->comm_rcv_high); } static inline void c_can_rx_finalize(struct net_device *dev, struct c_can_priv *priv, u32 obj) { if (priv->type != BOSCH_D_CAN) - c_can_object_get(dev, IF_RX, obj, IF_COMM_CLR_NEWDAT); + c_can_object_get(dev, IF_NAPI, obj, IF_COMM_CLR_NEWDAT); } static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv, @@ -785,10 +828,12 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv, pend &= ~BIT(obj - 1); c_can_rx_object_get(dev, priv, obj); - ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX)); + ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_NAPI)); if (ctrl & IF_MCONT_MSGLST) { - int n = c_can_handle_lost_msg_obj(dev, IF_RX, obj, ctrl); + int n; + + n = c_can_handle_lost_msg_obj(dev, IF_NAPI, obj, ctrl); pkts += n; quota -= n; @@ -803,7 +848,7 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv, continue; /* read the data from the message object */ - c_can_read_msg_object(dev, IF_RX, ctrl); + c_can_read_msg_object(dev, IF_NAPI, ctrl); c_can_rx_finalize(dev, priv, obj); @@ -1205,6 +1250,10 @@ struct net_device *alloc_c_can_dev(int msg_obj_num) priv->msg_obj_tx_last = priv->msg_obj_tx_first + priv->msg_obj_tx_num - 1; + priv->tx.head = 0; + priv->tx.tail = 0; + priv->tx.obj_num = msg_obj_tx_num; + netif_napi_add(dev, &priv->napi, c_can_poll, priv->msg_obj_rx_num); priv->dev = dev; diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c index 36950363682f..86e95e9d6533 100644 --- a/drivers/net/can/c_can/c_can_platform.c +++ b/drivers/net/can/c_can/c_can_platform.c @@ -385,7 +385,6 @@ static int c_can_plat_probe(struct platform_device *pdev) priv->base = addr; priv->device = &pdev->dev; priv->can.clock.freq = clk_get_rate(clk); - priv->priv = clk; priv->type = drvdata->id; platform_set_drvdata(pdev, dev); diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 311d8564d611..e3d840b81357 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -15,6 +15,7 @@ #include <linux/can/dev.h> #include <linux/can/skb.h> #include <linux/can/led.h> +#include <linux/gpio/consumer.h> #include <linux/of.h> #define MOD_DESC "CAN device driver interface" @@ -400,10 +401,69 @@ void close_candev(struct net_device *dev) } EXPORT_SYMBOL_GPL(close_candev); +static int can_set_termination(struct net_device *ndev, u16 term) +{ + struct can_priv *priv = netdev_priv(ndev); + int set; + + if (term == priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_ENABLED]) + set = 1; + else + set = 0; + + gpiod_set_value(priv->termination_gpio, set); + + return 0; +} + +static int can_get_termination(struct net_device *ndev) +{ + struct 
can_priv *priv = netdev_priv(ndev); + struct device *dev = ndev->dev.parent; + struct gpio_desc *gpio; + u32 term; + int ret; + + /* Disabling termination by default is the safe choice: Else if many + * bus participants enable it, no communication is possible at all. + */ + gpio = devm_gpiod_get_optional(dev, "termination", GPIOD_OUT_LOW); + if (IS_ERR(gpio)) + return dev_err_probe(dev, PTR_ERR(gpio), + "Cannot get termination-gpios\n"); + + if (!gpio) + return 0; + + ret = device_property_read_u32(dev, "termination-ohms", &term); + if (ret) { + netdev_err(ndev, "Cannot get termination-ohms: %pe\n", + ERR_PTR(ret)); + return ret; + } + + if (term > U16_MAX) { + netdev_err(ndev, "Invalid termination-ohms value (%u > %u)\n", + term, U16_MAX); + return -EINVAL; + } + + priv->termination_const_cnt = ARRAY_SIZE(priv->termination_gpio_ohms); + priv->termination_const = priv->termination_gpio_ohms; + priv->termination_gpio = gpio; + priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_DISABLED] = + CAN_TERMINATION_DISABLED; + priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_ENABLED] = term; + priv->do_set_termination = can_set_termination; + + return 0; +} + /* Register the CAN network device */ int register_candev(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); + int err; /* Ensure termination_const, termination_const_cnt and * do_set_termination consistency. All must be either set or @@ -419,6 +479,12 @@ int register_candev(struct net_device *dev) if (!priv->data_bitrate_const != !priv->data_bitrate_const_cnt) return -EINVAL; + if (!priv->termination_const) { + err = can_get_termination(dev); + if (err) + return err; + } + dev->rtnl_link_ops = &can_link_ops; netif_carrier_off(dev); diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 147c23d7dab7..80425636049d 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -116,7 +116,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], maskedflags = cm->flags & cm->mask; /* check whether provided bits are allowed to be passed */ - if (cm->mask & ~(priv->ctrlmode_supported | ctrlstatic)) + if (maskedflags & ~(priv->ctrlmode_supported | ctrlstatic)) return -EOPNOTSUPP; /* do not check for static fd-non-iso if 'fd' is disabled */ diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 0cffaad905c2..2470c47b2e31 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -279,7 +279,7 @@ enum m_can_reg { /* Message RAM Elements */ #define M_CAN_FIFO_ID 0x0 #define M_CAN_FIFO_DLC 0x4 -#define M_CAN_FIFO_DATA(n) (0x8 + ((n) << 2)) +#define M_CAN_FIFO_DATA 0x8 /* Rx Buffer Element */ /* R0 */ @@ -309,6 +309,15 @@ enum m_can_reg { #define TX_EVENT_MM_MASK GENMASK(31, 24) #define TX_EVENT_TXTS_MASK GENMASK(15, 0) +/* The ID and DLC registers are adjacent in M_CAN FIFO memory, + * and we can save a (potentially slow) bus round trip by combining + * reads and writes to them. 
+ */ +struct id_and_dlc { + u32 id; + u32 dlc; +}; + static inline u32 m_can_read(struct m_can_classdev *cdev, enum m_can_reg reg) { return cdev->ops->read_reg(cdev, reg); @@ -320,36 +329,39 @@ static inline void m_can_write(struct m_can_classdev *cdev, enum m_can_reg reg, cdev->ops->write_reg(cdev, reg, val); } -static u32 m_can_fifo_read(struct m_can_classdev *cdev, - u32 fgi, unsigned int offset) +static int +m_can_fifo_read(struct m_can_classdev *cdev, + u32 fgi, unsigned int offset, void *val, size_t val_count) { u32 addr_offset = cdev->mcfg[MRAM_RXF0].off + fgi * RXF0_ELEMENT_SIZE + offset; - return cdev->ops->read_fifo(cdev, addr_offset); + return cdev->ops->read_fifo(cdev, addr_offset, val, val_count); } -static void m_can_fifo_write(struct m_can_classdev *cdev, - u32 fpi, unsigned int offset, u32 val) +static int +m_can_fifo_write(struct m_can_classdev *cdev, + u32 fpi, unsigned int offset, const void *val, size_t val_count) { u32 addr_offset = cdev->mcfg[MRAM_TXB].off + fpi * TXB_ELEMENT_SIZE + offset; - cdev->ops->write_fifo(cdev, addr_offset, val); + return cdev->ops->write_fifo(cdev, addr_offset, val, val_count); } -static inline void m_can_fifo_write_no_off(struct m_can_classdev *cdev, - u32 fpi, u32 val) +static inline int m_can_fifo_write_no_off(struct m_can_classdev *cdev, + u32 fpi, u32 val) { - cdev->ops->write_fifo(cdev, fpi, val); + return cdev->ops->write_fifo(cdev, fpi, &val, 1); } -static u32 m_can_txe_fifo_read(struct m_can_classdev *cdev, u32 fgi, u32 offset) +static int +m_can_txe_fifo_read(struct m_can_classdev *cdev, u32 fgi, u32 offset, u32 *val) { u32 addr_offset = cdev->mcfg[MRAM_TXE].off + fgi * TXE_ELEMENT_SIZE + offset; - return cdev->ops->read_fifo(cdev, addr_offset); + return cdev->ops->read_fifo(cdev, addr_offset, val, 1); } static inline bool m_can_tx_fifo_full(struct m_can_classdev *cdev) @@ -437,7 +449,7 @@ static void m_can_clean(struct net_device *net) * napi. For non-peripherals, RX is done in napi already, so push * directly. timestamp is used to ensure good skb ordering in * rx-offload and is ignored for non-peripherals. 
-*/ + */ static void m_can_receive_skb(struct m_can_classdev *cdev, struct sk_buff *skb, u32 timestamp) @@ -455,54 +467,57 @@ static void m_can_receive_skb(struct m_can_classdev *cdev, } } -static void m_can_read_fifo(struct net_device *dev, u32 rxfs) +static int m_can_read_fifo(struct net_device *dev, u32 rxfs) { struct net_device_stats *stats = &dev->stats; struct m_can_classdev *cdev = netdev_priv(dev); struct canfd_frame *cf; struct sk_buff *skb; - u32 id, fgi, dlc; + struct id_and_dlc fifo_header; + u32 fgi; u32 timestamp = 0; - int i; + int err; /* calculate the fifo get index for where to read data */ fgi = FIELD_GET(RXFS_FGI_MASK, rxfs); - dlc = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_DLC); - if (dlc & RX_BUF_FDF) + err = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_ID, &fifo_header, 2); + if (err) + goto out_fail; + + if (fifo_header.dlc & RX_BUF_FDF) skb = alloc_canfd_skb(dev, &cf); else skb = alloc_can_skb(dev, (struct can_frame **)&cf); if (!skb) { stats->rx_dropped++; - return; + return 0; } - if (dlc & RX_BUF_FDF) - cf->len = can_fd_dlc2len((dlc >> 16) & 0x0F); + if (fifo_header.dlc & RX_BUF_FDF) + cf->len = can_fd_dlc2len((fifo_header.dlc >> 16) & 0x0F); else - cf->len = can_cc_dlc2len((dlc >> 16) & 0x0F); + cf->len = can_cc_dlc2len((fifo_header.dlc >> 16) & 0x0F); - id = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_ID); - if (id & RX_BUF_XTD) - cf->can_id = (id & CAN_EFF_MASK) | CAN_EFF_FLAG; + if (fifo_header.id & RX_BUF_XTD) + cf->can_id = (fifo_header.id & CAN_EFF_MASK) | CAN_EFF_FLAG; else - cf->can_id = (id >> 18) & CAN_SFF_MASK; + cf->can_id = (fifo_header.id >> 18) & CAN_SFF_MASK; - if (id & RX_BUF_ESI) { + if (fifo_header.id & RX_BUF_ESI) { cf->flags |= CANFD_ESI; netdev_dbg(dev, "ESI Error\n"); } - if (!(dlc & RX_BUF_FDF) && (id & RX_BUF_RTR)) { + if (!(fifo_header.dlc & RX_BUF_FDF) && (fifo_header.id & RX_BUF_RTR)) { cf->can_id |= CAN_RTR_FLAG; } else { - if (dlc & RX_BUF_BRS) + if (fifo_header.dlc & RX_BUF_BRS) cf->flags |= CANFD_BRS; - for (i = 0; i < cf->len; i += 4) - *(u32 *)(cf->data + i) = - m_can_fifo_read(cdev, fgi, - M_CAN_FIFO_DATA(i / 4)); + err = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_DATA, + cf->data, DIV_ROUND_UP(cf->len, 4)); + if (err) + goto out_fail; } /* acknowledge rx fifo 0 */ @@ -511,9 +526,15 @@ static void m_can_read_fifo(struct net_device *dev, u32 rxfs) stats->rx_packets++; stats->rx_bytes += cf->len; - timestamp = FIELD_GET(RX_BUF_RXTS_MASK, dlc); + timestamp = FIELD_GET(RX_BUF_RXTS_MASK, fifo_header.dlc); m_can_receive_skb(cdev, skb, timestamp); + + return 0; + +out_fail: + netdev_err(dev, "FIFO read returned %d\n", err); + return err; } static int m_can_do_rx_poll(struct net_device *dev, int quota) @@ -521,6 +542,7 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota) struct m_can_classdev *cdev = netdev_priv(dev); u32 pkts = 0; u32 rxfs; + int err; rxfs = m_can_read(cdev, M_CAN_RXF0S); if (!(rxfs & RXFS_FFL_MASK)) { @@ -529,7 +551,9 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota) } while ((rxfs & RXFS_FFL_MASK) && (quota > 0)) { - m_can_read_fifo(dev, rxfs); + err = m_can_read_fifo(dev, rxfs); + if (err) + return err; quota--; pkts++; @@ -875,6 +899,7 @@ static int m_can_handle_bus_errors(struct net_device *dev, u32 irqstatus, static int m_can_rx_handler(struct net_device *dev, int quota) { struct m_can_classdev *cdev = netdev_priv(dev); + int rx_work_or_err; int work_done = 0; u32 irqstatus, psr; @@ -911,8 +936,13 @@ static int m_can_rx_handler(struct net_device *dev, int quota) if (irqstatus & IR_ERR_BUS_30X) work_done 
+= m_can_handle_bus_errors(dev, irqstatus, psr); - if (irqstatus & IR_RF0N) - work_done += m_can_do_rx_poll(dev, (quota - work_done)); + if (irqstatus & IR_RF0N) { + rx_work_or_err = m_can_do_rx_poll(dev, (quota - work_done)); + if (rx_work_or_err < 0) + return rx_work_or_err; + + work_done += rx_work_or_err; + } end: return work_done; } @@ -920,12 +950,17 @@ end: static int m_can_rx_peripheral(struct net_device *dev) { struct m_can_classdev *cdev = netdev_priv(dev); + int work_done; - m_can_rx_handler(dev, M_CAN_NAPI_WEIGHT); + work_done = m_can_rx_handler(dev, M_CAN_NAPI_WEIGHT); - m_can_enable_all_interrupts(cdev); + /* Don't re-enable interrupts if the driver had a fatal error + * (e.g., FIFO read failure). + */ + if (work_done >= 0) + m_can_enable_all_interrupts(cdev); - return 0; + return work_done; } static int m_can_poll(struct napi_struct *napi, int quota) @@ -935,7 +970,11 @@ static int m_can_poll(struct napi_struct *napi, int quota) int work_done; work_done = m_can_rx_handler(dev, quota); - if (work_done < quota) { + + /* Don't re-enable interrupts if the driver had a fatal error + * (e.g., FIFO read failure). + */ + if (work_done >= 0 && work_done < quota) { napi_complete_done(napi, work_done); m_can_enable_all_interrupts(cdev); } @@ -946,7 +985,7 @@ static int m_can_poll(struct napi_struct *napi, int quota) /* Echo tx skb and update net stats. Peripherals use rx-offload for * echo. timestamp is used for peripherals to ensure correct ordering * by rx-offload, and is ignored for non-peripherals. -*/ + */ static void m_can_tx_update_stats(struct m_can_classdev *cdev, unsigned int msg_mark, u32 timestamp) @@ -966,7 +1005,7 @@ static void m_can_tx_update_stats(struct m_can_classdev *cdev, stats->tx_packets++; } -static void m_can_echo_tx_event(struct net_device *dev) +static int m_can_echo_tx_event(struct net_device *dev) { u32 txe_count = 0; u32 m_can_txefs; @@ -985,12 +1024,18 @@ static void m_can_echo_tx_event(struct net_device *dev) /* Get and process all sent elements */ for (i = 0; i < txe_count; i++) { u32 txe, timestamp = 0; + int err; /* retrieve get index */ fgi = FIELD_GET(TXEFS_EFGI_MASK, m_can_read(cdev, M_CAN_TXEFS)); /* get message marker, timestamp */ - txe = m_can_txe_fifo_read(cdev, fgi, 4); + err = m_can_txe_fifo_read(cdev, fgi, 4, &txe); + if (err) { + netdev_err(dev, "TXE FIFO read returned %d\n", err); + return err; + } + msg_mark = FIELD_GET(TX_EVENT_MM_MASK, txe); timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe); @@ -1001,6 +1046,8 @@ static void m_can_echo_tx_event(struct net_device *dev) /* update stats */ m_can_tx_update_stats(cdev, msg_mark, timestamp); } + + return 0; } static irqreturn_t m_can_isr(int irq, void *dev_id) @@ -1032,8 +1079,8 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) m_can_disable_all_interrupts(cdev); if (!cdev->is_peripheral) napi_schedule(&cdev->napi); - else - m_can_rx_peripheral(dev); + else if (m_can_rx_peripheral(dev) < 0) + goto out_fail; } if (cdev->version == 30) { @@ -1051,7 +1098,9 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) } else { if (ir & IR_TEFN) { /* New TX FIFO Element arrived */ - m_can_echo_tx_event(dev); + if (m_can_echo_tx_event(dev) != 0) + goto out_fail; + can_led_event(dev, CAN_LED_EVENT_TX); if (netif_queue_stopped(dev) && !m_can_tx_fifo_full(cdev)) @@ -1063,6 +1112,10 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) can_rx_offload_threaded_irq_finish(&cdev->offload); return IRQ_HANDLED; + +out_fail: + m_can_disable_all_interrupts(cdev); + return IRQ_HANDLED; } static const struct 
can_bittiming_const m_can_bittiming_const_30X = { @@ -1168,10 +1221,10 @@ static int m_can_set_bittiming(struct net_device *dev) FIELD_PREP(TDCR_TDCO_MASK, tdco)); } - reg_btp = FIELD_PREP(NBTP_NBRP_MASK, brp) | - FIELD_PREP(NBTP_NSJW_MASK, sjw) | - FIELD_PREP(NBTP_NTSEG1_MASK, tseg1) | - FIELD_PREP(NBTP_NTSEG2_MASK, tseg2); + reg_btp |= FIELD_PREP(DBTP_DBRP_MASK, brp) | + FIELD_PREP(DBTP_DSJW_MASK, sjw) | + FIELD_PREP(DBTP_DTSEG1_MASK, tseg1) | + FIELD_PREP(DBTP_DTSEG2_MASK, tseg2); m_can_write(cdev, M_CAN_DBTP, reg_btp); } @@ -1306,7 +1359,8 @@ static void m_can_chip_config(struct net_device *dev) m_can_set_bittiming(dev); /* enable internal timestamp generation, with a prescalar of 16. The - * prescalar is applied to the nominal bit timing */ + * prescalar is applied to the nominal bit timing + */ m_can_write(cdev, M_CAN_TSCC, FIELD_PREP(TSCC_TCP_MASK, 0xf)); m_can_config_endisable(cdev, false); @@ -1534,8 +1588,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) struct canfd_frame *cf = (struct canfd_frame *)cdev->tx_skb->data; struct net_device *dev = cdev->net; struct sk_buff *skb = cdev->tx_skb; - u32 id, cccr, fdflags; - int i; + struct id_and_dlc fifo_header; + u32 cccr, fdflags; + int err; int putidx; cdev->tx_skb = NULL; @@ -1543,27 +1598,29 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) /* Generate ID field for TX buffer Element */ /* Common to all supported M_CAN versions */ if (cf->can_id & CAN_EFF_FLAG) { - id = cf->can_id & CAN_EFF_MASK; - id |= TX_BUF_XTD; + fifo_header.id = cf->can_id & CAN_EFF_MASK; + fifo_header.id |= TX_BUF_XTD; } else { - id = ((cf->can_id & CAN_SFF_MASK) << 18); + fifo_header.id = ((cf->can_id & CAN_SFF_MASK) << 18); } if (cf->can_id & CAN_RTR_FLAG) - id |= TX_BUF_RTR; + fifo_header.id |= TX_BUF_RTR; if (cdev->version == 30) { netif_stop_queue(dev); - /* message ram configuration */ - m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, id); - m_can_fifo_write(cdev, 0, M_CAN_FIFO_DLC, - can_fd_len2dlc(cf->len) << 16); + fifo_header.dlc = can_fd_len2dlc(cf->len) << 16; - for (i = 0; i < cf->len; i += 4) - m_can_fifo_write(cdev, 0, - M_CAN_FIFO_DATA(i / 4), - *(u32 *)(cf->data + i)); + /* Write the frame ID, DLC, and payload to the FIFO element. */ + err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, &fifo_header, 2); + if (err) + goto out_fail; + + err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_DATA, + cf->data, DIV_ROUND_UP(cf->len, 4)); + if (err) + goto out_fail; can_put_echo_skb(skb, dev, 0, 0); @@ -1607,8 +1664,11 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) /* get put index for frame */ putidx = FIELD_GET(TXFQS_TFQPI_MASK, m_can_read(cdev, M_CAN_TXFQS)); - /* Write ID Field to FIFO Element */ - m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID, id); + + /* Construct DLC Field, with CAN-FD configuration. + * Use the put index of the fifo as the message marker, + * used in the TX interrupt for sending the correct echo frame. + */ /* get CAN FD configuration of frame */ fdflags = 0; @@ -1618,20 +1678,17 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) fdflags |= TX_BUF_BRS; } - /* Construct DLC Field. 
Also contains CAN-FD configuration - * use put index of fifo as message marker - * it is used in TX interrupt for - * sending the correct echo frame - */ - m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DLC, - FIELD_PREP(TX_BUF_MM_MASK, putidx) | - FIELD_PREP(TX_BUF_DLC_MASK, - can_fd_len2dlc(cf->len)) | - fdflags | TX_BUF_EFC); + fifo_header.dlc = FIELD_PREP(TX_BUF_MM_MASK, putidx) | + FIELD_PREP(TX_BUF_DLC_MASK, can_fd_len2dlc(cf->len)) | + fdflags | TX_BUF_EFC; + err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID, &fifo_header, 2); + if (err) + goto out_fail; - for (i = 0; i < cf->len; i += 4) - m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DATA(i / 4), - *(u32 *)(cf->data + i)); + err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DATA, + cf->data, DIV_ROUND_UP(cf->len, 4)); + if (err) + goto out_fail; /* Push loopback echo. * Will be looped back on TX interrupt based on message marker @@ -1648,6 +1705,11 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) } return NETDEV_TX_OK; + +out_fail: + netdev_err(dev, "FIFO write returned %d\n", err); + m_can_disable_all_interrupts(cdev); + return NETDEV_TX_BUSY; } static void m_can_tx_work_queue(struct work_struct *ws) @@ -1819,9 +1881,10 @@ static void m_can_of_parse_mram(struct m_can_classdev *cdev, cdev->mcfg[MRAM_TXB].off, cdev->mcfg[MRAM_TXB].num); } -void m_can_init_ram(struct m_can_classdev *cdev) +int m_can_init_ram(struct m_can_classdev *cdev) { int end, i, start; + int err = 0; /* initialize the entire Message RAM in use to avoid possible * ECC/parity checksum errors when reading an uninitialized buffer @@ -1830,8 +1893,13 @@ void m_can_init_ram(struct m_can_classdev *cdev) end = cdev->mcfg[MRAM_TXB].off + cdev->mcfg[MRAM_TXB].num * TXB_ELEMENT_SIZE; - for (i = start; i < end; i += 4) - m_can_fifo_write_no_off(cdev, i, 0x0); + for (i = start; i < end; i += 4) { + err = m_can_fifo_write_no_off(cdev, i, 0x0); + if (err) + break; + } + + return err; } EXPORT_SYMBOL_GPL(m_can_init_ram); diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index 56e994376a7b..d18b515e6ccc 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -65,9 +65,9 @@ struct m_can_ops { int (*clear_interrupts)(struct m_can_classdev *cdev); u32 (*read_reg)(struct m_can_classdev *cdev, int reg); int (*write_reg)(struct m_can_classdev *cdev, int reg, int val); - u32 (*read_fifo)(struct m_can_classdev *cdev, int addr_offset); + int (*read_fifo)(struct m_can_classdev *cdev, int addr_offset, void *val, size_t val_count); int (*write_fifo)(struct m_can_classdev *cdev, int addr_offset, - int val); + const void *val, size_t val_count); int (*init)(struct m_can_classdev *cdev); }; @@ -101,7 +101,7 @@ void m_can_class_free_dev(struct net_device *net); int m_can_class_register(struct m_can_classdev *cdev); void m_can_class_unregister(struct m_can_classdev *cdev); int m_can_class_get_clocks(struct m_can_classdev *cdev); -void m_can_init_ram(struct m_can_classdev *priv); +int m_can_init_ram(struct m_can_classdev *priv); int m_can_class_suspend(struct device *dev); int m_can_class_resume(struct device *dev); diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index 128808605c3f..89cc3d41e952 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -39,11 +39,13 @@ static u32 iomap_read_reg(struct m_can_classdev *cdev, int reg) return readl(priv->base + reg); } -static u32 iomap_read_fifo(struct m_can_classdev *cdev, int offset) +static int iomap_read_fifo(struct 
m_can_classdev *cdev, int offset, void *val, size_t val_count) { struct m_can_pci_priv *priv = cdev_to_priv(cdev); - return readl(priv->base + offset); + ioread32_rep(priv->base + offset, val, val_count); + + return 0; } static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val) @@ -55,11 +57,12 @@ static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val) return 0; } -static int iomap_write_fifo(struct m_can_classdev *cdev, int offset, int val) +static int iomap_write_fifo(struct m_can_classdev *cdev, int offset, + const void *val, size_t val_count) { struct m_can_pci_priv *priv = cdev_to_priv(cdev); - writel(val, priv->base + offset); + iowrite32_rep(priv->base + offset, val, val_count); return 0; } diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c index a28c84aa8fa8..308d4f2fff00 100644 --- a/drivers/net/can/m_can/m_can_platform.c +++ b/drivers/net/can/m_can/m_can_platform.c @@ -29,11 +29,13 @@ static u32 iomap_read_reg(struct m_can_classdev *cdev, int reg) return readl(priv->base + reg); } -static u32 iomap_read_fifo(struct m_can_classdev *cdev, int offset) +static int iomap_read_fifo(struct m_can_classdev *cdev, int offset, void *val, size_t val_count) { struct m_can_plat_priv *priv = cdev_to_priv(cdev); - return readl(priv->mram_base + offset); + ioread32_rep(priv->mram_base + offset, val, val_count); + + return 0; } static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val) @@ -45,11 +47,12 @@ static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val) return 0; } -static int iomap_write_fifo(struct m_can_classdev *cdev, int offset, int val) +static int iomap_write_fifo(struct m_can_classdev *cdev, int offset, + const void *val, size_t val_count) { struct m_can_plat_priv *priv = cdev_to_priv(cdev); - writel(val, priv->mram_base + offset); + iowrite32_rep(priv->base + offset, val, val_count); return 0; } @@ -127,7 +130,9 @@ static int m_can_plat_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mcan_class); - m_can_init_ram(mcan_class); + ret = m_can_init_ram(mcan_class); + if (ret) + goto probe_fail; pm_runtime_enable(mcan_class->dev); ret = m_can_class_register(mcan_class); diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index 4147cecfbbd6..04687b15b250 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -105,7 +105,6 @@ static inline struct tcan4x5x_priv *cdev_to_priv(struct m_can_classdev *cdev) { return container_of(cdev, struct tcan4x5x_priv, cdev); - } static void tcan4x5x_check_wake(struct tcan4x5x_priv *priv) @@ -154,14 +153,12 @@ static u32 tcan4x5x_read_reg(struct m_can_classdev *cdev, int reg) return val; } -static u32 tcan4x5x_read_fifo(struct m_can_classdev *cdev, int addr_offset) +static int tcan4x5x_read_fifo(struct m_can_classdev *cdev, int addr_offset, + void *val, size_t val_count) { struct tcan4x5x_priv *priv = cdev_to_priv(cdev); - u32 val; - - regmap_read(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, &val); - return val; + return regmap_bulk_read(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, val, val_count); } static int tcan4x5x_write_reg(struct m_can_classdev *cdev, int reg, int val) @@ -172,11 +169,11 @@ static int tcan4x5x_write_reg(struct m_can_classdev *cdev, int reg, int val) } static int tcan4x5x_write_fifo(struct m_can_classdev *cdev, - int addr_offset, int val) + int addr_offset, const void *val, size_t val_count) { struct tcan4x5x_priv *priv = 
cdev_to_priv(cdev); - return regmap_write(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, val); + return regmap_bulk_write(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, val, val_count); } static int tcan4x5x_power_enable(struct regulator *reg, int enable) @@ -238,7 +235,9 @@ static int tcan4x5x_init(struct m_can_classdev *cdev) return ret; /* Zero out the MCAN buffers */ - m_can_init_ram(cdev); + ret = m_can_init_ram(cdev); + if (ret) + return ret; ret = regmap_update_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG, TCAN4X5X_MODE_SEL_MASK, TCAN4X5X_MODE_NORMAL); diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index 311e6ca3bdc4..5d4d52afde15 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -37,9 +37,15 @@ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/iopoll.h> +#include <linux/reset.h> #define RCANFD_DRV_NAME "rcar_canfd" +enum rcanfd_chip_id { + RENESAS_RCAR_GEN3 = 0, + RENESAS_RZG2L, +}; + /* Global register bits */ /* RSCFDnCFDGRMCFG */ @@ -513,6 +519,9 @@ struct rcar_canfd_global { enum rcar_canfd_fcanclk fcan; /* CANFD or Ext clock */ unsigned long channels_mask; /* Enabled channels mask */ bool fdmode; /* CAN FD or Classical CAN only mode */ + struct reset_control *rstc1; + struct reset_control *rstc2; + enum rcanfd_chip_id chip_id; }; /* CAN FD mode nominal rate constants */ @@ -1070,38 +1079,70 @@ static void rcar_canfd_tx_done(struct net_device *ndev) can_led_event(ndev, CAN_LED_EVENT_TX); } +static void rcar_canfd_handle_global_err(struct rcar_canfd_global *gpriv, u32 ch) +{ + struct rcar_canfd_channel *priv = gpriv->ch[ch]; + struct net_device *ndev = priv->ndev; + u32 gerfl; + + /* Handle global error interrupts */ + gerfl = rcar_canfd_read(priv->base, RCANFD_GERFL); + if (unlikely(RCANFD_GERFL_ERR(gpriv, gerfl))) + rcar_canfd_global_error(ndev); +} + +static irqreturn_t rcar_canfd_global_err_interrupt(int irq, void *dev_id) +{ + struct rcar_canfd_global *gpriv = dev_id; + u32 ch; + + for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) + rcar_canfd_handle_global_err(gpriv, ch); + + return IRQ_HANDLED; +} + +static void rcar_canfd_handle_global_receive(struct rcar_canfd_global *gpriv, u32 ch) +{ + struct rcar_canfd_channel *priv = gpriv->ch[ch]; + u32 ridx = ch + RCANFD_RFFIFO_IDX; + u32 sts; + + /* Handle Rx interrupts */ + sts = rcar_canfd_read(priv->base, RCANFD_RFSTS(ridx)); + if (likely(sts & RCANFD_RFSTS_RFIF)) { + if (napi_schedule_prep(&priv->napi)) { + /* Disable Rx FIFO interrupts */ + rcar_canfd_clear_bit(priv->base, + RCANFD_RFCC(ridx), + RCANFD_RFCC_RFIE); + __napi_schedule(&priv->napi); + } + } +} + +static irqreturn_t rcar_canfd_global_receive_fifo_interrupt(int irq, void *dev_id) +{ + struct rcar_canfd_global *gpriv = dev_id; + u32 ch; + + for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) + rcar_canfd_handle_global_receive(gpriv, ch); + + return IRQ_HANDLED; +} + static irqreturn_t rcar_canfd_global_interrupt(int irq, void *dev_id) { struct rcar_canfd_global *gpriv = dev_id; - struct net_device *ndev; - struct rcar_canfd_channel *priv; - u32 sts, gerfl; - u32 ch, ridx; + u32 ch; /* Global error interrupts still indicate a condition specific * to a channel. RxFIFO interrupt is a global interrupt. 
*/ for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) { - priv = gpriv->ch[ch]; - ndev = priv->ndev; - ridx = ch + RCANFD_RFFIFO_IDX; - - /* Global error interrupts */ - gerfl = rcar_canfd_read(priv->base, RCANFD_GERFL); - if (unlikely(RCANFD_GERFL_ERR(gpriv, gerfl))) - rcar_canfd_global_error(ndev); - - /* Handle Rx interrupts */ - sts = rcar_canfd_read(priv->base, RCANFD_RFSTS(ridx)); - if (likely(sts & RCANFD_RFSTS_RFIF)) { - if (napi_schedule_prep(&priv->napi)) { - /* Disable Rx FIFO interrupts */ - rcar_canfd_clear_bit(priv->base, - RCANFD_RFCC(ridx), - RCANFD_RFCC_RFIE); - __napi_schedule(&priv->napi); - } - } + rcar_canfd_handle_global_err(gpriv, ch); + rcar_canfd_handle_global_receive(gpriv, ch); } return IRQ_HANDLED; } @@ -1139,38 +1180,73 @@ static void rcar_canfd_state_change(struct net_device *ndev, } } -static irqreturn_t rcar_canfd_channel_interrupt(int irq, void *dev_id) +static void rcar_canfd_handle_channel_tx(struct rcar_canfd_global *gpriv, u32 ch) +{ + struct rcar_canfd_channel *priv = priv = gpriv->ch[ch]; + struct net_device *ndev = priv->ndev; + u32 sts; + + /* Handle Tx interrupts */ + sts = rcar_canfd_read(priv->base, + RCANFD_CFSTS(ch, RCANFD_CFFIFO_IDX)); + if (likely(sts & RCANFD_CFSTS_CFTXIF)) + rcar_canfd_tx_done(ndev); +} + +static irqreturn_t rcar_canfd_channel_tx_interrupt(int irq, void *dev_id) { struct rcar_canfd_global *gpriv = dev_id; - struct net_device *ndev; - struct rcar_canfd_channel *priv; - u32 sts, ch, cerfl; + u32 ch; + + for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) + rcar_canfd_handle_channel_tx(gpriv, ch); + + return IRQ_HANDLED; +} + +static void rcar_canfd_handle_channel_err(struct rcar_canfd_global *gpriv, u32 ch) +{ + struct rcar_canfd_channel *priv = gpriv->ch[ch]; + struct net_device *ndev = priv->ndev; u16 txerr, rxerr; + u32 sts, cerfl; + + /* Handle channel error interrupts */ + cerfl = rcar_canfd_read(priv->base, RCANFD_CERFL(ch)); + sts = rcar_canfd_read(priv->base, RCANFD_CSTS(ch)); + txerr = RCANFD_CSTS_TECCNT(sts); + rxerr = RCANFD_CSTS_RECCNT(sts); + if (unlikely(RCANFD_CERFL_ERR(cerfl))) + rcar_canfd_error(ndev, cerfl, txerr, rxerr); + + /* Handle state change to lower states */ + if (unlikely(priv->can.state != CAN_STATE_ERROR_ACTIVE && + priv->can.state != CAN_STATE_BUS_OFF)) + rcar_canfd_state_change(ndev, txerr, rxerr); +} + +static irqreturn_t rcar_canfd_channel_err_interrupt(int irq, void *dev_id) +{ + struct rcar_canfd_global *gpriv = dev_id; + u32 ch; + + for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) + rcar_canfd_handle_channel_err(gpriv, ch); + + return IRQ_HANDLED; +} + +static irqreturn_t rcar_canfd_channel_interrupt(int irq, void *dev_id) +{ + struct rcar_canfd_global *gpriv = dev_id; + u32 ch; /* Common FIFO is a per channel resource */ for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) { - priv = gpriv->ch[ch]; - ndev = priv->ndev; - - /* Channel error interrupts */ - cerfl = rcar_canfd_read(priv->base, RCANFD_CERFL(ch)); - sts = rcar_canfd_read(priv->base, RCANFD_CSTS(ch)); - txerr = RCANFD_CSTS_TECCNT(sts); - rxerr = RCANFD_CSTS_RECCNT(sts); - if (unlikely(RCANFD_CERFL_ERR(cerfl))) - rcar_canfd_error(ndev, cerfl, txerr, rxerr); - - /* Handle state change to lower states */ - if (unlikely((priv->can.state != CAN_STATE_ERROR_ACTIVE) && - (priv->can.state != CAN_STATE_BUS_OFF))) - rcar_canfd_state_change(ndev, txerr, rxerr); - - /* Handle Tx interrupts */ - sts = rcar_canfd_read(priv->base, - RCANFD_CFSTS(ch, RCANFD_CFFIFO_IDX)); - if (likely(sts & 
RCANFD_CFSTS_CFTXIF)) - rcar_canfd_tx_done(ndev); + rcar_canfd_handle_channel_err(gpriv, ch); + rcar_canfd_handle_channel_tx(gpriv, ch); } + return IRQ_HANDLED; } @@ -1577,6 +1653,53 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, priv->can.clock.freq = fcan_freq; dev_info(&pdev->dev, "can_clk rate is %u\n", priv->can.clock.freq); + if (gpriv->chip_id == RENESAS_RZG2L) { + char *irq_name; + int err_irq; + int tx_irq; + + err_irq = platform_get_irq_byname(pdev, ch == 0 ? "ch0_err" : "ch1_err"); + if (err_irq < 0) { + err = err_irq; + goto fail; + } + + tx_irq = platform_get_irq_byname(pdev, ch == 0 ? "ch0_trx" : "ch1_trx"); + if (tx_irq < 0) { + err = tx_irq; + goto fail; + } + + irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "canfd.ch%d_err", ch); + if (!irq_name) { + err = -ENOMEM; + goto fail; + } + err = devm_request_irq(&pdev->dev, err_irq, + rcar_canfd_channel_err_interrupt, 0, + irq_name, gpriv); + if (err) { + dev_err(&pdev->dev, "devm_request_irq CH Err(%d) failed, error %d\n", + err_irq, err); + goto fail; + } + irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "canfd.ch%d_trx", ch); + if (!irq_name) { + err = -ENOMEM; + goto fail; + } + err = devm_request_irq(&pdev->dev, tx_irq, + rcar_canfd_channel_tx_interrupt, 0, + irq_name, gpriv); + if (err) { + dev_err(&pdev->dev, "devm_request_irq Tx (%d) failed, error %d\n", + tx_irq, err); + goto fail; + } + } + if (gpriv->fdmode) { priv->can.bittiming_const = &rcar_canfd_nom_bittiming_const; priv->can.data_bittiming_const = @@ -1636,7 +1759,11 @@ static int rcar_canfd_probe(struct platform_device *pdev) struct device_node *of_child; unsigned long channels_mask = 0; int err, ch_irq, g_irq; + int g_err_irq, g_recc_irq; bool fdmode = true; /* CAN FD only mode - default */ + enum rcanfd_chip_id chip_id; + + chip_id = (uintptr_t)of_device_get_match_data(&pdev->dev); if (of_property_read_bool(pdev->dev.of_node, "renesas,no-can-fd")) fdmode = false; /* Classical CAN only mode */ @@ -1649,16 +1776,30 @@ static int rcar_canfd_probe(struct platform_device *pdev) if (of_child && of_device_is_available(of_child)) channels_mask |= BIT(1); /* Channel 1 */ - ch_irq = platform_get_irq(pdev, 0); - if (ch_irq < 0) { - err = ch_irq; - goto fail_dev; - } + if (chip_id == RENESAS_RCAR_GEN3) { + ch_irq = platform_get_irq_byname_optional(pdev, "ch_int"); + if (ch_irq < 0) { + /* For backward compatibility get irq by index */ + ch_irq = platform_get_irq(pdev, 0); + if (ch_irq < 0) + return ch_irq; + } - g_irq = platform_get_irq(pdev, 1); - if (g_irq < 0) { - err = g_irq; - goto fail_dev; + g_irq = platform_get_irq_byname_optional(pdev, "g_int"); + if (g_irq < 0) { + /* For backward compatibility get irq by index */ + g_irq = platform_get_irq(pdev, 1); + if (g_irq < 0) + return g_irq; + } + } else { + g_err_irq = platform_get_irq_byname(pdev, "g_err"); + if (g_err_irq < 0) + return g_err_irq; + + g_recc_irq = platform_get_irq_byname(pdev, "g_recc"); + if (g_recc_irq < 0) + return g_recc_irq; } /* Global controller context */ @@ -1670,6 +1811,19 @@ static int rcar_canfd_probe(struct platform_device *pdev) gpriv->pdev = pdev; gpriv->channels_mask = channels_mask; gpriv->fdmode = fdmode; + gpriv->chip_id = chip_id; + + if (gpriv->chip_id == RENESAS_RZG2L) { + gpriv->rstc1 = devm_reset_control_get_exclusive(&pdev->dev, "rstp_n"); + if (IS_ERR(gpriv->rstc1)) + return dev_err_probe(&pdev->dev, PTR_ERR(gpriv->rstc1), + "failed to get rstp_n\n"); + + gpriv->rstc2 = devm_reset_control_get_exclusive(&pdev->dev, "rstc_n"); + if 
(IS_ERR(gpriv->rstc2)) + return dev_err_probe(&pdev->dev, PTR_ERR(gpriv->rstc2), + "failed to get rstc_n\n"); + } /* Peripheral clock */ gpriv->clkp = devm_clk_get(&pdev->dev, "fck"); @@ -1699,7 +1853,7 @@ static int rcar_canfd_probe(struct platform_device *pdev) } fcan_freq = clk_get_rate(gpriv->can_clk); - if (gpriv->fcan == RCANFD_CANFDCLK) + if (gpriv->fcan == RCANFD_CANFDCLK && gpriv->chip_id == RENESAS_RCAR_GEN3) /* CANFD clock is further divided by (1/2) within the IP */ fcan_freq /= 2; @@ -1711,20 +1865,51 @@ static int rcar_canfd_probe(struct platform_device *pdev) gpriv->base = addr; /* Request IRQ that's common for both channels */ - err = devm_request_irq(&pdev->dev, ch_irq, - rcar_canfd_channel_interrupt, 0, - "canfd.chn", gpriv); - if (err) { - dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n", - ch_irq, err); - goto fail_dev; + if (gpriv->chip_id == RENESAS_RCAR_GEN3) { + err = devm_request_irq(&pdev->dev, ch_irq, + rcar_canfd_channel_interrupt, 0, + "canfd.ch_int", gpriv); + if (err) { + dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n", + ch_irq, err); + goto fail_dev; + } + + err = devm_request_irq(&pdev->dev, g_irq, + rcar_canfd_global_interrupt, 0, + "canfd.g_int", gpriv); + if (err) { + dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n", + g_irq, err); + goto fail_dev; + } + } else { + err = devm_request_irq(&pdev->dev, g_recc_irq, + rcar_canfd_global_receive_fifo_interrupt, 0, + "canfd.g_recc", gpriv); + + if (err) { + dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n", + g_recc_irq, err); + goto fail_dev; + } + + err = devm_request_irq(&pdev->dev, g_err_irq, + rcar_canfd_global_err_interrupt, 0, + "canfd.g_err", gpriv); + if (err) { + dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n", + g_err_irq, err); + goto fail_dev; + } } - err = devm_request_irq(&pdev->dev, g_irq, - rcar_canfd_global_interrupt, 0, - "canfd.gbl", gpriv); + + err = reset_control_reset(gpriv->rstc1); + if (err) + goto fail_dev; + err = reset_control_reset(gpriv->rstc2); if (err) { - dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n", - g_irq, err); + reset_control_assert(gpriv->rstc1); goto fail_dev; } @@ -1733,7 +1918,7 @@ static int rcar_canfd_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "failed to enable peripheral clock, error %d\n", err); - goto fail_dev; + goto fail_reset; } err = rcar_canfd_reset_controller(gpriv); @@ -1790,6 +1975,9 @@ fail_mode: rcar_canfd_disable_global_interrupts(gpriv); fail_clk: clk_disable_unprepare(gpriv->clkp); +fail_reset: + reset_control_assert(gpriv->rstc1); + reset_control_assert(gpriv->rstc2); fail_dev: return err; } @@ -1810,6 +1998,9 @@ static int rcar_canfd_remove(struct platform_device *pdev) /* Enter global sleep mode */ rcar_canfd_set_bit(gpriv->base, RCANFD_GCTR, RCANFD_GCTR_GSLPR); clk_disable_unprepare(gpriv->clkp); + reset_control_assert(gpriv->rstc1); + reset_control_assert(gpriv->rstc2); + return 0; } @@ -1827,7 +2018,8 @@ static SIMPLE_DEV_PM_OPS(rcar_canfd_pm_ops, rcar_canfd_suspend, rcar_canfd_resume); static const struct of_device_id rcar_canfd_of_table[] = { - { .compatible = "renesas,rcar-gen3-canfd" }, + { .compatible = "renesas,rcar-gen3-canfd", .data = (void *)RENESAS_RCAR_GEN3 }, + { .compatible = "renesas,rzg2l-canfd", .data = (void *)RENESAS_RZG2L }, { } }; diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 6c369a399c45..673861ab665a 100644 --- 
a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1456,7 +1456,7 @@ mcp251xfd_rx_ring_update(const struct mcp251xfd_priv *priv, } static void -mcp251xfd_hw_rx_obj_to_skb(struct mcp251xfd_priv *priv, +mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, const struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj, struct sk_buff *skb) { diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c index ed3169274d24..712e09186987 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c @@ -13,7 +13,7 @@ static u64 mcp251xfd_timestamp_read(const struct cyclecounter *cc) { - struct mcp251xfd_priv *priv; + const struct mcp251xfd_priv *priv; u32 timestamp = 0; int err; @@ -39,7 +39,7 @@ static void mcp251xfd_timestamp_work(struct work_struct *work) MCP251XFD_TIMESTAMP_WORK_DELAY_SEC * HZ); } -void mcp251xfd_skb_set_timestamp(struct mcp251xfd_priv *priv, +void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv, struct sk_buff *skb, u32 timestamp) { struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb); diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index 1002f3902ad2..0f322dabaf65 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -853,7 +853,7 @@ int mcp251xfd_regmap_init(struct mcp251xfd_priv *priv); u16 mcp251xfd_crc16_compute2(const void *cmd, size_t cmd_size, const void *data, size_t data_size); u16 mcp251xfd_crc16_compute(const void *data, size_t data_size); -void mcp251xfd_skb_set_timestamp(struct mcp251xfd_priv *priv, +void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv, struct sk_buff *skb, u32 timestamp); void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv); void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv); diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.h b/drivers/net/can/usb/etas_es58x/es58x_fd.h index ee18a87e40c0..a191891b8777 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_fd.h +++ b/drivers/net/can/usb/etas_es58x/es58x_fd.h @@ -96,23 +96,14 @@ struct es58x_fd_bittiming { * @ctrlmode: type enum es58x_fd_ctrlmode. * @canfd_enabled: boolean (0: Classical CAN, 1: CAN and/or CANFD). * @data_bittiming: Bittiming for flexible data-rate transmission. - * @tdc_enabled: Transmitter Delay Compensation switch (0: disabled, - * 1: enabled). On very high bitrates, the delay between when the - * bit is sent and received on the CANTX and CANRX pins of the - * transceiver start to be significant enough for errors to occur - * and thus need to be compensated. - * @tdco: Transmitter Delay Compensation Offset. Offset value, in time - * quanta, defining the delay between the start of the bit - * reception on the CANRX pin of the transceiver and the SSP - * (Secondary Sample Point). Valid values: 0 to 127. - * @tdcf: Transmitter Delay Compensation Filter window. Defines the - * minimum value for the SSP position, in time quanta. The - * feature is enabled when TDCF is configured to a value greater - * than TDCO. Valid values: 0 to 127. + * @tdc_enabled: Transmitter Delay Compensation switch (0: TDC is + * disabled, 1: TDC is enabled). + * @tdco: Transmitter Delay Compensation Offset. + * @tdcf: Transmitter Delay Compensation Filter window. 
* - * Please refer to the microcontroller datasheet: "SAM - * E701/S70/V70/V71 Family" section 49 "Controller Area Network - * (MCAN)" for additional information. + * Please refer to the microcontroller datasheet: "SAM E70/S70/V70/V71 + * Family" section 49 "Controller Area Network (MCAN)" for additional + * information. */ struct es58x_fd_tx_conf_msg { struct es58x_fd_bittiming nominal_bittiming; diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index b23e3488695b..bd1417a66cbf 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2016,15 +2016,6 @@ int b53_br_flags(struct dsa_switch *ds, int port, } EXPORT_SYMBOL(b53_br_flags); -int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack) -{ - b53_port_set_mcast_flood(ds->priv, port, mrouter); - - return 0; -} -EXPORT_SYMBOL(b53_set_mrouter); - static bool b53_possible_cpu_port(struct dsa_switch *ds, int port) { /* Broadcom switches will accept enabling Broadcom tags on the @@ -2268,7 +2259,6 @@ static const struct dsa_switch_ops b53_switch_ops = { .port_bridge_leave = b53_br_leave, .port_pre_bridge_flags = b53_br_flags_pre, .port_bridge_flags = b53_br_flags, - .port_set_mrouter = b53_set_mrouter, .port_stp_state_set = b53_br_set_stp_state, .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 82700a5714c1..9bf8319342b0 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -328,8 +328,6 @@ int b53_br_flags_pre(struct dsa_switch *ds, int port, int b53_br_flags(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack); int b53_setup_devlink_resources(struct dsa_switch *ds); void b53_port_event(struct dsa_switch *ds, int port); void b53_phylink_validate(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 3b018fcf4412..6ce9ec1283e0 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1199,7 +1199,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .port_pre_bridge_flags = b53_br_flags_pre, .port_bridge_flags = b53_br_flags, .port_stp_state_set = b53_br_set_stp_state, - .port_set_mrouter = b53_set_mrouter, .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, .port_vlan_add = b53_vlan_add, diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 9fdcc4bde480..5c54ae1be62c 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -912,6 +912,7 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port, { struct hellcreek *hellcreek = ds->priv; u16 entries; + int ret = 0; size_t i; mutex_lock(&hellcreek->reg_lock); @@ -943,12 +944,14 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port, if (!(entry.portmask & BIT(port))) continue; - cb(entry.mac, 0, entry.is_static, data); + ret = cb(entry.mac, 0, entry.is_static, data); + if (ret) + break; } mutex_unlock(&hellcreek->reg_lock); - return 0; + return ret; } static int hellcreek_vlan_filtering(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 344374025426..d7ce281570b5 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c 
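The hellcreek hunk above, and the lan9303 and gswip hunks that follow, all make the same functional change: the return value of the dsa_fdb_dump_cb_t callback is now checked, so a failing callback aborts the walk of the hardware address table instead of being silently ignored. A minimal sketch of a driver-side dump loop written against that contract follows; the chip structure, entry layout and read helper are hypothetical and not taken from any of these drivers, only dsa_fdb_dump_cb_t comes from the DSA core.

/* Hypothetical sketch of a dump loop that propagates the callback's
 * return value; everything prefixed with example_ is made up.
 */
#include <net/dsa.h>
#include <linux/if_ether.h>

struct example_entry {
	u8 mac[ETH_ALEN];
	u16 portmap;
	bool is_static;
};

struct example_chip {
	int num_entries;
};

/* Reads entry 'index' from the hardware table; provided by the real driver. */
void example_read_entry(struct example_chip *chip, int index,
			struct example_entry *entry);

static int example_fdb_dump(struct example_chip *chip, int port,
			    dsa_fdb_dump_cb_t *cb, void *data)
{
	struct example_entry entry;
	int i, ret;

	for (i = 0; i < chip->num_entries; i++) {
		example_read_entry(chip, i, &entry);

		if (!(entry.portmap & BIT(port)))
			continue;

		/* A non-zero return (e.g. -EMSGSIZE when the netlink skb
		 * is full) now stops the walk and is passed to the caller.
		 */
		ret = cb(entry.mac, 0, entry.is_static, data);
		if (ret)
			return ret;
	}

	return 0;
}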
@@ -557,12 +557,12 @@ static int lan9303_alr_make_entry_raw(struct lan9303 *chip, u32 dat0, u32 dat1) return 0; } -typedef void alr_loop_cb_t(struct lan9303 *chip, u32 dat0, u32 dat1, - int portmap, void *ctx); +typedef int alr_loop_cb_t(struct lan9303 *chip, u32 dat0, u32 dat1, + int portmap, void *ctx); -static void lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx) +static int lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx) { - int i; + int ret = 0, i; mutex_lock(&chip->alr_mutex); lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, @@ -582,13 +582,17 @@ static void lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx) LAN9303_ALR_DAT1_PORT_BITOFFS; portmap = alrport_2_portmap[alrport]; - cb(chip, dat0, dat1, portmap, ctx); + ret = cb(chip, dat0, dat1, portmap, ctx); + if (ret) + break; lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, LAN9303_ALR_CMD_GET_NEXT); lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, 0); } mutex_unlock(&chip->alr_mutex); + + return ret; } static void alr_reg_to_mac(u32 dat0, u32 dat1, u8 mac[6]) @@ -606,18 +610,20 @@ struct del_port_learned_ctx { }; /* Clear learned (non-static) entry on given port */ -static void alr_loop_cb_del_port_learned(struct lan9303 *chip, u32 dat0, - u32 dat1, int portmap, void *ctx) +static int alr_loop_cb_del_port_learned(struct lan9303 *chip, u32 dat0, + u32 dat1, int portmap, void *ctx) { struct del_port_learned_ctx *del_ctx = ctx; int port = del_ctx->port; if (((BIT(port) & portmap) == 0) || (dat1 & LAN9303_ALR_DAT1_STATIC)) - return; + return 0; /* learned entries has only one port, we can just delete */ dat1 &= ~LAN9303_ALR_DAT1_VALID; /* delete entry */ lan9303_alr_make_entry_raw(chip, dat0, dat1); + + return 0; } struct port_fdb_dump_ctx { @@ -626,19 +632,19 @@ struct port_fdb_dump_ctx { dsa_fdb_dump_cb_t *cb; }; -static void alr_loop_cb_fdb_port_dump(struct lan9303 *chip, u32 dat0, - u32 dat1, int portmap, void *ctx) +static int alr_loop_cb_fdb_port_dump(struct lan9303 *chip, u32 dat0, + u32 dat1, int portmap, void *ctx) { struct port_fdb_dump_ctx *dump_ctx = ctx; u8 mac[ETH_ALEN]; bool is_static; if ((BIT(dump_ctx->port) & portmap) == 0) - return; + return 0; alr_reg_to_mac(dat0, dat1, mac); is_static = !!(dat1 & LAN9303_ALR_DAT1_STATIC); - dump_ctx->cb(mac, 0, is_static, dump_ctx->data); + return dump_ctx->cb(mac, 0, is_static, dump_ctx->data); } /* Set a static ALR entry. 
Delete entry if port_map is zero */ @@ -1210,9 +1216,7 @@ static int lan9303_port_fdb_dump(struct dsa_switch *ds, int port, }; dev_dbg(chip->dev, "%s(%d)\n", __func__, port); - lan9303_alr_loop(chip, alr_loop_cb_fdb_port_dump, &dump_ctx); - - return 0; + return lan9303_alr_loop(chip, alr_loop_cb_fdb_port_dump, &dump_ctx); } static int lan9303_port_mdb_prepare(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 314ae78bbdd6..e78026ef6d8c 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1404,11 +1404,17 @@ static int gswip_port_fdb_dump(struct dsa_switch *ds, int port, addr[1] = mac_bridge.key[2] & 0xff; addr[0] = (mac_bridge.key[2] >> 8) & 0xff; if (mac_bridge.val[1] & GSWIP_TABLE_MAC_BRIDGE_STATIC) { - if (mac_bridge.val[0] & BIT(port)) - cb(addr, 0, true, data); + if (mac_bridge.val[0] & BIT(port)) { + err = cb(addr, 0, true, data); + if (err) + return err; + } } else { - if (((mac_bridge.val[0] & GENMASK(7, 4)) >> 4) == port) - cb(addr, 0, false, data); + if (((mac_bridge.val[0] & GENMASK(7, 4)) >> 4) == port) { + err = cb(addr, 0, false, data); + if (err) + return err; + } } } return 0; diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 560f6843bb65..c5142f86a3c7 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -687,8 +687,8 @@ static void ksz8_r_vlan_entries(struct ksz_device *dev, u16 addr) shifts = ksz8->shifts; ksz8_r_table(dev, TABLE_VLAN, addr, &data); - addr *= dev->phy_port_cnt; - for (i = 0; i < dev->phy_port_cnt; i++) { + addr *= 4; + for (i = 0; i < 4; i++) { dev->vlan_cache[addr + i].table[0] = (u16)data; data >>= shifts[VLAN_TABLE]; } @@ -702,7 +702,7 @@ static void ksz8_r_vlan_table(struct ksz_device *dev, u16 vid, u16 *vlan) u64 buf; data = (u16 *)&buf; - addr = vid / dev->phy_port_cnt; + addr = vid / 4; index = vid & 3; ksz8_r_table(dev, TABLE_VLAN, addr, &buf); *vlan = data[index]; @@ -716,7 +716,7 @@ static void ksz8_w_vlan_table(struct ksz_device *dev, u16 vid, u16 vlan) u64 buf; data = (u16 *)&buf; - addr = vid / dev->phy_port_cnt; + addr = vid / 4; index = vid & 3; ksz8_r_table(dev, TABLE_VLAN, addr, &buf); data[index] = vlan; @@ -1119,24 +1119,67 @@ static int ksz8_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag, if (ksz_is_ksz88x3(dev)) return -ENOTSUPP; + /* Discard packets with VID not enabled on the switch */ ksz_cfg(dev, S_MIRROR_CTRL, SW_VLAN_ENABLE, flag); + /* Discard packets with VID not enabled on the ingress port */ + for (port = 0; port < dev->phy_port_cnt; ++port) + ksz_port_cfg(dev, port, REG_PORT_CTRL_2, PORT_INGRESS_FILTER, + flag); + return 0; } +static void ksz8_port_enable_pvid(struct ksz_device *dev, int port, bool state) +{ + if (ksz_is_ksz88x3(dev)) { + ksz_cfg(dev, REG_SW_INSERT_SRC_PVID, + 0x03 << (4 - 2 * port), state); + } else { + ksz_pwrite8(dev, port, REG_PORT_CTRL_12, state ? 
0x0f : 0x00); + } +} + static int ksz8_port_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack) { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct ksz_device *dev = ds->priv; + struct ksz_port *p = &dev->ports[port]; u16 data, new_pvid = 0; u8 fid, member, valid; if (ksz_is_ksz88x3(dev)) return -ENOTSUPP; - ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged); + /* If a VLAN is added with untagged flag different from the + * port's Remove Tag flag, we need to change the latter. + * Ignore VID 0, which is always untagged. + * Ignore CPU port, which will always be tagged. + */ + if (untagged != p->remove_tag && vlan->vid != 0 && + port != dev->cpu_port) { + unsigned int vid; + + /* Reject attempts to add a VLAN that requires the + * Remove Tag flag to be changed, unless there are no + * other VLANs currently configured. + */ + for (vid = 1; vid < dev->num_vlans; ++vid) { + /* Skip the VID we are going to add or reconfigure */ + if (vid == vlan->vid) + continue; + + ksz8_from_vlan(dev, dev->vlan_cache[vid].table[0], + &fid, &member, &valid); + if (valid && (member & BIT(port))) + return -EINVAL; + } + + ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged); + p->remove_tag = untagged; + } ksz8_r_vlan_table(dev, vlan->vid, &data); ksz8_from_vlan(dev, data, &fid, &member, &valid); @@ -1160,9 +1203,11 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port, u16 vid; ksz_pread16(dev, port, REG_PORT_CTRL_VID, &vid); - vid &= 0xfff; + vid &= ~VLAN_VID_MASK; vid |= new_pvid; ksz_pwrite16(dev, port, REG_PORT_CTRL_VID, vid); + + ksz8_port_enable_pvid(dev, port, true); } return 0; @@ -1171,9 +1216,8 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port, static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { - bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct ksz_device *dev = ds->priv; - u16 data, pvid, new_pvid = 0; + u16 data, pvid; u8 fid, member, valid; if (ksz_is_ksz88x3(dev)) @@ -1182,8 +1226,6 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, ksz_pread16(dev, port, REG_PORT_CTRL_VID, &pvid); pvid = pvid & 0xFFF; - ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged); - ksz8_r_vlan_table(dev, vlan->vid, &data); ksz8_from_vlan(dev, data, &fid, &member, &valid); @@ -1195,14 +1237,11 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, valid = 0; } - if (pvid == vlan->vid) - new_pvid = 1; - ksz8_to_vlan(dev, fid, member, valid, &data); ksz8_w_vlan_table(dev, vlan->vid, data); - if (new_pvid != pvid) - ksz_pwrite16(dev, port, REG_PORT_CTRL_VID, pvid); + if (pvid == vlan->vid) + ksz8_port_enable_pvid(dev, port, false); return 0; } @@ -1435,6 +1474,9 @@ static int ksz8_setup(struct dsa_switch *ds) ksz_cfg(dev, S_MIRROR_CTRL, SW_MIRROR_RX_TX, false); + if (!ksz_is_ksz88x3(dev)) + ksz_cfg(dev, REG_SW_CTRL_19, SW_INS_TAG_ENABLE, true); + /* set broadcast storm protection 10% rate */ regmap_update_bits(dev->regmap[1], S_REPLACE_VID_CTRL, BROADCAST_STORM_RATE, @@ -1717,6 +1759,16 @@ static int ksz8_switch_init(struct ksz_device *dev) /* set the real number of ports */ dev->ds->num_ports = dev->port_cnt; + /* We rely on software untagging on the CPU port, so that we + * can support both tagged and untagged VLANs + */ + dev->ds->untag_bridge_pvid = true; + + /* VLAN filtering is partly controlled by the global VLAN + * Enable flag + */ + dev->ds->vlan_filtering_is_global = true; + 
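	/* Note on the two flags set just above (descriptive only): with
	 * untag_bridge_pvid set, the DSA receive path strips the VLAN tag
	 * from packets that ingress the CPU port on the bridge's PVID,
	 * which is what allows mixed tagged/untagged operation even though
	 * the switch always tags frames towards the CPU; and
	 * vlan_filtering_is_global tells the core that toggling VLAN
	 * filtering on one port affects every port of this switch.
	 */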
return 0; } diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h index a32355624f31..6b40bc25f7ff 100644 --- a/drivers/net/dsa/microchip/ksz8795_reg.h +++ b/drivers/net/dsa/microchip/ksz8795_reg.h @@ -631,6 +631,10 @@ #define REG_PORT_4_OUT_RATE_3 0xEE #define REG_PORT_5_OUT_RATE_3 0xFE +/* 88x3 specific */ + +#define REG_SW_INSERT_SRC_PVID 0xC2 + /* PME */ #define SW_PME_OUTPUT_ENABLE BIT(1) diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 2e6bfd333f50..1597c63988b4 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -27,6 +27,7 @@ struct ksz_port_mib { struct ksz_port { u16 member; u16 vid_member; + bool remove_tag; /* Remove Tag flag set, for ksz8795 only */ int stp_state; struct phy_device phydev; @@ -205,12 +206,8 @@ static inline int ksz_read64(struct ksz_device *dev, u32 reg, u64 *val) int ret; ret = regmap_bulk_read(dev->regmap[2], reg, value, 2); - if (!ret) { - /* Ick! ToDo: Add 64bit R/W to regmap on 32bit systems */ - value[0] = swab32(value[0]); - value[1] = swab32(value[1]); - *val = swab64((u64)*value); - } + if (!ret) + *val = (u64)value[0] << 32 | value[1]; return ret; } diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 385e169080d9..d757d9dcba51 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -47,6 +47,7 @@ static const struct mt7530_mib_desc mt7530_mib[] = { MIB_DESC(2, 0x48, "TxBytes"), MIB_DESC(1, 0x60, "RxDrop"), MIB_DESC(1, 0x64, "RxFiltering"), + MIB_DESC(1, 0x68, "RxUnicast"), MIB_DESC(1, 0x6c, "RxMulticast"), MIB_DESC(1, 0x70, "RxBroadcast"), MIB_DESC(1, 0x74, "RxAlignErr"), @@ -1190,18 +1191,6 @@ mt7530_port_bridge_flags(struct dsa_switch *ds, int port, } static int -mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack) -{ - struct mt7530_priv *priv = ds->priv; - - mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)), - mrouter ? UNM_FFP(BIT(port)) : 0); - - return 0; -} - -static int mt7530_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *bridge) { @@ -1257,9 +1246,11 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port) mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, MT7530_PORT_FALLBACK_MODE); - mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK, + mt7530_rmw(priv, MT7530_PVC_P(port), + VLAN_ATTR_MASK | PVC_EG_TAG_MASK | ACC_FRM_MASK, VLAN_ATTR(MT7530_VLAN_TRANSPARENT) | - PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); + PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT) | + MT7530_VLAN_ACC_ALL); /* Set PVID to 0 */ mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, @@ -1297,6 +1288,11 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port) MT7530_PORT_SECURITY_MODE); mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, G0_PORT_VID(priv->ports[port].pvid)); + + /* Only accept tagged frames if PVID is not set */ + if (!priv->ports[port].pvid) + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_TAGGED); } /* Set the port as a user port which is to be able to recognize VID @@ -1320,11 +1316,8 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port, /* Remove this port from the port matrix of the other ports * in the same bridge. If the port is disabled, port matrix * is kept and not being setup until the port becomes enabled. - * And the other port's port matrix cannot be broken when the - * other port is still a VLAN-aware port. 
*/ - if (dsa_is_user_port(ds, i) && i != port && - !dsa_port_is_vlan_filtering(dsa_to_port(ds, i))) { + if (dsa_is_user_port(ds, i) && i != port) { if (dsa_to_port(ds, i)->bridge_dev != bridge) continue; if (priv->ports[i].enable) @@ -1624,11 +1617,26 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port, if (pvid) { priv->ports[port].pvid = vlan->vid; + /* Accept all frames if PVID is set */ + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_ALL); + /* Only configure PVID if VLAN filtering is enabled */ if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, G0_PORT_VID(vlan->vid)); + } else if (vlan->vid && priv->ports[port].pvid == vlan->vid) { + /* This VLAN is overwritten without PVID, so unset it */ + priv->ports[port].pvid = G0_PORT_VID_DEF; + + /* Only accept tagged frames if the port is VLAN-aware */ + if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_TAGGED); + + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); } mutex_unlock(&priv->reg_mutex); @@ -1654,6 +1662,12 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port, */ if (priv->ports[port].pvid == vlan->vid) { priv->ports[port].pvid = G0_PORT_VID_DEF; + + /* Only accept tagged frames if the port is VLAN-aware */ + if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_TAGGED); + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, G0_PORT_VID_DEF); } @@ -3086,7 +3100,6 @@ static const struct dsa_switch_ops mt7530_switch_ops = { .port_stp_state_set = mt7530_stp_state_set, .port_pre_bridge_flags = mt7530_port_pre_bridge_flags, .port_bridge_flags = mt7530_port_bridge_flags, - .port_set_mrouter = mt7530_port_set_mrouter, .port_bridge_join = mt7530_port_bridge_join, .port_bridge_leave = mt7530_port_bridge_leave, .port_fdb_add = mt7530_port_fdb_add, diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 4a91d80f51bb..fe4cd2ac26d0 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -238,6 +238,7 @@ enum mt7530_port_mode { #define PVC_EG_TAG_MASK PVC_EG_TAG(7) #define VLAN_ATTR(x) (((x) & 0x3) << 6) #define VLAN_ATTR_MASK VLAN_ATTR(3) +#define ACC_FRM_MASK GENMASK(1, 0) enum mt7530_vlan_port_eg_tag { MT7530_VLAN_EG_DISABLED = 0, @@ -249,6 +250,12 @@ enum mt7530_vlan_port_attr { MT7530_VLAN_TRANSPARENT = 3, }; +enum mt7530_vlan_port_acc_frm { + MT7530_VLAN_ACC_ALL = 0, + MT7530_VLAN_ACC_TAGGED = 1, + MT7530_VLAN_ACC_UNTAGGED = 2, +}; + #define STAG_VPID (((x) & 0xffff) << 16) /* Register for port port-and-protocol based vlan 1 control */ diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 634a48e6616b..7a2445a34eb7 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -2,6 +2,7 @@ config NET_DSA_MV88E6XXX tristate "Marvell 88E6xxx Ethernet switch fabric support" depends on NET_DSA + depends on PTP_1588_CLOCK_OPTIONAL select IRQ_DOMAIN select NET_DSA_TAG_EDSA select NET_DSA_TAG_DSA diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ddb51dd132ef..c45ca2473743 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5797,7 +5797,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; - bool do_fast_age = false; int 
err = -EOPNOTSUPP; mv88e6xxx_reg_lock(chip); @@ -5809,9 +5808,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, err = mv88e6xxx_port_set_assoc_vector(chip, port, pav); if (err) goto out; - - if (!learning) - do_fast_age = true; } if (flags.mask & BR_FLOOD) { @@ -5843,26 +5839,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, out: mv88e6xxx_reg_unlock(chip); - if (do_fast_age) - mv88e6xxx_port_fast_age(ds, port); - - return err; -} - -static int mv88e6xxx_port_set_mrouter(struct dsa_switch *ds, int port, - bool mrouter, - struct netlink_ext_ack *extack) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int err; - - if (!chip->info->ops->port_set_mcast_flood) - return -EOPNOTSUPP; - - mv88e6xxx_reg_lock(chip); - err = chip->info->ops->port_set_mcast_flood(chip, port, mrouter); - mv88e6xxx_reg_unlock(chip); - return err; } @@ -6167,7 +6143,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_bridge_leave = mv88e6xxx_port_bridge_leave, .port_pre_bridge_flags = mv88e6xxx_port_pre_bridge_flags, .port_bridge_flags = mv88e6xxx_port_bridge_flags, - .port_set_mrouter = mv88e6xxx_port_set_mrouter, .port_stp_state_set = mv88e6xxx_port_stp_state_set, .port_fast_age = mv88e6xxx_port_fast_age, .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, diff --git a/drivers/net/dsa/ocelot/Kconfig b/drivers/net/dsa/ocelot/Kconfig index 932b6b6fe817..9948544ba1c4 100644 --- a/drivers/net/dsa/ocelot/Kconfig +++ b/drivers/net/dsa/ocelot/Kconfig @@ -5,6 +5,7 @@ config NET_DSA_MSCC_FELIX depends on NET_VENDOR_MICROSEMI depends on NET_VENDOR_FREESCALE depends on HAS_IOMEM + depends on PTP_1588_CLOCK_OPTIONAL select MSCC_OCELOT_SWITCH_LIB select NET_DSA_TAG_OCELOT_8021Q select NET_DSA_TAG_OCELOT @@ -19,6 +20,7 @@ config NET_DSA_MSCC_SEVILLE depends on NET_DSA depends on NET_VENDOR_MICROSEMI depends on HAS_IOMEM + depends on PTP_1588_CLOCK_OPTIONAL select MSCC_OCELOT_SWITCH_LIB select NET_DSA_TAG_OCELOT_8021Q select NET_DSA_TAG_OCELOT diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 583a22d901b3..cbe23b20f3fa 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -796,23 +796,6 @@ static int felix_vlan_del(struct dsa_switch *ds, int port, return ocelot_vlan_del(ocelot, port, vlan->vid); } -static int felix_port_enable(struct dsa_switch *ds, int port, - struct phy_device *phy) -{ - struct ocelot *ocelot = ds->priv; - - ocelot_port_enable(ocelot, port, phy); - - return 0; -} - -static void felix_port_disable(struct dsa_switch *ds, int port) -{ - struct ocelot *ocelot = ds->priv; - - return ocelot_port_disable(ocelot, port); -} - static void felix_phylink_validate(struct dsa_switch *ds, int port, unsigned long *supported, struct phylink_link_state *state) @@ -841,25 +824,9 @@ static void felix_phylink_mac_link_down(struct dsa_switch *ds, int port, phy_interface_t interface) { struct ocelot *ocelot = ds->priv; - struct ocelot_port *ocelot_port = ocelot->ports[port]; - int err; - - ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA, - DEV_MAC_ENA_CFG); - ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0); - - err = ocelot_port_flush(ocelot, port); - if (err) - dev_err(ocelot->dev, "failed to flush port %d: %d\n", - port, err); - - /* Put the port in reset. 
*/ - ocelot_port_writel(ocelot_port, - DEV_CLOCK_CFG_MAC_TX_RST | - DEV_CLOCK_CFG_MAC_RX_RST | - DEV_CLOCK_CFG_LINK_SPEED(OCELOT_SPEED_1000), - DEV_CLOCK_CFG); + ocelot_phylink_mac_link_down(ocelot, port, link_an_mode, interface, + FELIX_MAC_QUIRKS); } static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port, @@ -870,75 +837,11 @@ static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port, bool tx_pause, bool rx_pause) { struct ocelot *ocelot = ds->priv; - struct ocelot_port *ocelot_port = ocelot->ports[port]; struct felix *felix = ocelot_to_felix(ocelot); - u32 mac_fc_cfg; - - /* Take port out of reset by clearing the MAC_TX_RST, MAC_RX_RST and - * PORT_RST bits in DEV_CLOCK_CFG. Note that the way this system is - * integrated is that the MAC speed is fixed and it's the PCS who is - * performing the rate adaptation, so we have to write "1000Mbps" into - * the LINK_SPEED field of DEV_CLOCK_CFG (which is also its default - * value). - */ - ocelot_port_writel(ocelot_port, - DEV_CLOCK_CFG_LINK_SPEED(OCELOT_SPEED_1000), - DEV_CLOCK_CFG); - - switch (speed) { - case SPEED_10: - mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(3); - break; - case SPEED_100: - mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(2); - break; - case SPEED_1000: - case SPEED_2500: - mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(1); - break; - default: - dev_err(ocelot->dev, "Unsupported speed on port %d: %d\n", - port, speed); - return; - } - - /* handle Rx pause in all cases, with 2500base-X this is used for rate - * adaptation. - */ - mac_fc_cfg |= SYS_MAC_FC_CFG_RX_FC_ENA; - - if (tx_pause) - mac_fc_cfg |= SYS_MAC_FC_CFG_TX_FC_ENA | - SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) | - SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) | - SYS_MAC_FC_CFG_ZERO_PAUSE_ENA; - - /* Flow control. Link speed is only used here to evaluate the time - * specification in incoming pause frames. - */ - ocelot_write_rix(ocelot, mac_fc_cfg, SYS_MAC_FC_CFG, port); - - ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port); - - ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause); - - /* Undo the effects of felix_phylink_mac_link_down: - * enable MAC module - */ - ocelot_port_writel(ocelot_port, DEV_MAC_ENA_CFG_RX_ENA | - DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG); - - /* Enable receiving frames on the port, and activate auto-learning of - * MAC addresses. 
- */ - ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO | - ANA_PORT_PORT_CFG_RECV_ENA | - ANA_PORT_PORT_CFG_PORTID_VAL(port), - ANA_PORT_PORT_CFG, port); - /* Core: Enable port for frame transfer */ - ocelot_fields_write(ocelot, port, - QSYS_SWITCH_PORT_MODE_PORT_ENA, 1); + ocelot_phylink_mac_link_up(ocelot, port, phydev, link_an_mode, + interface, speed, duplex, tx_pause, rx_pause, + FELIX_MAC_QUIRKS); if (felix->info->port_sched_speed_set) felix->info->port_sched_speed_set(ocelot, port, speed); @@ -1615,8 +1518,6 @@ const struct dsa_switch_ops felix_switch_ops = { .phylink_mac_config = felix_phylink_mac_config, .phylink_mac_link_down = felix_phylink_mac_link_down, .phylink_mac_link_up = felix_phylink_mac_link_up, - .port_enable = felix_port_enable, - .port_disable = felix_port_disable, .port_fdb_dump = felix_fdb_dump, .port_fdb_add = felix_fdb_add, .port_fdb_del = felix_fdb_del, diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 9da3c6a94c6e..5854bab43327 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -5,6 +5,7 @@ #define _MSCC_FELIX_H #define ocelot_to_felix(o) container_of((o), struct felix, ocelot) +#define FELIX_MAC_QUIRKS OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION /* Platform-specific information */ struct felix_info { diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index ca2ad77b71f1..563d8a279030 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -101,6 +101,23 @@ AR9331_SW_PORT_STATUS_RX_FLOW_EN | AR9331_SW_PORT_STATUS_TX_FLOW_EN | \ AR9331_SW_PORT_STATUS_SPEED_M) +#define AR9331_SW_REG_PORT_CTRL(_port) (0x104 + (_port) * 0x100) +#define AR9331_SW_PORT_CTRL_HEAD_EN BIT(11) +#define AR9331_SW_PORT_CTRL_PORT_STATE GENMASK(2, 0) +#define AR9331_SW_PORT_CTRL_PORT_STATE_DISABLED 0 +#define AR9331_SW_PORT_CTRL_PORT_STATE_BLOCKING 1 +#define AR9331_SW_PORT_CTRL_PORT_STATE_LISTENING 2 +#define AR9331_SW_PORT_CTRL_PORT_STATE_LEARNING 3 +#define AR9331_SW_PORT_CTRL_PORT_STATE_FORWARD 4 + +#define AR9331_SW_REG_PORT_VLAN(_port) (0x108 + (_port) * 0x100) +#define AR9331_SW_PORT_VLAN_8021Q_MODE GENMASK(31, 30) +#define AR9331_SW_8021Q_MODE_SECURE 3 +#define AR9331_SW_8021Q_MODE_CHECK 2 +#define AR9331_SW_8021Q_MODE_FALLBACK 1 +#define AR9331_SW_8021Q_MODE_NONE 0 +#define AR9331_SW_PORT_VLAN_PORT_VID_MEMBER GENMASK(25, 16) + /* MIB registers */ #define AR9331_MIB_COUNTER(x) (0x20000 + ((x) * 0x100)) @@ -371,12 +388,60 @@ static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv) return 0; } -static int ar9331_sw_setup(struct dsa_switch *ds) +static int ar9331_sw_setup_port(struct dsa_switch *ds, int port) { struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; struct regmap *regmap = priv->regmap; + u32 port_mask, port_ctrl, val; int ret; + /* Generate default port settings */ + port_ctrl = FIELD_PREP(AR9331_SW_PORT_CTRL_PORT_STATE, + AR9331_SW_PORT_CTRL_PORT_STATE_FORWARD); + + if (dsa_is_cpu_port(ds, port)) { + /* CPU port should be allowed to communicate with all user + * ports. + */ + port_mask = dsa_user_ports(ds); + /* Enable Atheros header on CPU port. This will allow us + * communicate with each port separately + */ + port_ctrl |= AR9331_SW_PORT_CTRL_HEAD_EN; + } else if (dsa_is_user_port(ds, port)) { + /* User ports should communicate only with the CPU port. 
+ */ + port_mask = BIT(dsa_upstream_port(ds, port)); + } else { + /* Other ports do not need to communicate at all */ + port_mask = 0; + } + + val = FIELD_PREP(AR9331_SW_PORT_VLAN_8021Q_MODE, + AR9331_SW_8021Q_MODE_NONE) | + FIELD_PREP(AR9331_SW_PORT_VLAN_PORT_VID_MEMBER, port_mask); + + ret = regmap_write(regmap, AR9331_SW_REG_PORT_VLAN(port), val); + if (ret) + goto error; + + ret = regmap_write(regmap, AR9331_SW_REG_PORT_CTRL(port), port_ctrl); + if (ret) + goto error; + + return 0; +error: + dev_err(priv->dev, "%s: error: %i\n", __func__, ret); + + return ret; +} + +static int ar9331_sw_setup(struct dsa_switch *ds) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret, i; + ret = ar9331_sw_reset(priv); if (ret) return ret; @@ -402,6 +467,12 @@ static int ar9331_sw_setup(struct dsa_switch *ds) if (ret) goto error; + for (i = 0; i < ds->num_ports; i++) { + ret = ar9331_sw_setup_port(ds, i); + if (ret) + goto error; + } + ds->configure_vlan_while_not_filtering = false; return 0; @@ -837,16 +908,24 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val) return 0; } - ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); + /* In case of this switch we work with 32bit registers on top of 16bit + * bus. Some registers (for example access to forwarding database) have + * trigger bit on the first 16bit half of request, the result and + * configuration of request in the second half. + * To make it work properly, we should do the second part of transfer + * before the first one is done. + */ + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2, + val >> 16); if (ret < 0) goto error; - ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2, - val >> 16); + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); if (ret < 0) goto error; return 0; + error: dev_err_ratelimited(&sbus->dev, "Bus error. 
Failed to write register.\n"); return ret; diff --git a/drivers/net/dsa/sja1105/Kconfig b/drivers/net/dsa/sja1105/Kconfig index b29d41e5e1e7..1291bba3f3b6 100644 --- a/drivers/net/dsa/sja1105/Kconfig +++ b/drivers/net/dsa/sja1105/Kconfig @@ -2,6 +2,7 @@ config NET_DSA_SJA1105 tristate "NXP SJA1105 Ethernet switch family support" depends on NET_DSA && SPI + depends on PTP_1588_CLOCK_OPTIONAL select NET_DSA_TAG_SJA1105 select PCS_XPCS select PACKING diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 9cd7dbdd7db9..2e899c9f036d 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -233,7 +233,6 @@ struct sja1105_private { phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS]; bool fixed_link[SJA1105_MAX_NUM_PORTS]; bool vlan_aware; - unsigned long learn_ena; unsigned long ucast_egress_floods; unsigned long bcast_egress_floods; const struct sja1105_info *info; diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c index bd3ad18c150e..f2049f52833c 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c @@ -304,6 +304,15 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, hostcmd = SJA1105_HOSTCMD_INVALIDATE; } sja1105_packing(p, &hostcmd, 25, 23, size, op); +} + +static void +sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, + enum packing_op op) +{ + int entry_size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY; + + sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size); /* Hack - The hardware takes the 'index' field within * struct sja1105_l2_lookup_entry as the index on which this command @@ -313,26 +322,18 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, * such that our API doesn't need to ask for a full-blown entry * structure when e.g. a delete is requested. 
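A condensed view of the write order that the ar9331_mdio_write() hunk above ends up with, reconstructed here only for readability (error unwinding trimmed; the register layout is the one described in that hunk's comment — the low 16-bit half at `reg` can carry the trigger bit, the high half at `reg + 2` carries data/configuration):

	/* Write the high half first: it only holds data/configuration. */
	ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG,
				  reg + 2, val >> 16);
	if (ret < 0)
		goto error;

	/* Write the low half last: for registers such as the FDB access
	 * window this half contains the trigger bit, so this write both
	 * completes and kicks off the request.
	 */
	ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG,
				  reg, val);
	if (ret < 0)
		goto error;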
*/ - sja1105_packing(buf, &cmd->index, 15, 6, - SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY, op); -} - -static void -sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, - enum packing_op op) -{ - int size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY; - - return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size); + sja1105_packing(buf, &cmd->index, 15, 6, entry_size, op); } static void sja1110_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, enum packing_op op) { - int size = SJA1110_SIZE_L2_LOOKUP_ENTRY; + int entry_size = SJA1110_SIZE_L2_LOOKUP_ENTRY; + + sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size); - return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size); + sja1105_packing(buf, &cmd->index, 10, 1, entry_size, op); } /* The switch is so retarded that it makes our command/entry abstraction diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index f13a6766dd41..05ba65042b5f 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -28,8 +28,6 @@ #define SJA1105_UNKNOWN_MULTICAST 0x010000000000ull #define SJA1105_DEFAULT_VLAN (VLAN_N_VID - 1) -static const struct dsa_switch_ops sja1105_switch_ops; - static void sja1105_hw_reset(struct gpio_desc *gpio, unsigned int pulse_len, unsigned int startup_delay) { @@ -176,7 +174,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) struct sja1105_mac_config_entry *mac; struct dsa_switch *ds = priv->ds; struct sja1105_table *table; - int i; + struct dsa_port *dp; table = &priv->static_config.tables[BLK_IDX_MAC_CONFIG]; @@ -195,8 +193,11 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) mac = table->entries; - for (i = 0; i < ds->num_ports; i++) { - mac[i] = default_mac; + list_for_each_entry(dp, &ds->dst->ports, list) { + if (dp->ds != ds) + continue; + + mac[dp->index] = default_mac; /* Let sja1105_bridge_stp_state_set() keep address learning * enabled for the DSA ports. CPU ports use software-assisted @@ -205,8 +206,8 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) * CPU ports in a cross-chip topology if multiple CPU ports * exist. */ - if (dsa_is_dsa_port(ds, i)) - priv->learn_ena |= BIT(i); + if (dsa_port_is_dsa(dp)) + dp->learning = true; } return 0; @@ -1484,10 +1485,11 @@ static int sja1105et_is_fdb_entry_in_bin(struct sja1105_private *priv, int bin, int sja1105et_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { - struct sja1105_l2_lookup_entry l2_lookup = {0}; + struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp; struct sja1105_private *priv = ds->priv; struct device *dev = ds->dev; int last_unused = -1; + int start, end, i; int bin, way, rc; bin = sja1105et_fdb_hash(priv, addr, vid); @@ -1499,7 +1501,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, * mask? If yes, we need to do nothing. If not, we need * to rewrite the entry by adding this port to it. 
*/ - if (l2_lookup.destports & BIT(port)) + if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds) return 0; l2_lookup.destports |= BIT(port); } else { @@ -1530,6 +1532,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, index, NULL, false); } } + l2_lookup.lockeds = true; l2_lookup.index = sja1105et_fdb_index(bin, way); rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, @@ -1538,6 +1541,29 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, if (rc < 0) return rc; + /* Invalidate a dynamically learned entry if that exists */ + start = sja1105et_fdb_index(bin, 0); + end = sja1105et_fdb_index(bin, way); + + for (i = start; i < end; i++) { + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + i, &tmp); + if (rc == -ENOENT) + continue; + if (rc) + return rc; + + if (tmp.macaddr != ether_addr_to_u64(addr) || tmp.vlanid != vid) + continue; + + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, + i, NULL, false); + if (rc) + return rc; + + break; + } + return sja1105_static_fdb_change(priv, port, &l2_lookup, true); } @@ -1579,32 +1605,30 @@ int sja1105et_fdb_del(struct dsa_switch *ds, int port, int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { - struct sja1105_l2_lookup_entry l2_lookup = {0}; + struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp; struct sja1105_private *priv = ds->priv; int rc, i; /* Search for an existing entry in the FDB table */ l2_lookup.macaddr = ether_addr_to_u64(addr); l2_lookup.vlanid = vid; - l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_aware) { - l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); - } else { - l2_lookup.mask_vlanid = 0; - l2_lookup.mask_iotag = 0; - } + l2_lookup.mask_vlanid = VLAN_VID_MASK; l2_lookup.destports = BIT(port); + tmp = l2_lookup; + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, - SJA1105_SEARCH, &l2_lookup); - if (rc == 0) { - /* Found and this port is already in the entry's + SJA1105_SEARCH, &tmp); + if (rc == 0 && tmp.index != SJA1105_MAX_L2_LOOKUP_COUNT - 1) { + /* Found a static entry and this port is already in the entry's * port mask => job done */ - if (l2_lookup.destports & BIT(port)) + if ((tmp.destports & BIT(port)) && tmp.lockeds) return 0; + + l2_lookup = tmp; + /* l2_lookup.index is populated by the switch in case it * found something. */ @@ -1626,16 +1650,46 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, dev_err(ds->dev, "FDB is full, cannot add entry.\n"); return -EINVAL; } - l2_lookup.lockeds = true; l2_lookup.index = i; skip_finding_an_index: + l2_lookup.lockeds = true; + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, l2_lookup.index, &l2_lookup, true); if (rc < 0) return rc; + /* The switch learns dynamic entries and looks up the FDB left to + * right. It is possible that our addition was concurrent with the + * dynamic learning of the same address, so now that the static entry + * has been installed, we are certain that address learning for this + * particular address has been turned off, so the dynamic entry either + * is in the FDB at an index smaller than the static one, or isn't (it + * can also be at a larger index, but in that case it is inactive + * because the static FDB entry will match first, and the dynamic one + * will eventually age out). Search for a dynamically learned address + * prior to our static one and invalidate it. 
+ */ + tmp = l2_lookup; + + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + SJA1105_SEARCH, &tmp); + if (rc < 0) { + dev_err(ds->dev, + "port %d failed to read back entry for %pM vid %d: %pe\n", + port, addr, vid, ERR_PTR(rc)); + return rc; + } + + if (tmp.index < l2_lookup.index) { + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, + tmp.index, NULL, false); + if (rc < 0) + return rc; + } + return sja1105_static_fdb_change(priv, port, &l2_lookup, true); } @@ -1649,15 +1703,8 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port, l2_lookup.macaddr = ether_addr_to_u64(addr); l2_lookup.vlanid = vid; - l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_aware) { - l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); - } else { - l2_lookup.mask_vlanid = 0; - l2_lookup.mask_iotag = 0; - } + l2_lookup.mask_vlanid = VLAN_VID_MASK; l2_lookup.destports = BIT(port); rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, @@ -1740,11 +1787,53 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port, /* We need to hide the dsa_8021q VLANs from the user. */ if (!priv->vlan_aware) l2_lookup.vlanid = 0; - cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data); + rc = cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data); + if (rc) + return rc; } return 0; } +static void sja1105_fast_age(struct dsa_switch *ds, int port) +{ + struct sja1105_private *priv = ds->priv; + int i; + + for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) { + struct sja1105_l2_lookup_entry l2_lookup = {0}; + u8 macaddr[ETH_ALEN]; + int rc; + + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + i, &l2_lookup); + /* No fdb entry at i, not an issue */ + if (rc == -ENOENT) + continue; + if (rc) { + dev_err(ds->dev, "Failed to read FDB: %pe\n", + ERR_PTR(rc)); + return; + } + + if (!(l2_lookup.destports & BIT(port))) + continue; + + /* Don't delete static FDB entries */ + if (l2_lookup.lockeds) + continue; + + u64_to_ether_addr(l2_lookup.macaddr, macaddr); + + rc = sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid); + if (rc) { + dev_err(ds->dev, + "Failed to delete FDB entry %pM vid %lld: %pe\n", + macaddr, l2_lookup.vlanid, ERR_PTR(rc)); + return; + } + } +} + static int sja1105_mdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb) { @@ -1853,6 +1942,7 @@ static int sja1105_bridge_member(struct dsa_switch *ds, int port, static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, u8 state) { + struct dsa_port *dp = dsa_to_port(ds, port); struct sja1105_private *priv = ds->priv; struct sja1105_mac_config_entry *mac; @@ -1878,12 +1968,12 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, case BR_STATE_LEARNING: mac[port].ingress = true; mac[port].egress = false; - mac[port].dyn_learn = !!(priv->learn_ena & BIT(port)); + mac[port].dyn_learn = dp->learning; break; case BR_STATE_FORWARDING: mac[port].ingress = true; mac[port].egress = true; - mac[port].dyn_learn = !!(priv->learn_ena & BIT(port)); + mac[port].dyn_learn = dp->learning; break; default: dev_err(ds->dev, "invalid STP state: %d\n", state); @@ -2423,148 +2513,6 @@ static int sja1105_prechangeupper(struct dsa_switch *ds, int port, return 0; } -/* The programming model for the SJA1105 switch is "all-at-once" via static - * configuration tables. Some of these can be dynamically modified at runtime, - * but not the xMII mode parameters table. 
- * Furthermode, some PHYs may not have crystals for generating their clocks - * (e.g. RMII). Instead, their 50MHz clock is supplied via the SJA1105 port's - * ref_clk pin. So port clocking needs to be initialized early, before - * connecting to PHYs is attempted, otherwise they won't respond through MDIO. - * Setting correct PHY link speed does not matter now. - * But dsa_slave_phy_setup is called later than sja1105_setup, so the PHY - * bindings are not yet parsed by DSA core. We need to parse early so that we - * can populate the xMII mode parameters table. - */ -static int sja1105_setup(struct dsa_switch *ds) -{ - struct sja1105_private *priv = ds->priv; - int rc; - - rc = sja1105_parse_dt(priv); - if (rc < 0) { - dev_err(ds->dev, "Failed to parse DT: %d\n", rc); - return rc; - } - - /* Error out early if internal delays are required through DT - * and we can't apply them. - */ - rc = sja1105_parse_rgmii_delays(priv); - if (rc < 0) { - dev_err(ds->dev, "RGMII delay not supported\n"); - return rc; - } - - rc = sja1105_ptp_clock_register(ds); - if (rc < 0) { - dev_err(ds->dev, "Failed to register PTP clock: %d\n", rc); - return rc; - } - - rc = sja1105_mdiobus_register(ds); - if (rc < 0) { - dev_err(ds->dev, "Failed to register MDIO bus: %pe\n", - ERR_PTR(rc)); - goto out_ptp_clock_unregister; - } - - if (priv->info->disable_microcontroller) { - rc = priv->info->disable_microcontroller(priv); - if (rc < 0) { - dev_err(ds->dev, - "Failed to disable microcontroller: %pe\n", - ERR_PTR(rc)); - goto out_mdiobus_unregister; - } - } - - /* Create and send configuration down to device */ - rc = sja1105_static_config_load(priv); - if (rc < 0) { - dev_err(ds->dev, "Failed to load static config: %d\n", rc); - goto out_mdiobus_unregister; - } - - /* Configure the CGU (PHY link modes and speeds) */ - if (priv->info->clocking_setup) { - rc = priv->info->clocking_setup(priv); - if (rc < 0) { - dev_err(ds->dev, - "Failed to configure MII clocking: %pe\n", - ERR_PTR(rc)); - goto out_static_config_free; - } - } - - /* On SJA1105, VLAN filtering per se is always enabled in hardware. - * The only thing we can do to disable it is lie about what the 802.1Q - * EtherType is. - * So it will still try to apply VLAN filtering, but all ingress - * traffic (except frames received with EtherType of ETH_P_SJA1105) - * will be internally tagged with a distorted VLAN header where the - * TPID is ETH_P_SJA1105, and the VLAN ID is the port pvid. 
- */ - ds->vlan_filtering_is_global = true; - ds->untag_bridge_pvid = true; - /* tag_8021q has 3 bits for the VBID, and the value 0 is reserved */ - ds->num_fwd_offloading_bridges = 7; - - /* Advertise the 8 egress queues */ - ds->num_tx_queues = SJA1105_NUM_TC; - - ds->mtu_enforcement_ingress = true; - ds->assisted_learning_on_cpu_port = true; - - rc = sja1105_devlink_setup(ds); - if (rc < 0) - goto out_static_config_free; - - rtnl_lock(); - rc = dsa_tag_8021q_register(ds, htons(ETH_P_8021Q)); - rtnl_unlock(); - if (rc) - goto out_devlink_teardown; - - return 0; - -out_devlink_teardown: - sja1105_devlink_teardown(ds); -out_mdiobus_unregister: - sja1105_mdiobus_unregister(ds); -out_ptp_clock_unregister: - sja1105_ptp_clock_unregister(ds); -out_static_config_free: - sja1105_static_config_free(&priv->static_config); - - return rc; -} - -static void sja1105_teardown(struct dsa_switch *ds) -{ - struct sja1105_private *priv = ds->priv; - int port; - - rtnl_lock(); - dsa_tag_8021q_unregister(ds); - rtnl_unlock(); - - for (port = 0; port < ds->num_ports; port++) { - struct sja1105_port *sp = &priv->ports[port]; - - if (!dsa_is_user_port(ds, port)) - continue; - - if (sp->xmit_worker) - kthread_destroy_worker(sp->xmit_worker); - } - - sja1105_devlink_teardown(ds); - sja1105_flower_teardown(ds); - sja1105_tas_teardown(ds); - sja1105_ptp_clock_unregister(ds); - sja1105_static_config_free(&priv->static_config); -} - static void sja1105_port_disable(struct dsa_switch *ds, int port) { struct sja1105_private *priv = ds->priv; @@ -2845,23 +2793,13 @@ static int sja1105_port_set_learning(struct sja1105_private *priv, int port, bool enabled) { struct sja1105_mac_config_entry *mac; - int rc; mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries; mac[port].dyn_learn = enabled; - rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, - &mac[port], true); - if (rc) - return rc; - - if (enabled) - priv->learn_ena |= BIT(port); - else - priv->learn_ena &= ~BIT(port); - - return 0; + return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, + &mac[port], true); } static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to, @@ -2977,7 +2915,190 @@ static int sja1105_port_bridge_flags(struct dsa_switch *ds, int port, return 0; } -static const struct dsa_switch_ops sja1105_switch_ops = { +static void sja1105_teardown_ports(struct sja1105_private *priv) +{ + struct dsa_switch *ds = priv->ds; + int port; + + for (port = 0; port < ds->num_ports; port++) { + struct sja1105_port *sp = &priv->ports[port]; + + if (sp->xmit_worker) + kthread_destroy_worker(sp->xmit_worker); + } +} + +static int sja1105_setup_ports(struct sja1105_private *priv) +{ + struct sja1105_tagger_data *tagger_data = &priv->tagger_data; + struct dsa_switch *ds = priv->ds; + int port, rc; + + /* Connections between dsa_port and sja1105_port */ + for (port = 0; port < ds->num_ports; port++) { + struct sja1105_port *sp = &priv->ports[port]; + struct dsa_port *dp = dsa_to_port(ds, port); + struct kthread_worker *worker; + struct net_device *slave; + + if (!dsa_port_is_user(dp)) + continue; + + dp->priv = sp; + sp->dp = dp; + sp->data = tagger_data; + slave = dp->slave; + kthread_init_work(&sp->xmit_work, sja1105_port_deferred_xmit); + worker = kthread_create_worker(0, "%s_xmit", slave->name); + if (IS_ERR(worker)) { + rc = PTR_ERR(worker); + dev_err(ds->dev, + "failed to create deferred xmit thread: %d\n", + rc); + goto out_destroy_workers; + } + sp->xmit_worker = worker; + 
skb_queue_head_init(&sp->xmit_queue); + sp->xmit_tpid = ETH_P_SJA1105; + } + + return 0; + +out_destroy_workers: + sja1105_teardown_ports(priv); + return rc; +} + +/* The programming model for the SJA1105 switch is "all-at-once" via static + * configuration tables. Some of these can be dynamically modified at runtime, + * but not the xMII mode parameters table. + * Furthermode, some PHYs may not have crystals for generating their clocks + * (e.g. RMII). Instead, their 50MHz clock is supplied via the SJA1105 port's + * ref_clk pin. So port clocking needs to be initialized early, before + * connecting to PHYs is attempted, otherwise they won't respond through MDIO. + * Setting correct PHY link speed does not matter now. + * But dsa_slave_phy_setup is called later than sja1105_setup, so the PHY + * bindings are not yet parsed by DSA core. We need to parse early so that we + * can populate the xMII mode parameters table. + */ +static int sja1105_setup(struct dsa_switch *ds) +{ + struct sja1105_private *priv = ds->priv; + int rc; + + if (priv->info->disable_microcontroller) { + rc = priv->info->disable_microcontroller(priv); + if (rc < 0) { + dev_err(ds->dev, + "Failed to disable microcontroller: %pe\n", + ERR_PTR(rc)); + return rc; + } + } + + /* Create and send configuration down to device */ + rc = sja1105_static_config_load(priv); + if (rc < 0) { + dev_err(ds->dev, "Failed to load static config: %d\n", rc); + return rc; + } + + /* Configure the CGU (PHY link modes and speeds) */ + if (priv->info->clocking_setup) { + rc = priv->info->clocking_setup(priv); + if (rc < 0) { + dev_err(ds->dev, + "Failed to configure MII clocking: %pe\n", + ERR_PTR(rc)); + goto out_static_config_free; + } + } + + rc = sja1105_setup_ports(priv); + if (rc) + goto out_static_config_free; + + sja1105_tas_setup(ds); + sja1105_flower_setup(ds); + + rc = sja1105_ptp_clock_register(ds); + if (rc < 0) { + dev_err(ds->dev, "Failed to register PTP clock: %d\n", rc); + goto out_flower_teardown; + } + + rc = sja1105_mdiobus_register(ds); + if (rc < 0) { + dev_err(ds->dev, "Failed to register MDIO bus: %pe\n", + ERR_PTR(rc)); + goto out_ptp_clock_unregister; + } + + rc = sja1105_devlink_setup(ds); + if (rc < 0) + goto out_mdiobus_unregister; + + rtnl_lock(); + rc = dsa_tag_8021q_register(ds, htons(ETH_P_8021Q)); + rtnl_unlock(); + if (rc) + goto out_devlink_teardown; + + /* On SJA1105, VLAN filtering per se is always enabled in hardware. + * The only thing we can do to disable it is lie about what the 802.1Q + * EtherType is. + * So it will still try to apply VLAN filtering, but all ingress + * traffic (except frames received with EtherType of ETH_P_SJA1105) + * will be internally tagged with a distorted VLAN header where the + * TPID is ETH_P_SJA1105, and the VLAN ID is the port pvid. 
+ */ + ds->vlan_filtering_is_global = true; + ds->untag_bridge_pvid = true; + /* tag_8021q has 3 bits for the VBID, and the value 0 is reserved */ + ds->num_fwd_offloading_bridges = 7; + + /* Advertise the 8 egress queues */ + ds->num_tx_queues = SJA1105_NUM_TC; + + ds->mtu_enforcement_ingress = true; + ds->assisted_learning_on_cpu_port = true; + + return 0; + +out_devlink_teardown: + sja1105_devlink_teardown(ds); +out_mdiobus_unregister: + sja1105_mdiobus_unregister(ds); +out_ptp_clock_unregister: + sja1105_ptp_clock_unregister(ds); +out_flower_teardown: + sja1105_flower_teardown(ds); + sja1105_tas_teardown(ds); + sja1105_teardown_ports(priv); +out_static_config_free: + sja1105_static_config_free(&priv->static_config); + + return rc; +} + +static void sja1105_teardown(struct dsa_switch *ds) +{ + struct sja1105_private *priv = ds->priv; + + rtnl_lock(); + dsa_tag_8021q_unregister(ds); + rtnl_unlock(); + + sja1105_devlink_teardown(ds); + sja1105_mdiobus_unregister(ds); + sja1105_ptp_clock_unregister(ds); + sja1105_flower_teardown(ds); + sja1105_tas_teardown(ds); + sja1105_teardown_ports(priv); + sja1105_static_config_free(&priv->static_config); +} + +const struct dsa_switch_ops sja1105_switch_ops = { .get_tag_protocol = sja1105_get_tag_protocol, .setup = sja1105_setup, .teardown = sja1105_teardown, @@ -2996,6 +3117,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_fdb_dump = sja1105_fdb_dump, .port_fdb_add = sja1105_fdb_add, .port_fdb_del = sja1105_fdb_del, + .port_fast_age = sja1105_fast_age, .port_bridge_join = sja1105_bridge_join, .port_bridge_leave = sja1105_bridge_leave, .port_pre_bridge_flags = sja1105_port_pre_bridge_flags, @@ -3025,6 +3147,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_bridge_tx_fwd_offload = dsa_tag_8021q_bridge_tx_fwd_offload, .port_bridge_tx_fwd_unoffload = dsa_tag_8021q_bridge_tx_fwd_unoffload, }; +EXPORT_SYMBOL_GPL(sja1105_switch_ops); static const struct of_device_id sja1105_dt_ids[]; @@ -3077,12 +3200,11 @@ static int sja1105_check_device_id(struct sja1105_private *priv) static int sja1105_probe(struct spi_device *spi) { - struct sja1105_tagger_data *tagger_data; struct device *dev = &spi->dev; struct sja1105_private *priv; size_t max_xfer, max_msg; struct dsa_switch *ds; - int rc, port; + int rc; if (!dev->of_node) { dev_err(dev, "No DTS bindings for SJA1105 driver\n"); @@ -3162,71 +3284,33 @@ static int sja1105_probe(struct spi_device *spi) ds->priv = priv; priv->ds = ds; - tagger_data = &priv->tagger_data; - mutex_init(&priv->ptp_data.lock); mutex_init(&priv->mgmt_lock); - sja1105_tas_setup(ds); - sja1105_flower_setup(ds); + rc = sja1105_parse_dt(priv); + if (rc < 0) { + dev_err(ds->dev, "Failed to parse DT: %d\n", rc); + return rc; + } - rc = dsa_register_switch(priv->ds); - if (rc) + /* Error out early if internal delays are required through DT + * and we can't apply them. 
+ */ + rc = sja1105_parse_rgmii_delays(priv); + if (rc < 0) { + dev_err(ds->dev, "RGMII delay not supported\n"); return rc; + } if (IS_ENABLED(CONFIG_NET_SCH_CBS)) { priv->cbs = devm_kcalloc(dev, priv->info->num_cbs_shapers, sizeof(struct sja1105_cbs_entry), GFP_KERNEL); - if (!priv->cbs) { - rc = -ENOMEM; - goto out_unregister_switch; - } - } - - /* Connections between dsa_port and sja1105_port */ - for (port = 0; port < ds->num_ports; port++) { - struct sja1105_port *sp = &priv->ports[port]; - struct dsa_port *dp = dsa_to_port(ds, port); - struct net_device *slave; - - if (!dsa_is_user_port(ds, port)) - continue; - - dp->priv = sp; - sp->dp = dp; - sp->data = tagger_data; - slave = dp->slave; - kthread_init_work(&sp->xmit_work, sja1105_port_deferred_xmit); - sp->xmit_worker = kthread_create_worker(0, "%s_xmit", - slave->name); - if (IS_ERR(sp->xmit_worker)) { - rc = PTR_ERR(sp->xmit_worker); - dev_err(ds->dev, - "failed to create deferred xmit thread: %d\n", - rc); - goto out_destroy_workers; - } - skb_queue_head_init(&sp->xmit_queue); - sp->xmit_tpid = ETH_P_SJA1105; - } - - return 0; - -out_destroy_workers: - while (port-- > 0) { - struct sja1105_port *sp = &priv->ports[port]; - - if (!dsa_is_user_port(ds, port)) - continue; - - kthread_destroy_worker(sp->xmit_worker); + if (!priv->cbs) + return -ENOMEM; } -out_unregister_switch: - dsa_unregister_switch(ds); - - return rc; + return dsa_register_switch(priv->ds); } static int sja1105_remove(struct spi_device *spi) diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 19aea8fb76f6..705d3900e43a 100644 --- a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -284,8 +284,7 @@ static int sja1105_mdiobus_base_tx_register(struct sja1105_private *priv, struct mii_bus *bus; int rc = 0; - np = of_find_compatible_node(mdio_node, NULL, - "nxp,sja1110-base-tx-mdio"); + np = of_get_compatible_child(mdio_node, "nxp,sja1110-base-tx-mdio"); if (!np) return 0; @@ -339,8 +338,7 @@ static int sja1105_mdiobus_base_t1_register(struct sja1105_private *priv, struct mii_bus *bus; int rc = 0; - np = of_find_compatible_node(mdio_node, NULL, - "nxp,sja1110-base-t1-mdio"); + np = of_get_compatible_child(mdio_node, "nxp,sja1110-base-t1-mdio"); if (!np) return 0; diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index c6a3abec86f5..4786f0504691 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -170,11 +170,11 @@ config AMD_XGBE tristate "AMD 10GbE Ethernet driver" depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM depends on X86 || ARM64 || COMPILE_TEST + depends on PTP_1588_CLOCK_OPTIONAL select BITREVERSE select CRC32 select PHYLIB select AMD_XGBE_HAVE_ECC if X86 - imply PTP_1588_CLOCK help This driver supports the AMD 10GbE Ethernet device found on an AMD SoC. diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 1a02ca600b71..56e0fb07aec7 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -122,8 +122,8 @@ config SB1250_MAC config TIGON3 tristate "Broadcom Tigon3 support" depends on PCI + depends on PTP_1588_CLOCK_OPTIONAL select PHYLIB - imply PTP_1588_CLOCK help This driver supports Broadcom Tigon3 based gigabit Ethernet cards. 
@@ -140,7 +140,7 @@ config TIGON3_HWMON config BNX2X tristate "Broadcom NetXtremeII 10Gb support" depends on PCI - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER select ZLIB_INFLATE select LIBCRC32C @@ -206,7 +206,7 @@ config SYSTEMPORT config BNXT tristate "Broadcom NetXtreme-C/E support" depends on PCI - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER select LIBCRC32C select NET_DEVLINK diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 1a6ec1a12d53..b5d954cb409a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2669,7 +2669,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) } /* Allocated memory for FW statistics */ - if (bnx2x_alloc_fw_stats_mem(bp)) + rc = bnx2x_alloc_fw_stats_mem(bp); + if (rc) LOAD_ERROR_EXIT(bp, load_error0); /* request pf to initialize status blocks */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 865fcb8cf29f..893bdaf03043 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -72,7 +72,8 @@ #include "bnxt_debugfs.h" #define BNXT_TX_TIMEOUT (5 * HZ) -#define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW) +#define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW | \ + NETIF_MSG_TX_ERR) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Broadcom BCM573xx network driver"); @@ -367,6 +368,33 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb) return md_dst->u.port_info.port_id; } +static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr, + u16 prod) +{ + bnxt_db_write(bp, &txr->tx_db, prod); + txr->kick_pending = 0; +} + +static bool bnxt_txr_netif_try_stop_queue(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + struct netdev_queue *txq) +{ + netif_tx_stop_queue(txq); + + /* netif_tx_stop_queue() must be done before checking + * tx index in bnxt_tx_avail() below, because in + * bnxt_tx_int(), we update tx index before checking for + * netif_tx_queue_stopped(). 
+ */ + smp_mb(); + if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) { + netif_tx_wake_queue(txq); + return false; + } + + return true; +} + static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); @@ -386,6 +414,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) i = skb_get_queue_mapping(skb); if (unlikely(i >= bp->tx_nr_rings)) { dev_kfree_skb_any(skb); + atomic_long_inc(&dev->tx_dropped); return NETDEV_TX_OK; } @@ -395,8 +424,12 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) free_size = bnxt_tx_avail(bp, txr); if (unlikely(free_size < skb_shinfo(skb)->nr_frags + 2)) { - netif_tx_stop_queue(txq); - return NETDEV_TX_BUSY; + /* We must have raced with NAPI cleanup */ + if (net_ratelimit() && txr->kick_pending) + netif_warn(bp, tx_err, dev, + "bnxt: ring busy w/ flush pending!\n"); + if (bnxt_txr_netif_try_stop_queue(bp, txr, txq)) + return NETDEV_TX_BUSY; } length = skb->len; @@ -428,7 +461,10 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) if (ptp && ptp->tx_tstamp_en && !skb_is_gso(skb) && atomic_dec_if_positive(&ptp->tx_avail) >= 0) { - if (!bnxt_ptp_parse(skb, &ptp->tx_seqid)) { + if (!bnxt_ptp_parse(skb, &ptp->tx_seqid, + &ptp->tx_hdr_off)) { + if (vlan_tag_flags) + ptp->tx_hdr_off += VLAN_HLEN; lflags |= cpu_to_le32(TX_BD_FLAGS_STAMP); skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; } else { @@ -516,21 +552,16 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) normal_tx: if (length < BNXT_MIN_PKT_SIZE) { pad = BNXT_MIN_PKT_SIZE - length; - if (skb_pad(skb, pad)) { + if (skb_pad(skb, pad)) /* SKB already freed. */ - tx_buf->skb = NULL; - return NETDEV_TX_OK; - } + goto tx_kick_pending; length = BNXT_MIN_PKT_SIZE; } mapping = dma_map_single(&pdev->dev, skb->data, len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&pdev->dev, mapping))) { - dev_kfree_skb_any(skb); - tx_buf->skb = NULL; - return NETDEV_TX_OK; - } + if (unlikely(dma_mapping_error(&pdev->dev, mapping))) + goto tx_free; dma_unmap_addr_set(tx_buf, mapping, mapping); flags = (len << TX_BD_LEN_SHIFT) | TX_BD_TYPE_LONG_TX_BD | @@ -617,24 +648,17 @@ normal_tx: txr->tx_prod = prod; if (!netdev_xmit_more() || netif_xmit_stopped(txq)) - bnxt_db_write(bp, &txr->tx_db, prod); + bnxt_txr_db_kick(bp, txr, prod); + else + txr->kick_pending = 1; tx_done: if (unlikely(bnxt_tx_avail(bp, txr) <= MAX_SKB_FRAGS + 1)) { if (netdev_xmit_more() && !tx_buf->is_push) - bnxt_db_write(bp, &txr->tx_db, prod); - - netif_tx_stop_queue(txq); + bnxt_txr_db_kick(bp, txr, prod); - /* netif_tx_stop_queue() must be done before checking - * tx index in bnxt_tx_avail() below, because in - * bnxt_tx_int(), we update tx index before checking for - * netif_tx_queue_stopped(). 
- */ - smp_mb(); - if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) - netif_tx_wake_queue(txq); + bnxt_txr_netif_try_stop_queue(bp, txr, txq); } return NETDEV_TX_OK; @@ -647,7 +671,6 @@ tx_dma_error: /* start back at beginning and unmap skb */ prod = txr->tx_prod; tx_buf = &txr->tx_buf_ring[prod]; - tx_buf->skb = NULL; dma_unmap_single(&pdev->dev, dma_unmap_addr(tx_buf, mapping), skb_headlen(skb), PCI_DMA_TODEVICE); prod = NEXT_TX(prod); @@ -661,7 +684,13 @@ tx_dma_error: PCI_DMA_TODEVICE); } +tx_free: dev_kfree_skb_any(skb); +tx_kick_pending: + if (txr->kick_pending) + bnxt_txr_db_kick(bp, txr, txr->tx_prod); + txr->tx_buf_ring[txr->tx_prod].skb = NULL; + atomic_long_inc(&dev->tx_dropped); return NETDEV_TX_OK; } @@ -731,14 +760,9 @@ next_tx_int: smp_mb(); if (unlikely(netif_tx_queue_stopped(txq)) && - (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh)) { - __netif_tx_lock(txq, smp_processor_id()); - if (netif_tx_queue_stopped(txq) && - bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh && - txr->dev_state != BNXT_DEV_STATE_CLOSING) - netif_tx_wake_queue(txq); - __netif_tx_unlock(txq); - } + bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh && + READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING) + netif_tx_wake_queue(txq); } static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, @@ -1766,6 +1790,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons)) return -EBUSY; + /* The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); prod = rxr->rx_prod; if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) { @@ -1988,6 +2016,10 @@ static int bnxt_force_rx_discard(struct bnxt *bp, if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons)) return -EBUSY; + /* The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); cmp_type = RX_CMP_TYPE(rxcmp); if (cmp_type == CMP_TYPE_RX_L2_CMP) { rxcmp1->rx_cmp_cfa_code_errors_v2 |= @@ -2474,6 +2506,10 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) if (!TX_CMP_VALID(txcmp, raw_cons)) break; + /* The valid test of the entry must be done first before + * reading any further. 
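The two bnxt hunks above form a stop/wake pairing between the xmit and completion paths. A condensed sketch of the resulting logic, pulled together from those hunks purely to make the pairing visible (not literal driver code):

	/* Producer side: bnxt_txr_netif_try_stop_queue(), called from
	 * bnxt_start_xmit() when the ring looks full.
	 */
	netif_tx_stop_queue(txq);
	smp_mb();	/* stop must be visible before re-reading tx_avail */
	if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) {
		netif_tx_wake_queue(txq);	/* consumer freed space meanwhile */
		return false;
	}
	return true;

	/* Consumer side: tail of bnxt_tx_int(), after advancing tx_cons. */
	smp_mb();	/* tx_cons update must be visible before the stopped check */
	if (unlikely(netif_tx_queue_stopped(txq)) &&
	    bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh &&
	    READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING)
		netif_tx_wake_queue(txq);

The barriers pair up so that either the producer observes the space the consumer just freed, or the consumer observes the queue the producer just stopped; without them the queue could stop and never be woken.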
+ */ + dma_rmb(); if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { tmp_raw_cons = NEXT_RAW_CMP(raw_cons); cp_cons = RING_CMP(tmp_raw_cons); @@ -9227,10 +9263,9 @@ static void bnxt_disable_napi(struct bnxt *bp) for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; + napi_disable(&bp->bnapi[i]->napi); if (bp->bnapi[i]->rx_ring) cancel_work_sync(&cpr->dim.work); - - napi_disable(&bp->bnapi[i]->napi); } } @@ -9264,9 +9299,11 @@ void bnxt_tx_disable(struct bnxt *bp) if (bp->tx_ring) { for (i = 0; i < bp->tx_nr_rings; i++) { txr = &bp->tx_ring[i]; - txr->dev_state = BNXT_DEV_STATE_CLOSING; + WRITE_ONCE(txr->dev_state, BNXT_DEV_STATE_CLOSING); } } + /* Make sure napi polls see @dev_state change */ + synchronize_net(); /* Drop carrier first to prevent TX timeout */ netif_carrier_off(bp->dev); /* Stop all TX queues */ @@ -9280,8 +9317,10 @@ void bnxt_tx_enable(struct bnxt *bp) for (i = 0; i < bp->tx_nr_rings; i++) { txr = &bp->tx_ring[i]; - txr->dev_state = 0; + WRITE_ONCE(txr->dev_state, 0); } + /* Make sure napi polls see @dev_state change */ + synchronize_net(); netif_tx_wake_all_queues(bp->dev); if (bp->link_info.link_up) netif_carrier_on(bp->dev); @@ -10860,6 +10899,9 @@ static bool bnxt_rfs_supported(struct bnxt *bp) return true; return false; } + /* 212 firmware is broken for aRFS */ + if (BNXT_FW_MAJ(bp) == 212) + return false; if (BNXT_PF(bp) && !BNXT_CHIP_TYPE_NITRO_A0(bp)) return true; if (bp->flags & BNXT_FLAG_NEW_RSS_CAP) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 9c3324e76ff7..7b989b6e4f6e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -799,6 +799,7 @@ struct bnxt_tx_ring_info { u16 tx_prod; u16 tx_cons; u16 txq_index; + u8 kick_pending; struct bnxt_db_info tx_db; struct tx_bd *tx_desc_ring[MAX_TX_PAGES]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 64381be935a8..2cd8bb37e641 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -743,14 +743,17 @@ static void bnxt_dl_params_unregister(struct bnxt *bp) int bnxt_dl_register(struct bnxt *bp) { + const struct devlink_ops *devlink_ops; struct devlink_port_attrs attrs = {}; struct devlink *dl; int rc; if (BNXT_PF(bp)) - dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl)); + devlink_ops = &bnxt_dl_ops; else - dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl)); + devlink_ops = &bnxt_vf_dl_ops; + + dl = devlink_alloc(devlink_ops, sizeof(struct bnxt_dl), &bp->pdev->dev); if (!dl) { netdev_warn(bp->dev, "devlink_alloc failed\n"); return -ENOMEM; @@ -763,7 +766,7 @@ int bnxt_dl_register(struct bnxt *bp) bp->hwrm_spec_code > 0x10803) bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; - rc = devlink_register(dl, &bp->pdev->dev); + rc = devlink_register(dl); if (rc) { netdev_warn(bp->dev, "devlink_register failed. 
rc=%d\n", rc); goto err_dl_free; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 3fc6781c5b98..94d07a9f7034 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -368,6 +368,7 @@ struct cmd_nums { #define HWRM_FUNC_PTP_TS_QUERY 0x19fUL #define HWRM_FUNC_PTP_EXT_CFG 0x1a0UL #define HWRM_FUNC_PTP_EXT_QCFG 0x1a1UL + #define HWRM_FUNC_KEY_CTX_ALLOC 0x1a2UL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -531,8 +532,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 2 -#define HWRM_VERSION_RSVD 47 -#define HWRM_VERSION_STR "1.10.2.47" +#define HWRM_VERSION_RSVD 52 +#define HWRM_VERSION_STR "1.10.2.52" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -585,6 +586,7 @@ struct hwrm_ver_get_output { #define VER_GET_RESP_DEV_CAPS_CFG_CFA_ADV_FLOW_MGNT_SUPPORTED 0x1000UL #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TFLIB_SUPPORTED 0x2000UL #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL + #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL u8 roce_fw_maj_8b; u8 roce_fw_min_8b; u8 roce_fw_bld_8b; @@ -886,7 +888,8 @@ struct hwrm_async_event_cmpl_reset_notify { #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL (0x2UL << 8) #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL (0x3UL << 8) #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET (0x4UL << 8) - #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION (0x5UL << 8) + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK 0xffff0000UL #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT 16 }; @@ -1236,13 +1239,14 @@ struct hwrm_async_event_cmpl_error_report_base { u8 timestamp_lo; __le16 timestamp_hi; __le32 event_data1; - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0 - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL + #define 
ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD }; /* hwrm_async_event_cmpl_error_report_pause_storm (size:128b/16B) */ @@ -1446,6 +1450,8 @@ struct hwrm_func_vf_cfg_input { #define FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS 0x200UL #define FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS 0x400UL #define FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS 0x800UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_TX_KEY_CTXS 0x1000UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_RX_KEY_CTXS 0x2000UL __le16 mtu; __le16 guest_vlan; __le16 async_event_cr; @@ -1469,7 +1475,8 @@ struct hwrm_func_vf_cfg_input { __le16 num_vnics; __le16 num_stat_ctxs; __le16 num_hw_ring_grps; - u8 unused_0[4]; + __le16 num_tx_key_ctxs; + __le16 num_rx_key_ctxs; }; /* hwrm_func_vf_cfg_output (size:128b/16B) */ @@ -1493,7 +1500,7 @@ struct hwrm_func_qcaps_input { u8 unused_0[6]; }; -/* hwrm_func_qcaps_output (size:704b/88B) */ +/* hwrm_func_qcaps_output (size:768b/96B) */ struct hwrm_func_qcaps_output { __le16 error_code; __le16 req_type; @@ -1587,7 +1594,8 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TE_CFA 0x4UL #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_RE_CFA 0x8UL #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_PRIMATE 0x10UL - u8 unused_1; + __le16 max_key_ctxs_alloc; + u8 unused_1[7]; u8 valid; }; @@ -1602,7 +1610,7 @@ struct hwrm_func_qcfg_input { u8 unused_0[6]; }; -/* hwrm_func_qcfg_output (size:832b/104B) */ +/* hwrm_func_qcfg_output (size:896b/112B) */ struct hwrm_func_qcfg_output { __le16 error_code; __le16 req_type; @@ -1749,11 +1757,13 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 __le16 host_mtu; - u8 unused_3; + __le16 alloc_tx_key_ctxs; + __le16 alloc_rx_key_ctxs; + u8 unused_3[5]; u8 valid; }; -/* hwrm_func_cfg_input (size:832b/104B) */ +/* hwrm_func_cfg_input (size:896b/112B) */ struct hwrm_func_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -1820,6 +1830,8 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_ENABLES_PARTITION_MAX_BW 0x8000000UL #define FUNC_CFG_REQ_ENABLES_TPID 0x10000000UL #define FUNC_CFG_REQ_ENABLES_HOST_MTU 0x20000000UL + #define FUNC_CFG_REQ_ENABLES_TX_KEY_CTXS 0x40000000UL + #define FUNC_CFG_REQ_ENABLES_RX_KEY_CTXS 0x80000000UL __le16 admin_mtu; __le16 mru; __le16 num_rsscos_ctxs; @@ -1929,6 +1941,9 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 __be16 tpid; __le16 host_mtu; + __le16 num_tx_key_ctxs; + __le16 num_rx_key_ctxs; + u8 unused_0[4]; }; /* hwrm_func_cfg_output (size:128b/16B) */ @@ -2099,6 +2114,7 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT 0x40UL #define FUNC_DRV_RGTR_REQ_FLAGS_FAST_RESET_SUPPORT 0x80UL #define FUNC_DRV_RGTR_REQ_FLAGS_RSS_STRICT_HASH_TYPE_SUPPORT 0x100UL + #define FUNC_DRV_RGTR_REQ_FLAGS_NPAR_1_2_SUPPORT 0x200UL __le32 enables; #define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL #define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL @@ -2268,7 +2284,7 @@ struct hwrm_func_resource_qcaps_input { u8 unused_0[6]; }; -/* hwrm_func_resource_qcaps_output (size:448b/56B) */ +/* hwrm_func_resource_qcaps_output (size:512b/64B) */ struct hwrm_func_resource_qcaps_output { __le16 error_code; __le16 req_type; @@ -2300,11 +2316,15 @@ struct hwrm_func_resource_qcaps_output { __le16 
max_tx_scheduler_inputs; __le16 flags; #define FUNC_RESOURCE_QCAPS_RESP_FLAGS_MIN_GUARANTEED 0x1UL + __le16 min_tx_key_ctxs; + __le16 max_tx_key_ctxs; + __le16 min_rx_key_ctxs; + __le16 max_rx_key_ctxs; u8 unused_0[5]; u8 valid; }; -/* hwrm_func_vf_resource_cfg_input (size:448b/56B) */ +/* hwrm_func_vf_resource_cfg_input (size:512b/64B) */ struct hwrm_func_vf_resource_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -2331,6 +2351,10 @@ struct hwrm_func_vf_resource_cfg_input { __le16 max_hw_ring_grps; __le16 flags; #define FUNC_VF_RESOURCE_CFG_REQ_FLAGS_MIN_GUARANTEED 0x1UL + __le16 min_tx_key_ctxs; + __le16 max_tx_key_ctxs; + __le16 min_rx_key_ctxs; + __le16 max_rx_key_ctxs; u8 unused_0[2]; }; @@ -2348,7 +2372,9 @@ struct hwrm_func_vf_resource_cfg_output { __le16 reserved_vnics; __le16 reserved_stat_ctx; __le16 reserved_hw_ring_grps; - u8 unused_0[7]; + __le16 reserved_tx_key_ctxs; + __le16 reserved_rx_key_ctxs; + u8 unused_0[3]; u8 valid; }; @@ -4220,7 +4246,7 @@ struct hwrm_port_lpbk_clr_stats_output { u8 valid; }; -/* hwrm_port_ts_query_input (size:256b/32B) */ +/* hwrm_port_ts_query_input (size:320b/40B) */ struct hwrm_port_ts_query_input { __le16 req_type; __le16 cmpl_ring; @@ -4238,8 +4264,11 @@ struct hwrm_port_ts_query_input { __le16 enables; #define PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT 0x1UL #define PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID 0x2UL + #define PORT_TS_QUERY_REQ_ENABLES_PTP_HDR_OFFSET 0x4UL __le16 ts_req_timeout; __le32 ptp_seq_id; + __le16 ptp_hdr_offset; + u8 unused_1[6]; }; /* hwrm_port_ts_query_output (size:192b/24B) */ @@ -8172,6 +8201,7 @@ struct hwrm_fw_reset_input { u8 host_idx; u8 flags; #define FW_RESET_REQ_FLAGS_RESET_GRACEFUL 0x1UL + #define FW_RESET_REQ_FLAGS_FW_ACTIVATION 0x2UL u8 unused_0[4]; }; @@ -8952,7 +8982,7 @@ struct hwrm_nvm_get_dir_info_output { u8 valid; }; -/* hwrm_nvm_write_input (size:384b/48B) */ +/* hwrm_nvm_write_input (size:448b/56B) */ struct hwrm_nvm_write_input { __le16 req_type; __le16 cmpl_ring; @@ -8968,7 +8998,11 @@ struct hwrm_nvm_write_input { __le16 option; __le16 flags; #define NVM_WRITE_REQ_FLAGS_KEEP_ORIG_ACTIVE_IMG 0x1UL + #define NVM_WRITE_REQ_FLAGS_BATCH_MODE 0x2UL + #define NVM_WRITE_REQ_FLAGS_BATCH_LAST 0x4UL __le32 dir_item_length; + __le32 offset; + __le32 len; __le32 unused_0; }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 7f55ebbfd04b..2fe3c9081f8d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -20,7 +20,7 @@ #include "bnxt.h" #include "bnxt_ptp.h" -int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id) +int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off) { unsigned int ptp_class; struct ptp_header *hdr; @@ -34,6 +34,7 @@ int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id) if (!hdr) return -EINVAL; + *hdr_off = (u8 *)hdr - skb->data; *seq_id = ntohs(hdr->sequence_id); return 0; default: @@ -94,6 +95,7 @@ static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts) PORT_TS_QUERY_REQ_FLAGS_PATH_TX) { req.enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES); req.ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid); + req.ptp_hdr_offset = cpu_to_le16(bp->ptp_cfg->tx_hdr_off); req.ts_req_timeout = cpu_to_le16(BNXT_PTP_QTS_TIMEOUT); } mutex_lock(&bp->hwrm_cmd_lock); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index cc3cdbaab6cf..fa5f05708e6d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -10,8 +10,8 @@ #ifndef BNXT_PTP_H #define BNXT_PTP_H -#define BNXT_PTP_GRC_WIN 5 -#define BNXT_PTP_GRC_WIN_BASE 0x5000 +#define BNXT_PTP_GRC_WIN 6 +#define BNXT_PTP_GRC_WIN_BASE 0x6000 #define BNXT_MAX_PHC_DRIFT 31000000 #define BNXT_LO_TIMER_MASK 0x0000ffffffffUL @@ -19,7 +19,8 @@ #define BNXT_PTP_QTS_TIMEOUT 1000 #define BNXT_PTP_QTS_TX_ENABLES (PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID | \ - PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT) + PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT | \ + PORT_TS_QUERY_REQ_ENABLES_PTP_HDR_OFFSET) struct pps_pin { u8 event; @@ -88,6 +89,7 @@ struct bnxt_ptp_cfg { #define BNXT_PHC_OVERFLOW_PERIOD (19 * 3600 * HZ) u16 tx_seqid; + u16 tx_hdr_off; struct bnxt *bp; atomic_t tx_avail; #define BNXT_MAX_TX_TS 1 @@ -125,7 +127,7 @@ do { \ ((dst) = READ_ONCE(src)) #endif -int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id); +int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off); void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2); void bnxt_ptp_reapply_pps(struct bnxt *bp); int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index e432a68ac520..5b2a461dfd28 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -22,6 +22,7 @@ if NET_VENDOR_CADENCE config MACB tristate "Cadence MACB/GEM support" depends on HAS_DMA && COMMON_CLK + depends on PTP_1588_CLOCK_OPTIONAL select PHYLINK select CRC32 help diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 181ebc235925..d13fb1d31821 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4533,6 +4533,14 @@ static const struct macb_config sama5d2_config = { .usrio = &macb_default_usrio, }; +static const struct macb_config sama5d29_config = { + .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_GEM_HAS_PTP, + .dma_burst_length = 16, + .clk_init = macb_clk_init, + .init = macb_init, + .usrio = &macb_default_usrio, +}; + static const struct macb_config sama5d3_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO, @@ -4610,6 +4618,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,gem", .data = &pc302gem_config }, { .compatible = "cdns,sam9x60-macb", .data = &at91sam9260_config }, { .compatible = "atmel,sama5d2-gem", .data = &sama5d2_config }, + { .compatible = "atmel,sama5d29-gem", .data = &sama5d29_config }, { .compatible = "atmel,sama5d3-gem", .data = &sama5d3_config }, { .compatible = "atmel,sama5d3-macb", .data = &sama5d3macb_config }, { .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config }, diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index 4875cdae622e..1c76c95b0b27 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -66,7 +66,7 @@ config LIQUIDIO tristate "Cavium LiquidIO support" depends on 64BIT && PCI depends on PCI - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER select LIBCRC32C select NET_DEVLINK @@ -91,7 +91,7 @@ config OCTEON_MGMT_ETHERNET config LIQUIDIO_VF tristate "Cavium LiquidIO VF support" depends on 64BIT && PCI_MSI - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL help This driver supports Cavium LiquidIO Intelligent Server Adapter based on CN23XX chips. 
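The bnxt_devlink.c hunk above, and the liquidio and dpaa2 conversions that follow, all switch to the devlink API where the backing struct device is passed to devlink_alloc() instead of devlink_register(). The resulting registration shape, condensed from those hunks (the ops and priv names below are per-driver stand-ins, not a specific driver's symbols):

	struct devlink *dl;
	int err;

	/* The device pointer now goes in at allocation time. */
	dl = devlink_alloc(&drv_devlink_ops, sizeof(struct drv_dl_priv),
			   &pdev->dev);
	if (!dl)
		return -ENOMEM;

	/* ... fill devlink_priv(dl) as before ... */

	err = devlink_register(dl);	/* no struct device argument anymore */
	if (err) {
		devlink_free(dl);
		return err;
	}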
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index af116ef83bad..2907e13b9df6 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3750,7 +3750,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) } devlink = devlink_alloc(&liquidio_devlink_ops, - sizeof(struct lio_devlink_priv)); + sizeof(struct lio_devlink_priv), + &octeon_dev->pci_dev->dev); if (!devlink) { dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n"); goto setup_nic_dev_free; @@ -3759,7 +3760,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) lio_devlink = devlink_priv(devlink); lio_devlink->oct = octeon_dev; - if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) { + if (devlink_register(devlink)) { devlink_free(devlink); dev_err(&octeon_dev->pci_dev->dev, "devlink registration failed\n"); diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig index 8ba0e08e5e64..c931ec8cac40 100644 --- a/drivers/net/ethernet/chelsio/Kconfig +++ b/drivers/net/ethernet/chelsio/Kconfig @@ -69,6 +69,7 @@ config CHELSIO_T3 config CHELSIO_T4 tristate "Chelsio Communications T4/T5/T6 Ethernet support" depends on PCI && (IPV6 || IPV6=n) && (TLS || TLS=n) + depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER select MDIO select ZLIB_DEFLATE diff --git a/drivers/net/ethernet/dec/tulip/media.c b/drivers/net/ethernet/dec/tulip/media.c index 011604787b8e..55d6fc99f40b 100644 --- a/drivers/net/ethernet/dec/tulip/media.c +++ b/drivers/net/ethernet/dec/tulip/media.c @@ -362,7 +362,7 @@ void tulip_select_media(struct net_device *dev, int startup) iowrite32(0x33, ioaddr + CSR12); new_csr6 = 0x01860000; /* Trigger autonegotiation. */ - iowrite32(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8); + iowrite32(0x0001F868, ioaddr + 0xB8); } else { iowrite32(0x32, ioaddr + CSR12); new_csr6 = 0x00420000; diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 2d1abdd58fab..e04e1c5cb013 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -25,10 +25,10 @@ config FEC depends on (M523x || M527x || M5272 || M528x || M520x || M532x || \ ARCH_MXC || SOC_IMX28 || COMPILE_TEST) default ARCH_MXC || SOC_IMX28 if ARM + depends on PTP_1588_CLOCK_OPTIONAL select CRC32 select PHYLIB imply NET_SELFTESTS - imply PTP_1588_CLOCK help Say Y here if you want to use the built-in 10/100 Fast ethernet controller on some Motorola ColdFire and Freescale i.MX processors. diff --git a/drivers/net/ethernet/freescale/dpaa/Kconfig b/drivers/net/ethernet/freescale/dpaa/Kconfig index 626ec58a0afc..0e1439fd00bd 100644 --- a/drivers/net/ethernet/freescale/dpaa/Kconfig +++ b/drivers/net/ethernet/freescale/dpaa/Kconfig @@ -4,7 +4,6 @@ menuconfig FSL_DPAA_ETH depends on FSL_DPAA && FSL_FMAN select PHYLIB select FIXED_PHY - select FSL_FMAN_MAC help Data Path Acceleration Architecture Ethernet driver, supporting the Freescale QorIQ chips. 
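Several hunks in this section (liquidio here, then dpaa2, hclge/hclgevf, hinic and ice) convert drivers to the updated devlink API: devlink_alloc() now takes the parent struct device, and devlink_register() no longer does. The sketch below shows the resulting registration flow, based only on the call signatures visible in these hunks; the foo_* names are hypothetical.

    #include <linux/device.h>
    #include <net/devlink.h>

    struct foo_devlink_priv {
            void *drvdata;
    };

    static const struct devlink_ops foo_devlink_ops;

    static struct devlink *foo_devlink_init(struct device *dev, void *drvdata)
    {
            struct foo_devlink_priv *priv;
            struct devlink *devlink;

            /* The parent device is bound at allocation time ... */
            devlink = devlink_alloc(&foo_devlink_ops,
                                    sizeof(struct foo_devlink_priv), dev);
            if (!devlink)
                    return NULL;

            priv = devlink_priv(devlink);
            priv->drvdata = drvdata;

            /* ... so registration takes only the devlink instance. */
            if (devlink_register(devlink)) {
                    devlink_free(devlink);
                    return NULL;
            }

            return devlink;
    }

The hclge and hclgevf hunks further down follow the same ordering, assigning the driver's devlink back-pointer immediately after allocation rather than after a successful registration.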
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c index 8e09f65ea295..605a39f892b9 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c @@ -196,7 +196,8 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv) struct dpaa2_eth_devlink_priv *dl_priv; int err; - priv->devlink = devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv)); + priv->devlink = + devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv), dev); if (!priv->devlink) { dev_err(dev, "devlink_alloc failed\n"); return -ENOMEM; @@ -204,7 +205,7 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv) dl_priv = devlink_priv(priv->devlink); dl_priv->dpaa2_priv = priv; - err = devlink_register(priv->devlink, dev); + err = devlink_register(priv->devlink); if (err) { dev_err(dev, "devlink_register() = %d\n", err); goto devlink_free; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index d260993ab2dc..1419c8dccea2 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -3160,26 +3160,30 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) return err; } -static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev) +static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw) +{ + dpsw_ctrl_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle); + dpaa2_switch_free_dpio(ethsw); + dpaa2_switch_destroy_rings(ethsw); + dpaa2_switch_drain_bp(ethsw); + dpaa2_switch_free_dpbp(ethsw); +} + +static void dpaa2_switch_teardown(struct fsl_mc_device *sw_dev) { struct device *dev = &sw_dev->dev; struct ethsw_core *ethsw = dev_get_drvdata(dev); int err; + dpaa2_switch_ctrl_if_teardown(ethsw); + + destroy_workqueue(ethsw->workqueue); + err = dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle); if (err) dev_warn(dev, "dpsw_close err %d\n", err); } -static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw) -{ - dpsw_ctrl_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle); - dpaa2_switch_free_dpio(ethsw); - dpaa2_switch_destroy_rings(ethsw); - dpaa2_switch_drain_bp(ethsw); - dpaa2_switch_free_dpbp(ethsw); -} - static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) { struct ethsw_port_priv *port_priv; @@ -3190,8 +3194,6 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) dev = &sw_dev->dev; ethsw = dev_get_drvdata(dev); - dpaa2_switch_ctrl_if_teardown(ethsw); - dpaa2_switch_teardown_irqs(sw_dev); dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle); @@ -3207,9 +3209,7 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) kfree(ethsw->filter_blocks); kfree(ethsw->ports); - dpaa2_switch_takedown(sw_dev); - - destroy_workqueue(ethsw->workqueue); + dpaa2_switch_teardown(sw_dev); fsl_mc_portal_free(ethsw->mc_io); @@ -3326,7 +3326,7 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev) GFP_KERNEL); if (!(ethsw->ports)) { err = -ENOMEM; - goto err_takedown; + goto err_teardown; } ethsw->fdbs = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->fdbs), @@ -3397,8 +3397,8 @@ err_free_fdbs: err_free_ports: kfree(ethsw->ports); -err_takedown: - dpaa2_switch_takedown(sw_dev); +err_teardown: + dpaa2_switch_teardown(sw_dev); err_free_cmdport: fsl_mc_portal_free(ethsw->mc_io); diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index ae3259164395..7b4961daa254 100644 --- 
a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -189,6 +189,8 @@ #define FEC_RXIC0 0xfff #define FEC_RXIC1 0xfff #define FEC_RXIC2 0xfff +#define FEC_LPI_SLEEP 0xfff +#define FEC_LPI_WAKE 0xfff #endif /* CONFIG_M5272 */ @@ -490,6 +492,9 @@ struct bufdesc_ex { */ #define FEC_QUIRK_DELAYED_CLKS_SUPPORT (1 << 21) +/* i.MX8MQ SoC integration mix wakeup interrupt signal into "int2" interrupt line. */ +#define FEC_QUIRK_WAKEUP_FROM_INT2 (1 << 22) + struct bufdesc_prop { int qid; /* Address of Rx and Tx buffers */ @@ -578,6 +583,7 @@ struct fec_enet_private { bool bufdesc_ex; int pause_flag; int wol_flag; + int wake_irq; u32 quirks; struct napi_struct napi; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1201c13afa6f..83ab34b1d735 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -142,7 +142,7 @@ static const struct fec_devinfo fec_imx8mq_info = { FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES | - FEC_QUIRK_HAS_EEE, + FEC_QUIRK_HAS_EEE | FEC_QUIRK_WAKEUP_FROM_INT2, }; static const struct fec_devinfo fec_imx8qm_info = { @@ -2878,12 +2878,12 @@ fec_enet_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) device_set_wakeup_enable(&ndev->dev, wol->wolopts & WAKE_MAGIC); if (device_may_wakeup(&ndev->dev)) { fep->wol_flag |= FEC_WOL_FLAG_ENABLE; - if (fep->irq[0] > 0) - enable_irq_wake(fep->irq[0]); + if (fep->wake_irq > 0) + enable_irq_wake(fep->wake_irq); } else { fep->wol_flag &= (~FEC_WOL_FLAG_ENABLE); - if (fep->irq[0] > 0) - disable_irq_wake(fep->irq[0]); + if (fep->wake_irq > 0) + disable_irq_wake(fep->wake_irq); } return 0; @@ -3696,6 +3696,17 @@ static int fec_enet_get_irq_cnt(struct platform_device *pdev) return irq_cnt; } +static void fec_enet_get_wakeup_irq(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct fec_enet_private *fep = netdev_priv(ndev); + + if (fep->quirks & FEC_QUIRK_WAKEUP_FROM_INT2) + fep->wake_irq = fep->irq[2]; + else + fep->wake_irq = fep->irq[0]; +} + static int fec_enet_init_stop_mode(struct fec_enet_private *fep, struct device_node *np) { @@ -3935,6 +3946,9 @@ fec_probe(struct platform_device *pdev) fep->irq[i] = irq; } + /* Decide which interrupt line is wakeup capable */ + fec_enet_get_wakeup_irq(pdev); + ret = fec_enet_mii_init(pdev); if (ret) goto failed_mii_init; @@ -4017,13 +4031,13 @@ fec_drv_remove(struct platform_device *pdev) if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); - free_netdev(ndev); clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg); pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); + free_netdev(ndev); return 0; } diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 094e4a37a295..3312e1d93c3b 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -91,6 +91,7 @@ config HNS3 tristate "Hisilicon Network Subsystem Support HNS3 (Framework)" depends on PCI select NET_DEVLINK + select PAGE_POOL help This selects the framework support for Hisilicon Network Subsystem 3. 
This layer facilitates clients like ENET, RoCE and user-space ethernet @@ -103,7 +104,7 @@ config HNS3_HCLGE tristate "Hisilicon HNS3 HCLGE Acceleration Engine & Compatibility Layer Support" default m depends on PCI_MSI - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL help This selects the HNS3_HCLGE network acceleration engine & its hardware compatibility layer. The engine would be used in Hisilicon hip08 family of diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index e0b7c3c44e7b..848bed866193 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -718,6 +718,8 @@ struct hnae3_ae_ops { u32 nsec, u32 sec); int (*get_ts_info)(struct hnae3_handle *handle, struct ethtool_ts_info *info); + int (*get_link_diagnosis_info)(struct hnae3_handle *handle, + u32 *status_code); }; struct hnae3_dcb_ops { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index cb8d5da3654f..fcbeb1fbe5b8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3205,6 +3205,21 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring, unsigned int order = hns3_page_order(ring); struct page *p; + if (ring->page_pool) { + p = page_pool_dev_alloc_frag(ring->page_pool, + &cb->page_offset, + hns3_buf_size(ring)); + if (unlikely(!p)) + return -ENOMEM; + + cb->priv = p; + cb->buf = page_address(p); + cb->dma = page_pool_get_dma_addr(p); + cb->type = DESC_TYPE_PP_FRAG; + cb->reuse_flag = 0; + return 0; + } + p = dev_alloc_pages(order); if (!p) return -ENOMEM; @@ -3227,8 +3242,13 @@ static void hns3_free_buffer(struct hns3_enet_ring *ring, if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB)) napi_consume_skb(cb->priv, budget); - else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias) - __page_frag_cache_drain(cb->priv, cb->pagecnt_bias); + else if (!HNAE3_IS_TX_RING(ring)) { + if (cb->type & DESC_TYPE_PAGE && cb->pagecnt_bias) + __page_frag_cache_drain(cb->priv, cb->pagecnt_bias); + else if (cb->type & DESC_TYPE_PP_FRAG) + page_pool_put_full_page(ring->page_pool, cb->priv, + false); + } memset(cb, 0, sizeof(*cb)); } @@ -3315,7 +3335,7 @@ static int hns3_alloc_and_map_buffer(struct hns3_enet_ring *ring, int ret; ret = hns3_alloc_buffer(ring, cb); - if (ret) + if (ret || ring->page_pool) goto out; ret = hns3_map_buffer(ring, cb); @@ -3337,7 +3357,8 @@ static int hns3_alloc_and_attach_buffer(struct hns3_enet_ring *ring, int i) if (ret) return ret; - ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + + ring->desc_cb[i].page_offset); return 0; } @@ -3367,7 +3388,8 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, { hns3_unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; - ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + + ring->desc_cb[i].page_offset); ring->desc[i].rx.bd_base_info = 0; } @@ -3539,6 +3561,12 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, u32 frag_size = size - pull_len; bool reused; + if (ring->page_pool) { + skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset, + frag_size, truesize); + return; + } + /* Avoid re-using remote or pfmem page */ if (unlikely(!dev_page_is_reusable(desc_cb->priv))) goto out; @@ -3856,6 +3884,9 @@ static int hns3_alloc_skb(struct 
hns3_enet_ring *ring, unsigned int length, /* We can reuse buffer as-is, just make sure it is reusable */ if (dev_page_is_reusable(desc_cb->priv)) desc_cb->reuse_flag = 1; + else if (desc_cb->type & DESC_TYPE_PP_FRAG) + page_pool_put_full_page(ring->page_pool, desc_cb->priv, + false); else /* This page cannot be reused so discard it */ __page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias); @@ -3863,6 +3894,10 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, hns3_rx_ring_move_fw(ring); return 0; } + + if (ring->page_pool) + skb_mark_for_recycle(skb); + u64_stats_update_begin(&ring->syncp); ring->stats.seg_pkt_cnt++; u64_stats_update_end(&ring->syncp); @@ -3901,6 +3936,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring) "alloc rx fraglist skb fail\n"); return -ENXIO; } + + if (ring->page_pool) + skb_mark_for_recycle(new_skb); + ring->frag_num = 0; if (ring->tail_skb) { @@ -4705,6 +4744,29 @@ static void hns3_put_ring_config(struct hns3_nic_priv *priv) priv->ring = NULL; } +static void hns3_alloc_page_pool(struct hns3_enet_ring *ring) +{ + struct page_pool_params pp_params = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG | + PP_FLAG_DMA_SYNC_DEV, + .order = hns3_page_order(ring), + .pool_size = ring->desc_num * hns3_buf_size(ring) / + (PAGE_SIZE << hns3_page_order(ring)), + .nid = dev_to_node(ring_to_dev(ring)), + .dev = ring_to_dev(ring), + .dma_dir = DMA_FROM_DEVICE, + .offset = 0, + .max_len = PAGE_SIZE << hns3_page_order(ring), + }; + + ring->page_pool = page_pool_create(&pp_params); + if (IS_ERR(ring->page_pool)) { + dev_warn(ring_to_dev(ring), "page pool creation failed: %ld\n", + PTR_ERR(ring->page_pool)); + ring->page_pool = NULL; + } +} + static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring) { int ret; @@ -4724,6 +4786,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring) goto out_with_desc_cb; if (!HNAE3_IS_TX_RING(ring)) { + hns3_alloc_page_pool(ring); + ret = hns3_alloc_ring_buffers(ring); if (ret) goto out_with_desc; @@ -4764,6 +4828,11 @@ void hns3_fini_ring(struct hns3_enet_ring *ring) devm_kfree(ring_to_dev(ring), tx_spare); ring->tx_spare = NULL; } + + if (!HNAE3_IS_TX_RING(ring) && ring->page_pool) { + page_pool_destroy(ring->page_pool); + ring->page_pool = NULL; + } } static int hns3_buf_size2type(u32 buf_size) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 15af3d93857b..b0e696b08b8b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -6,6 +6,7 @@ #include <linux/dim.h> #include <linux/if_vlan.h> +#include <net/page_pool.h> #include "hnae3.h" @@ -307,6 +308,7 @@ enum hns3_desc_type { DESC_TYPE_BOUNCE_ALL = 1 << 3, DESC_TYPE_BOUNCE_HEAD = 1 << 4, DESC_TYPE_SGL_SKB = 1 << 5, + DESC_TYPE_PP_FRAG = 1 << 6, }; struct hns3_desc_cb { @@ -451,6 +453,7 @@ struct hns3_enet_ring { struct hnae3_queue *tqp; int queue_index; struct device *dev; /* will be used for DMA mapping of descriptors */ + struct page_pool *page_pool; /* statistic */ struct ring_stats stats; @@ -593,6 +596,11 @@ struct hns3_hw_error_info { const char *msg; }; +struct hns3_reset_type_map { + enum ethtool_reset_flags rst_flags; + enum hnae3_reset_type rst_type; +}; + static inline int ring_space(struct hns3_enet_ring *ring) { /* This smp_load_acquire() pairs with smp_store_release() in diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 
82061ab6930f..835105015763 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -7,21 +7,7 @@ #include <linux/sfp.h> #include "hns3_enet.h" - -struct hns3_stats { - char stats_string[ETH_GSTRING_LEN]; - int stats_offset; -}; - -struct hns3_sfp_type { - u8 type; - u8 ext_type; -}; - -struct hns3_pflag_desc { - char name[ETH_GSTRING_LEN]; - void (*handler)(struct net_device *netdev, bool enable); -}; +#include "hns3_ethtool.h" /* tqp related stats */ #define HNS3_TQP_STAT(_string, _member) { \ @@ -953,6 +939,60 @@ static int hns3_get_rxnfc(struct net_device *netdev, } } +static const struct hns3_reset_type_map hns3_reset_type[] = { + {ETH_RESET_MGMT, HNAE3_IMP_RESET}, + {ETH_RESET_ALL, HNAE3_GLOBAL_RESET}, + {ETH_RESET_DEDICATED, HNAE3_FUNC_RESET}, +}; + +static const struct hns3_reset_type_map hns3vf_reset_type[] = { + {ETH_RESET_DEDICATED, HNAE3_VF_FUNC_RESET}, +}; + +static int hns3_set_reset(struct net_device *netdev, u32 *flags) +{ + enum hnae3_reset_type rst_type = HNAE3_NONE_RESET; + struct hnae3_handle *h = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); + const struct hnae3_ae_ops *ops = h->ae_algo->ops; + const struct hns3_reset_type_map *rst_type_map; + u32 i, size; + + if (ops->ae_dev_resetting && ops->ae_dev_resetting(h)) + return -EBUSY; + + if (!ops->set_default_reset_request || !ops->reset_event) + return -EOPNOTSUPP; + + if (h->flags & HNAE3_SUPPORT_VF) { + rst_type_map = hns3vf_reset_type; + size = ARRAY_SIZE(hns3vf_reset_type); + } else { + rst_type_map = hns3_reset_type; + size = ARRAY_SIZE(hns3_reset_type); + } + + for (i = 0; i < size; i++) { + if (rst_type_map[i].rst_flags == *flags) { + rst_type = rst_type_map[i].rst_type; + break; + } + } + + if (rst_type == HNAE3_NONE_RESET || + (rst_type == HNAE3_IMP_RESET && + ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2)) + return -EOPNOTSUPP; + + netdev_info(netdev, "Setting reset type %d\n", rst_type); + + ops->set_default_reset_request(ae_dev, rst_type); + + ops->reset_event(h->pdev, h); + + return 0; +} + static void hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv, u32 tx_desc_num, u32 rx_desc_num) { @@ -1671,6 +1711,71 @@ static int hns3_get_ts_info(struct net_device *netdev, return ethtool_op_get_ts_info(netdev, info); } +static const struct hns3_ethtool_link_ext_state_mapping +hns3_link_ext_state_map[] = { + {1, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD}, + {2, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED}, + + {256, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT}, + {257, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY}, + {512, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT}, + + {513, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK}, + {514, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED}, + {515, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED}, + + {768, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS}, + {769, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST}, + {770, 
ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS}, + + {1024, ETHTOOL_LINK_EXT_STATE_NO_CABLE, 0}, + {1025, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + + {1026, ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, 0}, +}; + +static int hns3_get_link_ext_state(struct net_device *netdev, + struct ethtool_link_ext_state_info *info) +{ + const struct hns3_ethtool_link_ext_state_mapping *map; + struct hnae3_handle *h = hns3_get_handle(netdev); + u32 status_code, i; + int ret; + + if (netif_carrier_ok(netdev)) + return -ENODATA; + + if (!h->ae_algo->ops->get_link_diagnosis_info) + return -EOPNOTSUPP; + + ret = h->ae_algo->ops->get_link_diagnosis_info(h, &status_code); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(hns3_link_ext_state_map); i++) { + map = &hns3_link_ext_state_map[i]; + if (map->status_code == status_code) { + info->link_ext_state = map->link_ext_state; + info->__link_ext_substate = map->link_ext_substate; + return 0; + } + } + + return -ENODATA; +} + static const struct ethtool_ops hns3vf_ethtool_ops = { .supported_coalesce_params = HNS3_ETHTOOL_COALESCE, .get_drvinfo = hns3_get_drvinfo, @@ -1699,6 +1804,7 @@ static const struct ethtool_ops hns3vf_ethtool_ops = { .set_priv_flags = hns3_set_priv_flags, .get_tunable = hns3_get_tunable, .set_tunable = hns3_set_tunable, + .reset = hns3_set_reset, }; static const struct ethtool_ops hns3_ethtool_ops = { @@ -1740,6 +1846,8 @@ static const struct ethtool_ops hns3_ethtool_ops = { .get_ts_info = hns3_get_ts_info, .get_tunable = hns3_get_tunable, .set_tunable = hns3_set_tunable, + .reset = hns3_set_reset, + .get_link_ext_state = hns3_get_link_ext_state, }; void hns3_ethtool_set_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.h b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.h new file mode 100644 index 000000000000..822d6fcbc73b --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +// Copyright (c) 2021 Hisilicon Limited. 
+ +#ifndef __HNS3_ETHTOOL_H +#define __HNS3_ETHTOOL_H + +#include <linux/ethtool.h> +#include <linux/netdevice.h> + +struct hns3_stats { + char stats_string[ETH_GSTRING_LEN]; + int stats_offset; +}; + +struct hns3_sfp_type { + u8 type; + u8 ext_type; +}; + +struct hns3_pflag_desc { + char name[ETH_GSTRING_LEN]; + void (*handler)(struct net_device *netdev, bool enable); +}; + +struct hns3_ethtool_link_ext_state_mapping { + u32 status_code; + enum ethtool_link_ext_state link_ext_state; + u8 link_ext_substate; +}; + +#endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 887297e37cf3..13042f1cac6f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -169,17 +169,19 @@ static bool hclge_is_special_opcode(u16 opcode) /* these commands have several descriptors, * and use the first one to save opcode and return value */ - u16 spec_opcode[] = {HCLGE_OPC_STATS_64_BIT, - HCLGE_OPC_STATS_32_BIT, - HCLGE_OPC_STATS_MAC, - HCLGE_OPC_STATS_MAC_ALL, - HCLGE_OPC_QUERY_32_BIT_REG, - HCLGE_OPC_QUERY_64_BIT_REG, - HCLGE_QUERY_CLEAR_MPF_RAS_INT, - HCLGE_QUERY_CLEAR_PF_RAS_INT, - HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT, - HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT, - HCLGE_QUERY_ALL_ERR_INFO}; + static const u16 spec_opcode[] = { + HCLGE_OPC_STATS_64_BIT, + HCLGE_OPC_STATS_32_BIT, + HCLGE_OPC_STATS_MAC, + HCLGE_OPC_STATS_MAC_ALL, + HCLGE_OPC_QUERY_32_BIT_REG, + HCLGE_OPC_QUERY_64_BIT_REG, + HCLGE_QUERY_CLEAR_MPF_RAS_INT, + HCLGE_QUERY_CLEAR_PF_RAS_INT, + HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT, + HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT, + HCLGE_QUERY_ALL_ERR_INFO + }; int i; for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 18bde77ef944..8e5be127909b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -316,6 +316,9 @@ enum hclge_opcode_type { /* PHY command */ HCLGE_OPC_PHY_LINK_KSETTING = 0x7025, HCLGE_OPC_PHY_REG = 0x7026, + + /* Query link diagnosis info command */ + HCLGE_OPC_QUERY_LINK_DIAGNOSIS = 0x702A, }; #define HCLGE_TQP_REG_OFFSET 0x80000 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c index 06d29945d4e1..e4aad695abcc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -112,22 +112,21 @@ int hclge_devlink_init(struct hclge_dev *hdev) int ret; devlink = devlink_alloc(&hclge_devlink_ops, - sizeof(struct hclge_devlink_priv)); + sizeof(struct hclge_devlink_priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); priv->hdev = hdev; + hdev->devlink = devlink; - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) { dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", ret); goto out_reg_fail; } - hdev->devlink = devlink; - devlink_reload_enable(devlink); return 0; @@ -141,14 +140,9 @@ void hclge_devlink_uninit(struct hclge_dev *hdev) { struct devlink *devlink = hdev->devlink; - if (!devlink) - return; - devlink_reload_disable(devlink); devlink_unregister(devlink); devlink_free(devlink); - - hdev->devlink = NULL; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index f15d76ec0068..8779a63d51b3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3789,6 +3789,12 @@ static void hclge_do_reset(struct hclge_dev *hdev) } switch (hdev->reset_type) { + case HNAE3_IMP_RESET: + dev_info(&pdev->dev, "IMP reset requested\n"); + val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG); + hnae3_set_bit(val, HCLGE_TRIGGER_IMP_RESET_B, 1); + hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, val); + break; case HNAE3_GLOBAL_RESET: dev_info(&pdev->dev, "global reset requested\n"); val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG); @@ -12837,6 +12843,29 @@ static int hclge_get_module_eeprom(struct hnae3_handle *handle, u32 offset, return 0; } +static int hclge_get_link_diagnosis_info(struct hnae3_handle *handle, + u32 *status_code) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + struct hclge_desc desc; + int ret; + + if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2) + return -EOPNOTSUPP; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_LINK_DIAGNOSIS, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to query link diagnosis info, ret = %d\n", ret); + return ret; + } + + *status_code = le32_to_cpu(desc.data[0]); + return 0; +} + static const struct hnae3_ae_ops hclge_ops = { .init_ae_dev = hclge_init_ae_dev, .uninit_ae_dev = hclge_uninit_ae_dev, @@ -12937,6 +12966,7 @@ static const struct hnae3_ae_ops hclge_ops = { .set_tx_hwts_info = hclge_ptp_set_tx_info, .get_rx_hwts = hclge_ptp_get_rx_hwts, .get_ts_info = hclge_ptp_get_ts_info, + .get_link_diagnosis_info = hclge_get_link_diagnosis_info, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index cc31b12904ad..ada5c68f2851 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -194,6 +194,7 @@ enum HLCGE_PORT_TYPE { #define HCLGE_VECTOR0_IMP_CMDQ_ERR_B 4U #define HCLGE_VECTOR0_IMP_RD_POISON_B 5U #define HCLGE_VECTOR0_ALL_MSIX_ERR_B 6U +#define HCLGE_TRIGGER_IMP_RESET_B 7U #define HCLGE_MAC_DEFAULT_FRAME \ (ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN + ETH_DATA_LEN) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c index 21a45279fd99..f478770299c6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c @@ -112,23 +112,23 @@ int hclgevf_devlink_init(struct hclgevf_dev *hdev) struct devlink *devlink; int ret; - devlink = devlink_alloc(&hclgevf_devlink_ops, - sizeof(struct hclgevf_devlink_priv)); + devlink = + devlink_alloc(&hclgevf_devlink_ops, + sizeof(struct hclgevf_devlink_priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); priv->hdev = hdev; + hdev->devlink = devlink; - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) { dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", ret); goto out_reg_fail; } - hdev->devlink = devlink; - devlink_reload_enable(devlink); return 0; @@ -142,14 +142,9 @@ void hclgevf_devlink_uninit(struct hclgevf_dev *hdev) { struct devlink *devlink = hdev->devlink; - if (!devlink) - return; - 
devlink_reload_disable(devlink); devlink_unregister(devlink); devlink_free(devlink); - - hdev->devlink = NULL; } diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index 58d5646444b0..6e11ee339f12 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -293,9 +293,9 @@ static const struct devlink_ops hinic_devlink_ops = { .flash_update = hinic_devlink_flash_update, }; -struct devlink *hinic_devlink_alloc(void) +struct devlink *hinic_devlink_alloc(struct device *dev) { - return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev)); + return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev), dev); } void hinic_devlink_free(struct devlink *devlink) @@ -303,11 +303,11 @@ void hinic_devlink_free(struct devlink *devlink) devlink_free(devlink); } -int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev) +int hinic_devlink_register(struct hinic_devlink_priv *priv) { struct devlink *devlink = priv_to_devlink(priv); - return devlink_register(devlink, dev); + return devlink_register(devlink); } void hinic_devlink_unregister(struct hinic_devlink_priv *priv) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.h b/drivers/net/ethernet/huawei/hinic/hinic_devlink.h index a090ebcfaabb..9e315011015c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.h @@ -108,9 +108,9 @@ struct host_image_st { u32 device_id; }; -struct devlink *hinic_devlink_alloc(void); +struct devlink *hinic_devlink_alloc(struct device *dev); void hinic_devlink_free(struct devlink *devlink); -int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev); +int hinic_devlink_register(struct hinic_devlink_priv *priv); void hinic_devlink_unregister(struct hinic_devlink_priv *priv); int hinic_health_reporters_create(struct hinic_devlink_priv *priv); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 428108eb10d2..56b6b04e209b 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -754,7 +754,7 @@ static int init_pfhwdev(struct hinic_pfhwdev *pfhwdev) return err; } - err = hinic_devlink_register(hwdev->devlink_dev, &pdev->dev); + err = hinic_devlink_register(hwdev->devlink_dev); if (err) { dev_err(&hwif->pdev->dev, "Failed to register devlink\n"); hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 405ee4d2d2b1..881d0b247561 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -1183,7 +1183,7 @@ static int nic_dev_init(struct pci_dev *pdev) struct devlink *devlink; int err, num_qps; - devlink = hinic_devlink_alloc(); + devlink = hinic_devlink_alloc(&pdev->dev); if (!devlink) { dev_err(&pdev->dev, "Hinic devlink alloc failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c index f8a26459ff65..a78c398bf5b2 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c @@ -836,8 +836,10 @@ int hinic_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting) int hinic_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, int max_tx_rate) { - 
u32 speeds[] = {SPEED_10, SPEED_100, SPEED_1000, SPEED_10000, - SPEED_25000, SPEED_40000, SPEED_100000}; + static const u32 speeds[] = { + SPEED_10, SPEED_100, SPEED_1000, SPEED_10000, + SPEED_25000, SPEED_40000, SPEED_100000 + }; struct hinic_dev *nic_dev = netdev_priv(netdev); struct hinic_port_cap port_cap = { 0 }; enum hinic_port_link_state link_state; diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 82744a7501c7..b0b6f90deb7d 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -58,8 +58,8 @@ config E1000 config E1000E tristate "Intel(R) PRO/1000 PCI-Express Gigabit Ethernet support" depends on PCI && (!SPARC32 || BROKEN) + depends on PTP_1588_CLOCK_OPTIONAL select CRC32 - imply PTP_1588_CLOCK help This driver supports the PCI-Express Intel(R) PRO/1000 gigabit ethernet family of adapters. For PCI or PCI-X e1000 adapters, @@ -87,7 +87,7 @@ config E1000E_HWTS config IGB tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support" depends on PCI - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL select I2C select I2C_ALGOBIT help @@ -159,9 +159,9 @@ config IXGB config IXGBE tristate "Intel(R) 10GbE PCI Express adapters support" depends on PCI + depends on PTP_1588_CLOCK_OPTIONAL select MDIO select PHYLIB - imply PTP_1588_CLOCK help This driver supports Intel(R) 10GbE PCI Express family of adapters. For more information on how to identify your adapter, go @@ -239,7 +239,7 @@ config IXGBEVF_IPSEC config I40E tristate "Intel(R) Ethernet Controller XL710 Family support" - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL depends on PCI select AUXILIARY_BUS help @@ -295,11 +295,11 @@ config ICE tristate "Intel(R) Ethernet Connection E800 Series Support" default n depends on PCI_MSI + depends on PTP_1588_CLOCK_OPTIONAL select AUXILIARY_BUS select DIMLIB select NET_DEVLINK select PLDMFW - imply PTP_1588_CLOCK help This driver supports Intel(R) Ethernet Connection E800 Series of devices. For more information on how to identify your adapter, go @@ -317,7 +317,7 @@ config FM10K tristate "Intel(R) FM10000 Ethernet Switch Host Interface Support" default n depends on PCI_MSI - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL help This driver supports Intel(R) FM10000 Ethernet Switch Host Interface. For more information on how to identify your adapter, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 97c78551395b..2f20980dd9a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4638,7 +4638,7 @@ void i40e_vsi_stop_rings(struct i40e_vsi *vsi) err = i40e_control_wait_rx_q(pf, pf_q, false); if (err) dev_info(&pf->pdev->dev, - "VSI seid %d Rx ring %d dissable timeout\n", + "VSI seid %d Rx ring %d disable timeout\n", vsi->seid, pf_q); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 3f25bd8c4924..10a83e5385c7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3663,8 +3663,7 @@ u16 i40e_lan_select_queue(struct net_device *netdev, /* is DCB enabled at all? 
*/ if (vsi->tc_config.numtc == 1) - return i40e_swdcb_skb_tx_hash(netdev, skb, - netdev->real_num_tx_queues); + return netdev_pick_tx(netdev, skb, sb_dev); prio = skb->priority; hw = &vsi->back->hw; diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index e8bd04100ecd..68c80f04113c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -136,6 +136,7 @@ struct iavf_q_vector { struct iavf_mac_filter { struct list_head list; u8 macaddr[ETH_ALEN]; + bool is_new_mac; /* filter is new, wait for PF decision */ bool remove; /* filter needs to be removed */ bool add; /* filter needs to be added */ }; @@ -185,12 +186,6 @@ enum iavf_state_t { __IAVF_RUNNING, /* opened, working */ }; -enum iavf_critical_section_t { - __IAVF_IN_CRITICAL_TASK, /* cannot be interrupted */ - __IAVF_IN_CLIENT_TASK, - __IAVF_IN_REMOVE_TASK, /* device being removed */ -}; - #define IAVF_CLOUD_FIELD_OMAC 0x01 #define IAVF_CLOUD_FIELD_IMAC 0x02 #define IAVF_CLOUD_FIELD_IVLAN 0x04 @@ -235,6 +230,9 @@ struct iavf_adapter { struct iavf_q_vector *q_vectors; struct list_head vlan_filter_list; struct list_head mac_filter_list; + struct mutex crit_lock; + struct mutex client_lock; + struct mutex remove_lock; /* Lock to protect accesses to MAC and VLAN lists */ spinlock_t mac_vlan_list_lock; char misc_vector_name[IFNAMSIZ + 9]; diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index af43fbd8cb75..edbeb27213f8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -1352,8 +1352,7 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx if (!fltr) return -ENOMEM; - while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section)) { + while (!mutex_trylock(&adapter->crit_lock)) { if (--count == 0) { kfree(fltr); return -EINVAL; @@ -1378,7 +1377,7 @@ ret: if (err && fltr) kfree(fltr); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); return err; } @@ -1563,8 +1562,7 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter, return -EINVAL; } - while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section)) { + while (!mutex_trylock(&adapter->crit_lock)) { if (--count == 0) { kfree(rss_new); return -EINVAL; @@ -1600,7 +1598,7 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter, if (!err) mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); if (!rss_new_add) kfree(rss_new); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index fa6cf20da911..23762a7ef740 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -132,21 +132,18 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, } /** - * iavf_lock_timeout - try to set bit but give up after timeout - * @adapter: board private structure - * @bit: bit to set + * iavf_lock_timeout - try to lock mutex but give up after timeout + * @lock: mutex that should be locked * @msecs: timeout in msecs * * Returns 0 on success, negative on failure **/ -static int iavf_lock_timeout(struct iavf_adapter *adapter, - enum iavf_critical_section_t bit, - unsigned int msecs) +static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) { unsigned int wait, delay = 10; for (wait = 0; wait < 
msecs; wait += delay) { - if (!test_and_set_bit(bit, &adapter->crit_section)) + if (mutex_trylock(lock)) return 0; msleep(delay); @@ -775,6 +772,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, list_add_tail(&f->list, &adapter->mac_filter_list); f->add = true; + f->is_new_mac = true; adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; } else { f->remove = false; @@ -1530,11 +1528,6 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter) set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); iavf_map_rings_to_vectors(adapter); - - if (RSS_AQ(adapter)) - adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS; - else - err = iavf_init_rss(adapter); err: return err; } @@ -1944,7 +1937,7 @@ static void iavf_watchdog_task(struct work_struct *work) struct iavf_hw *hw = &adapter->hw; u32 reg_val; - if (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section)) + if (!mutex_trylock(&adapter->crit_lock)) goto restart_watchdog; if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) @@ -1962,8 +1955,7 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->state = __IAVF_STARTUP; adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; queue_delayed_work(iavf_wq, &adapter->init_task, 10); - clear_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); /* Don't reschedule the watchdog, since we've restarted * the init task. When init_task contacts the PF and * gets everything set up again, it'll restart the @@ -1973,14 +1965,13 @@ static void iavf_watchdog_task(struct work_struct *work) } adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; - clear_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(10)); goto watchdog_done; case __IAVF_RESETTING: - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2); return; case __IAVF_DOWN: @@ -2003,7 +1994,7 @@ static void iavf_watchdog_task(struct work_struct *work) } break; case __IAVF_REMOVE: - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); return; default: goto restart_watchdog; @@ -2025,7 +2016,7 @@ watchdog_done: if (adapter->state == __IAVF_RUNNING || adapter->state == __IAVF_COMM_FAILED) iavf_detect_recover_hung(&adapter->vsi); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); restart_watchdog: if (adapter->aq_required) queue_delayed_work(iavf_wq, &adapter->watchdog_task, @@ -2089,7 +2080,7 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); adapter->netdev->flags &= ~IFF_UP; - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; adapter->state = __IAVF_DOWN; wake_up(&adapter->down_waitqueue); @@ -2122,15 +2113,14 @@ static void iavf_reset_task(struct work_struct *work) /* When device is being removed it doesn't make sense to run the reset * task, just return in such a case. 
*/ - if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) + if (mutex_is_locked(&adapter->remove_lock)) return; - if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) { + if (iavf_lock_timeout(&adapter->crit_lock, 200)) { schedule_work(&adapter->reset_task); return; } - while (test_and_set_bit(__IAVF_IN_CLIENT_TASK, - &adapter->crit_section)) + while (!mutex_trylock(&adapter->client_lock)) usleep_range(500, 1000); if (CLIENT_ENABLED(adapter)) { adapter->flags &= ~(IAVF_FLAG_CLIENT_NEEDS_OPEN | @@ -2182,7 +2172,7 @@ static void iavf_reset_task(struct work_struct *work) dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", reg_val); iavf_disable_vf(adapter); - clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); + mutex_unlock(&adapter->client_lock); return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -2227,6 +2217,14 @@ continue_reset: goto reset_err; } + if (RSS_AQ(adapter)) { + adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS; + } else { + err = iavf_init_rss(adapter); + if (err) + goto reset_err; + } + adapter->aq_required |= IAVF_FLAG_AQ_GET_CONFIG; adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS; @@ -2301,13 +2299,13 @@ continue_reset: adapter->state = __IAVF_DOWN; wake_up(&adapter->down_waitqueue); } - clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->client_lock); + mutex_unlock(&adapter->crit_lock); return; reset_err: - clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->client_lock); + mutex_unlock(&adapter->crit_lock); dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); iavf_close(netdev); } @@ -2335,7 +2333,7 @@ static void iavf_adminq_task(struct work_struct *work) if (!event.msg_buf) goto out; - if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) + if (iavf_lock_timeout(&adapter->crit_lock, 200)) goto freedom; do { ret = iavf_clean_arq_element(hw, &event, &pending); @@ -2350,7 +2348,7 @@ static void iavf_adminq_task(struct work_struct *work) if (pending != 0) memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE); } while (pending); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); if ((adapter->flags & (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || @@ -2417,7 +2415,7 @@ static void iavf_client_task(struct work_struct *work) * later. 
*/ - if (test_and_set_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section)) + if (!mutex_trylock(&adapter->client_lock)) return; if (adapter->flags & IAVF_FLAG_SERVICE_CLIENT_REQUESTED) { @@ -2440,7 +2438,7 @@ static void iavf_client_task(struct work_struct *work) adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_OPEN; } out: - clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); + mutex_unlock(&adapter->client_lock); } /** @@ -3043,8 +3041,7 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, if (!filter) return -ENOMEM; - while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section)) { + while (!mutex_trylock(&adapter->crit_lock)) { if (--count == 0) goto err; udelay(1); @@ -3075,7 +3072,7 @@ err: if (err) kfree(filter); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); return err; } @@ -3222,8 +3219,7 @@ static int iavf_open(struct net_device *netdev) return -EIO; } - while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section)) + while (!mutex_trylock(&adapter->crit_lock)) usleep_range(500, 1000); if (adapter->state != __IAVF_DOWN) { @@ -3258,7 +3254,7 @@ static int iavf_open(struct net_device *netdev) iavf_irq_enable(adapter, true); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); return 0; @@ -3270,7 +3266,7 @@ err_setup_rx: err_setup_tx: iavf_free_all_tx_resources(adapter); err_unlock: - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); return err; } @@ -3294,8 +3290,7 @@ static int iavf_close(struct net_device *netdev) if (adapter->state <= __IAVF_DOWN_PENDING) return 0; - while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section)) + while (!mutex_trylock(&adapter->crit_lock)) usleep_range(500, 1000); set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); @@ -3306,7 +3301,7 @@ static int iavf_close(struct net_device *netdev) adapter->state = __IAVF_DOWN_PENDING; iavf_free_traffic_irqs(adapter); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); /* We explicitly don't free resources here because the hardware is * still active and can DMA into memory. Resources are cleared in @@ -3655,8 +3650,8 @@ static void iavf_init_task(struct work_struct *work) init_task.work); struct iavf_hw *hw = &adapter->hw; - if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) { - dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); + if (iavf_lock_timeout(&adapter->crit_lock, 5000)) { + dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); return; } switch (adapter->state) { @@ -3691,7 +3686,7 @@ init_failed: } queue_delayed_work(iavf_wq, &adapter->init_task, HZ); out: - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); } /** @@ -3708,12 +3703,12 @@ static void iavf_shutdown(struct pci_dev *pdev) if (netif_running(netdev)) iavf_close(netdev); - if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) - dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); + if (iavf_lock_timeout(&adapter->crit_lock, 5000)) + dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); /* Prevent the watchdog from running. 
*/ adapter->state = __IAVF_REMOVE; adapter->aq_required = 0; - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); #ifdef CONFIG_PM pci_save_state(pdev); @@ -3807,6 +3802,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* set up the locks for the AQ, do this only once in probe * and destroy them only once in remove */ + mutex_init(&adapter->crit_lock); + mutex_init(&adapter->client_lock); + mutex_init(&adapter->remove_lock); mutex_init(&hw->aq.asq_mutex); mutex_init(&hw->aq.arq_mutex); @@ -3858,8 +3856,7 @@ static int __maybe_unused iavf_suspend(struct device *dev_d) netif_device_detach(netdev); - while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section)) + while (!mutex_trylock(&adapter->crit_lock)) usleep_range(500, 1000); if (netif_running(netdev)) { @@ -3870,7 +3867,7 @@ static int __maybe_unused iavf_suspend(struct device *dev_d) iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + mutex_unlock(&adapter->crit_lock); return 0; } @@ -3932,7 +3929,7 @@ static void iavf_remove(struct pci_dev *pdev) struct iavf_hw *hw = &adapter->hw; int err; /* Indicate we are in remove and not to run reset_task */ - set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section); + mutex_lock(&adapter->remove_lock); cancel_delayed_work_sync(&adapter->init_task); cancel_work_sync(&adapter->reset_task); cancel_delayed_work_sync(&adapter->client_task); @@ -3954,8 +3951,8 @@ static void iavf_remove(struct pci_dev *pdev) iavf_request_reset(adapter); msleep(50); } - if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) - dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); + if (iavf_lock_timeout(&adapter->crit_lock, 5000)) + dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); /* Shut down all the garbage mashers on the detention level */ adapter->state = __IAVF_REMOVE; @@ -3980,6 +3977,11 @@ static void iavf_remove(struct pci_dev *pdev) /* destroy the locks only once, here */ mutex_destroy(&hw->aq.arq_mutex); mutex_destroy(&hw->aq.asq_mutex); + mutex_destroy(&adapter->client_lock); + mutex_unlock(&adapter->crit_lock); + mutex_destroy(&adapter->crit_lock); + mutex_unlock(&adapter->remove_lock); + mutex_destroy(&adapter->remove_lock); iounmap(hw->hw_addr); pci_release_regions(pdev); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 0eab3c43bdc5..3c735968e1b8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -541,6 +541,47 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter) } /** + * iavf_mac_add_ok + * @adapter: adapter structure + * + * Submit list of filters based on PF response. + **/ +static void iavf_mac_add_ok(struct iavf_adapter *adapter) +{ + struct iavf_mac_filter *f, *ftmp; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { + f->is_new_mac = false; + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); +} + +/** + * iavf_mac_add_reject + * @adapter: adapter structure + * + * Remove filters from list based on PF response. 
+ **/ +static void iavf_mac_add_reject(struct iavf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct iavf_mac_filter *f, *ftmp; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { + if (f->remove && ether_addr_equal(f->macaddr, netdev->dev_addr)) + f->remove = false; + + if (f->is_new_mac) { + list_del(&f->list); + kfree(f); + } + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); +} + +/** * iavf_add_vlans * @adapter: adapter structure * @@ -1492,6 +1533,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, case VIRTCHNL_OP_ADD_ETH_ADDR: dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n", iavf_stat_str(&adapter->hw, v_retval)); + iavf_mac_add_reject(adapter); /* restore administratively set MAC address */ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); break; @@ -1639,10 +1681,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, } } switch (v_opcode) { - case VIRTCHNL_OP_ADD_ETH_ADDR: { + case VIRTCHNL_OP_ADD_ETH_ADDR: + if (!v_retval) + iavf_mac_add_ok(adapter); if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr)) ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); - } break; case VIRTCHNL_OP_GET_STATS: { struct iavf_eth_stats *stats = diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index a450343fbb92..eadcb9958346 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -234,6 +234,7 @@ enum ice_pf_state { ICE_VFLR_EVENT_PENDING, ICE_FLTR_OVERFLOW_PROMISC, ICE_VF_DIS, + ICE_VF_DEINIT_IN_PROGRESS, ICE_CFG_BUSY, ICE_SERVICE_SCHED, ICE_SERVICE_DIS, diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 91b545ab8b8f..8c863d64930b 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -475,7 +475,7 @@ struct ice_pf *ice_allocate_pf(struct device *dev) { struct devlink *devlink; - devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf)); + devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev); if (!devlink) return NULL; @@ -502,7 +502,7 @@ int ice_devlink_register(struct ice_pf *pf) struct device *dev = ice_pf_to_dev(pf); int err; - err = devlink_register(devlink, dev); + err = devlink_register(devlink); if (err) { dev_err(dev, "devlink registration failed: %d\n", err); return err; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 33916ed9e874..60d55d043a94 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -191,6 +191,14 @@ static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + /* Under some circumstances, we might receive a request to delete our + * own device address from our uc list. Because we store the device + * address in the VSI's MAC filter list, we need to ignore such + * requests and not delete our device address from this list. 
+ */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr, ICE_FWD_TO_VSI)) return -EINVAL; @@ -4194,6 +4202,11 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) struct ice_hw *hw; int i, err; + if (pdev->is_virtfn) { + dev_err(dev, "can't probe a virtual function\n"); + return -EINVAL; + } + /* this driver uses devres, see * Documentation/driver-api/driver-model/devres.rst */ @@ -5119,7 +5132,7 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) return -EADDRNOTAVAIL; if (ether_addr_equal(netdev->dev_addr, mac)) { - netdev_warn(netdev, "already using mac %pM\n", mac); + netdev_dbg(netdev, "already using mac %pM\n", mac); return 0; } @@ -5130,6 +5143,7 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) return -EBUSY; } + netif_addr_lock_bh(netdev); /* Clean up old MAC filter. Not an error if old filter doesn't exist */ status = ice_fltr_remove_mac(vsi, netdev->dev_addr, ICE_FWD_TO_VSI); if (status && status != ICE_ERR_DOES_NOT_EXIST) { @@ -5139,30 +5153,28 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) /* Add filter for new MAC. If filter exists, return success */ status = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI); - if (status == ICE_ERR_ALREADY_EXISTS) { + if (status == ICE_ERR_ALREADY_EXISTS) /* Although this MAC filter is already present in hardware it's * possible in some cases (e.g. bonding) that dev_addr was * modified outside of the driver and needs to be restored back * to this value. */ - memcpy(netdev->dev_addr, mac, netdev->addr_len); netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac); - return 0; - } - - /* error if the new filter addition failed */ - if (status) + else if (status) + /* error if the new filter addition failed */ err = -EADDRNOTAVAIL; err_update_filters: if (err) { netdev_err(netdev, "can't set MAC %pM. 
filter update failed\n", mac); + netif_addr_unlock_bh(netdev); return err; } /* change the netdev's MAC address */ memcpy(netdev->dev_addr, mac, netdev->addr_len); + netif_addr_unlock_bh(netdev); netdev_dbg(vsi->netdev, "updated MAC address to %pM\n", netdev->dev_addr); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 5d5207b56ca9..9e3ddb9b8b51 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -656,7 +656,7 @@ static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan, * maintaining phase */ if (start_time < current_time) - start_time = div64_u64(current_time + NSEC_PER_MSEC - 1, + start_time = div64_u64(current_time + NSEC_PER_SEC - 1, NSEC_PER_SEC) * NSEC_PER_SEC + phase; start_time -= E810_OUT_PROP_DELAY_NS; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 2826570dab51..e93430ab37f1 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -615,6 +615,8 @@ void ice_free_vfs(struct ice_pf *pf) struct ice_hw *hw = &pf->hw; unsigned int tmp, i; + set_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state); + if (!pf->vf) return; @@ -680,6 +682,7 @@ void ice_free_vfs(struct ice_pf *pf) i); clear_bit(ICE_VF_DIS, pf->state); + clear_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state); clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags); } @@ -4415,6 +4418,10 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) struct device *dev; int err = 0; + /* if de-init is underway, don't process messages from VF */ + if (test_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state)) + return; + dev = ice_pf_to_dev(pf); if (ice_validate_vf_id(pf, vf_id)) { err = -EINVAL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 96dd1a4f956a..b1d22e4d5ec9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -52,8 +52,11 @@ static int ixgbe_xsk_pool_enable(struct ixgbe_adapter *adapter, /* Kick start the NAPI context so that receiving will start */ err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX); - if (err) + if (err) { + clear_bit(qid, adapter->af_xdp_zc_qps); + xsk_pool_dma_unmap(pool, IXGBE_RX_DMA_ATTR); return err; + } } return 0; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ff8db311963c..5d1007e1b5c9 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2327,7 +2327,7 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, if (!skb) return ERR_PTR(-ENOMEM); - skb_mark_for_recycle(skb, virt_to_page(xdp->data), pool); + skb_mark_for_recycle(skb); skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); @@ -2339,10 +2339,6 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, skb_frag_page(frag), skb_frag_off(frag), skb_frag_size(frag), PAGE_SIZE); - /* We don't need to reset pp_recycle here. It's already set, so - * just mark fragments for recycling. 
- */ - page_pool_store_mem_info(skb_frag_page(frag), pool); } return skb; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index b9fbc9f000f2..cf8acabb90ac 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -938,7 +938,7 @@ enum mvpp22_ptp_packet_format { #define MVPP2_BM_COOKIE_POOL_OFFS 8 #define MVPP2_BM_COOKIE_CPU_OFFS 24 -#define MVPP2_BM_SHORT_FRAME_SIZE 704 /* frame size 128 */ +#define MVPP2_BM_SHORT_FRAME_SIZE 736 /* frame size 128 */ #define MVPP2_BM_LONG_FRAME_SIZE 2240 /* frame size 1664 */ #define MVPP2_BM_JUMBO_FRAME_SIZE 10432 /* frame size 9856 */ /* BM short pool packet size diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 99bd8b8aa0e2..744f58f41ecc 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3995,7 +3995,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, } if (pp) - skb_mark_for_recycle(skb, page, pp); + skb_mark_for_recycle(skb); else dma_unmap_single_attrs(dev->dev.parent, dma_addr, bm_pool->buf_size, DMA_FROM_DEVICE, diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index 16caa02095fe..2aa0ae8abfbb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -12,6 +12,7 @@ config OCTEONTX2_AF select NET_DEVLINK depends on (64BIT && COMPILE_TEST) || ARM64 depends on PCI + depends on PTP_1588_CLOCK_OPTIONAL help This driver supports Marvell's OcteonTX2 Resource Virtualization Unit's admin function manager which manages all RVU HW resources @@ -32,6 +33,7 @@ config OCTEONTX2_PF select OCTEONTX2_MBOX depends on (64BIT && COMPILE_TEST) || ARM64 depends on PCI + depends on PTP_1588_CLOCK_OPTIONAL help This driver supports Marvell's OcteonTX2 NIC physical function. diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 447093361b7a..add4a39edced 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1078,6 +1078,13 @@ enum npc_af_status { NPC_MCAM_ALLOC_DENIED = -702, NPC_MCAM_ALLOC_FAILED = -703, NPC_MCAM_PERM_DENIED = -704, + NPC_FLOW_INTF_INVALID = -707, + NPC_FLOW_CHAN_INVALID = -708, + NPC_FLOW_NO_NIXLF = -709, + NPC_FLOW_NOT_SUPPORTED = -710, + NPC_FLOW_VF_PERM_DENIED = -711, + NPC_FLOW_VF_NOT_INIT = -712, + NPC_FLOW_VF_OVERLAP = -713, }; struct npc_mcam_alloc_entry_req { @@ -1426,4 +1433,13 @@ struct cpt_rxc_time_cfg_req { u16 active_limit; }; +/* CGX mailbox error codes + * Range 1101 - 1200. 
+ */ +enum cgx_af_status { + LMAC_AF_ERR_INVALID_PARAM = -1101, + LMAC_AF_ERR_PF_NOT_MAPPED = -1102, + LMAC_AF_ERR_PERM_DENIED = -1103, +}; + #endif /* MBOX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 5fe277e354f7..c2438ba5e2ec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -924,16 +924,26 @@ static int rvu_setup_hw_resources(struct rvu *rvu) block->lfreset_reg = NPA_AF_LF_RST; sprintf(block->name, "NPA"); err = rvu_alloc_bitmap(&block->lf); - if (err) + if (err) { + dev_err(rvu->dev, + "%s: Failed to allocate NPA LF bitmap\n", __func__); return err; + } nix: err = rvu_setup_nix_hw_resource(rvu, BLKADDR_NIX0); - if (err) + if (err) { + dev_err(rvu->dev, + "%s: Failed to allocate NIX0 LFs bitmap\n", __func__); return err; + } + err = rvu_setup_nix_hw_resource(rvu, BLKADDR_NIX1); - if (err) + if (err) { + dev_err(rvu->dev, + "%s: Failed to allocate NIX1 LFs bitmap\n", __func__); return err; + } /* Init SSO group's bitmap */ block = &hw->block[BLKADDR_SSO]; @@ -953,8 +963,11 @@ nix: block->lfreset_reg = SSO_AF_LF_HWGRP_RST; sprintf(block->name, "SSO GROUP"); err = rvu_alloc_bitmap(&block->lf); - if (err) + if (err) { + dev_err(rvu->dev, + "%s: Failed to allocate SSO LF bitmap\n", __func__); return err; + } ssow: /* Init SSO workslot's bitmap */ @@ -974,8 +987,11 @@ ssow: block->lfreset_reg = SSOW_AF_LF_HWS_RST; sprintf(block->name, "SSOWS"); err = rvu_alloc_bitmap(&block->lf); - if (err) + if (err) { + dev_err(rvu->dev, + "%s: Failed to allocate SSOW LF bitmap\n", __func__); return err; + } tim: /* Init TIM LF's bitmap */ @@ -996,35 +1012,53 @@ tim: block->lfreset_reg = TIM_AF_LF_RST; sprintf(block->name, "TIM"); err = rvu_alloc_bitmap(&block->lf); - if (err) + if (err) { + dev_err(rvu->dev, + "%s: Failed to allocate TIM LF bitmap\n", __func__); return err; + } cpt: err = rvu_setup_cpt_hw_resource(rvu, BLKADDR_CPT0); - if (err) + if (err) { + dev_err(rvu->dev, + "%s: Failed to allocate CPT0 LF bitmap\n", __func__); return err; + } err = rvu_setup_cpt_hw_resource(rvu, BLKADDR_CPT1); - if (err) + if (err) { + dev_err(rvu->dev, + "%s: Failed to allocate CPT1 LF bitmap\n", __func__); return err; + } /* Allocate memory for PFVF data */ rvu->pf = devm_kcalloc(rvu->dev, hw->total_pfs, sizeof(struct rvu_pfvf), GFP_KERNEL); - if (!rvu->pf) + if (!rvu->pf) { + dev_err(rvu->dev, + "%s: Failed to allocate memory for PF's rvu_pfvf struct\n", __func__); return -ENOMEM; + } rvu->hwvf = devm_kcalloc(rvu->dev, hw->total_vfs, sizeof(struct rvu_pfvf), GFP_KERNEL); - if (!rvu->hwvf) + if (!rvu->hwvf) { + dev_err(rvu->dev, + "%s: Failed to allocate memory for VF's rvu_pfvf struct\n", __func__); return -ENOMEM; + } mutex_init(&rvu->rsrc_lock); rvu_fwdata_init(rvu); err = rvu_setup_msix_resources(rvu); - if (err) + if (err) { + dev_err(rvu->dev, + "%s: Failed to setup MSIX resources\n", __func__); return err; + } for (blkid = 0; blkid < BLK_COUNT; blkid++) { block = &hw->block[blkid]; @@ -1050,25 +1084,33 @@ cpt: goto msix_err; err = rvu_npc_init(rvu); - if (err) + if (err) { + dev_err(rvu->dev, "%s: Failed to initialize npc\n", __func__); goto npc_err; + } err = rvu_cgx_init(rvu); - if (err) + if (err) { + dev_err(rvu->dev, "%s: Failed to initialize cgx\n", __func__); goto cgx_err; + } /* Assign MACs for CGX mapped functions */ rvu_setup_pfvf_macaddress(rvu); err = rvu_npa_init(rvu); - if (err) + if (err) { + dev_err(rvu->dev, "%s: Failed to initialize npa\n", 
__func__); goto npa_err; + } rvu_get_lbk_bufsize(rvu); err = rvu_nix_init(rvu); - if (err) + if (err) { + dev_err(rvu->dev, "%s: Failed to initialize nix\n", __func__); goto nix_err; + } rvu_program_channels(rvu); @@ -2984,27 +3026,37 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = rvu_mbox_init(rvu, &rvu->afpf_wq_info, TYPE_AFPF, rvu->hw->total_pfs, rvu_afpf_mbox_handler, rvu_afpf_mbox_up_handler); - if (err) + if (err) { + dev_err(dev, "%s: Failed to initialize mbox\n", __func__); goto err_hwsetup; + } err = rvu_flr_init(rvu); - if (err) + if (err) { + dev_err(dev, "%s: Failed to initialize flr\n", __func__); goto err_mbox; + } err = rvu_register_interrupts(rvu); - if (err) + if (err) { + dev_err(dev, "%s: Failed to register interrupts\n", __func__); goto err_flr; + } err = rvu_register_dl(rvu); - if (err) + if (err) { + dev_err(dev, "%s: Failed to register devlink\n", __func__); goto err_irq; + } rvu_setup_rvum_blk_revid(rvu); /* Enable AF's VFs (if any) */ err = rvu_enable_sriov(rvu); - if (err) + if (err) { + dev_err(dev, "%s: Failed to enable sriov\n", __func__); goto err_dl; + } /* Initialize debugfs */ rvu_dbg_init(rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 95591e77aea8..d88f595e63b0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -356,6 +356,7 @@ struct rvu_hwinfo { u16 npc_counters; /* No of match stats counters */ u32 lbk_bufsize; /* FIFO size supported by LBK */ bool npc_ext_set; /* Extended register set */ + u64 npc_stat_ena; /* Match stats enable bit */ struct hw_cap cap; struct rvu_block block[BLK_COUNT]; /* Block info */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index fe99ac4a4dd8..d34e59525a09 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -448,7 +448,7 @@ int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start) u8 cgx_id, lmac_id; if (!is_cgx_config_permitted(rvu, pcifunc)) - return -EPERM; + return LMAC_AF_ERR_PERM_DENIED; rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); @@ -507,7 +507,7 @@ static int rvu_lmac_get_stats(struct rvu *rvu, struct msg_req *req, void *cgxd; if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) - return -ENODEV; + return LMAC_AF_ERR_PERM_DENIED; rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac); cgxd = rvu_cgx_pdata(cgx_idx, rvu); @@ -561,7 +561,7 @@ int rvu_mbox_handler_cgx_fec_stats(struct rvu *rvu, void *cgxd; if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) - return -EPERM; + return LMAC_AF_ERR_PERM_DENIED; rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac); cgxd = rvu_cgx_pdata(cgx_idx, rvu); @@ -888,7 +888,7 @@ int rvu_mbox_handler_cgx_get_phy_fec_stats(struct rvu *rvu, struct msg_req *req, u8 cgx_id, lmac_id; if (!is_pf_cgxmapped(rvu, pf)) - return -EPERM; + return LMAC_AF_ERR_PF_NOT_MAPPED; rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); return cgx_get_phy_fec_stats(rvu_cgx_pdata(cgx_id, rvu), lmac_id); @@ -1046,7 +1046,7 @@ int rvu_mbox_handler_cgx_mac_addr_reset(struct rvu *rvu, struct msg_req *req, u8 cgx_id, lmac_id; if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) - return -EPERM; + return LMAC_AF_ERR_PERM_DENIED; rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); return cgx_lmac_addr_reset(cgx_id, lmac_id); @@ 
-1060,7 +1060,7 @@ int rvu_mbox_handler_cgx_mac_addr_update(struct rvu *rvu, u8 cgx_id, lmac_id; if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) - return -EPERM; + return LMAC_AF_ERR_PERM_DENIED; rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); return cgx_lmac_addr_update(cgx_id, lmac_id, req->mac_addr, req->index); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 6f963b2f54a7..a55b46ad162d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1503,13 +1503,14 @@ int rvu_register_dl(struct rvu *rvu) struct devlink *dl; int err; - dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink)); + dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink), + rvu->dev); if (!dl) { dev_warn(rvu->dev, "devlink_alloc failed\n"); return -ENOMEM; } - err = devlink_register(dl, rvu->dev); + err = devlink_register(dl); if (err) { dev_err(rvu->dev, "devlink register failed with error %d\n", err); devlink_free(dl); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 53db8ebddb5e..22039d9ce70a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -984,7 +984,7 @@ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, nix_hw = get_nix_hw(rvu->hw, blkaddr); if (!nix_hw) - return -EINVAL; + return NIX_AF_ERR_INVALID_NIXBLK; return rvu_nix_blk_aq_enq_inst(rvu, nix_hw, req, rsp); } @@ -1405,7 +1405,7 @@ int rvu_mbox_handler_nix_mark_format_cfg(struct rvu *rvu, nix_hw = get_nix_hw(rvu->hw, blkaddr); if (!nix_hw) - return -EINVAL; + return NIX_AF_ERR_INVALID_NIXBLK; cfg = (((u32)req->offset & 0x7) << 16) | (((u32)req->y_mask & 0xF) << 12) | @@ -1673,7 +1673,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu, nix_hw = get_nix_hw(rvu->hw, blkaddr); if (!nix_hw) - return -EINVAL; + return NIX_AF_ERR_INVALID_NIXBLK; mutex_lock(&rvu->rsrc_lock); @@ -1795,7 +1795,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc) nix_hw = get_nix_hw(rvu->hw, blkaddr); if (!nix_hw) - return -EINVAL; + return NIX_AF_ERR_INVALID_NIXBLK; nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0); if (nixlf < 0) @@ -1866,7 +1866,7 @@ static int nix_txschq_free_one(struct rvu *rvu, nix_hw = get_nix_hw(rvu->hw, blkaddr); if (!nix_hw) - return -EINVAL; + return NIX_AF_ERR_INVALID_NIXBLK; nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0); if (nixlf < 0) @@ -2066,7 +2066,7 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, nix_hw = get_nix_hw(rvu->hw, blkaddr); if (!nix_hw) - return -EINVAL; + return NIX_AF_ERR_INVALID_NIXBLK; txsch = &nix_hw->txsch[req->lvl]; pfvf_map = txsch->pfvf_map; @@ -2164,8 +2164,12 @@ static int nix_tx_vtag_free(struct rvu *rvu, int blkaddr, u16 pcifunc, int index) { struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr); - struct nix_txvlan *vlan = &nix_hw->txvlan; + struct nix_txvlan *vlan; + + if (!nix_hw) + return NIX_AF_ERR_INVALID_NIXBLK; + vlan = &nix_hw->txvlan; if (vlan->entry2pfvf_map[index] != pcifunc) return NIX_AF_ERR_PARAM; @@ -2206,10 +2210,15 @@ static int nix_tx_vtag_alloc(struct rvu *rvu, int blkaddr, u64 vtag, u8 size) { struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr); - struct nix_txvlan *vlan = &nix_hw->txvlan; + struct nix_txvlan *vlan; u64 regval; int index; + if (!nix_hw) + return NIX_AF_ERR_INVALID_NIXBLK; + + 
vlan = &nix_hw->txvlan; + mutex_lock(&vlan->rsrc_lock); index = rvu_alloc_rsrc(&vlan->rsrc); @@ -2234,12 +2243,16 @@ static int nix_tx_vtag_decfg(struct rvu *rvu, int blkaddr, struct nix_vtag_config *req) { struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr); - struct nix_txvlan *vlan = &nix_hw->txvlan; u16 pcifunc = req->hdr.pcifunc; int idx0 = req->tx.vtag0_idx; int idx1 = req->tx.vtag1_idx; + struct nix_txvlan *vlan; int err = 0; + if (!nix_hw) + return NIX_AF_ERR_INVALID_NIXBLK; + + vlan = &nix_hw->txvlan; if (req->tx.free_vtag0 && req->tx.free_vtag1) if (vlan->entry2pfvf_map[idx0] != pcifunc || vlan->entry2pfvf_map[idx1] != pcifunc) @@ -2266,9 +2279,13 @@ static int nix_tx_vtag_cfg(struct rvu *rvu, int blkaddr, struct nix_vtag_config_rsp *rsp) { struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr); - struct nix_txvlan *vlan = &nix_hw->txvlan; + struct nix_txvlan *vlan; u16 pcifunc = req->hdr.pcifunc; + if (!nix_hw) + return NIX_AF_ERR_INVALID_NIXBLK; + + vlan = &nix_hw->txvlan; if (req->tx.cfg_vtag0) { rsp->vtag0_idx = nix_tx_vtag_alloc(rvu, blkaddr, @@ -3142,7 +3159,7 @@ static int reserve_flowkey_alg_idx(struct rvu *rvu, int blkaddr, u32 flow_cfg) hw = get_nix_hw(rvu->hw, blkaddr); if (!hw) - return -EINVAL; + return NIX_AF_ERR_INVALID_NIXBLK; /* No room to add new flow hash algoritham */ if (hw->flowkey.in_use >= NIX_FLOW_KEY_ALG_MAX) @@ -3182,7 +3199,7 @@ int rvu_mbox_handler_nix_rss_flowkey_cfg(struct rvu *rvu, nix_hw = get_nix_hw(rvu->hw, blkaddr); if (!nix_hw) - return -EINVAL; + return NIX_AF_ERR_INVALID_NIXBLK; alg_idx = get_flowkey_alg_idx(nix_hw, req->flowkey_cfg); /* Failed to get algo index from the exiting list, reserve new */ @@ -3459,7 +3476,7 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req, nix_hw = get_nix_hw(rvu->hw, blkaddr); if (!nix_hw) - return -EINVAL; + return NIX_AF_ERR_INVALID_NIXBLK; if (is_afvf(pcifunc)) rvu_get_lbk_link_max_frs(rvu, &max_mtu); @@ -4126,7 +4143,7 @@ int rvu_mbox_handler_nix_lso_format_cfg(struct rvu *rvu, nix_hw = get_nix_hw(rvu->hw, blkaddr); if (!nix_hw) - return -EINVAL; + return NIX_AF_ERR_INVALID_NIXBLK; /* Find existing matching LSO format, if any */ for (idx = 0; idx < nix_hw->lso.in_use; idx++) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 52b255426c22..6f231008c8a4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -724,7 +724,17 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, action.index = pfvf->promisc_mce_idx; } - req.chan_mask = 0xFFFU; + /* For cn10k the upper two bits of the channel number are + * cpt channel number. with masking out these bits in the + * mcam entry, same entry used for NIX will allow packets + * received from cpt for parsing. + */ + if (!is_rvu_otx2(rvu)) { + req.chan_mask = NIX_CHAN_CPT_X2P_MASK; + } else { + req.chan_mask = 0xFFFU; + } + if (chan_cnt > 1) { if (!is_power_of_2(chan_cnt)) { dev_err(rvu->dev, @@ -1898,9 +1908,22 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr) mcam->banks = (npc_const >> 44) & 0xFULL; mcam->banksize = (npc_const >> 28) & 0xFFFFULL; + hw->npc_stat_ena = BIT_ULL(9); /* Extended set */ if (npc_const2) { hw->npc_ext_set = true; + /* 96xx supports only match_stats and npc_counters + * reflected in NPC_AF_CONST reg. + * STAT_SEL and ENA are at [0:8] and 9 bit positions. 
+ * 98xx has both match_stat and ext and npc_counter + * reflected in NPC_AF_CONST2 + * STAT_SEL_EXT added at [12:14] bit position. + * cn10k supports only ext and hence npc_counters in + * NPC_AF_CONST is 0 and npc_counters reflected in NPC_AF_CONST2. + * STAT_SEL bitpos incremented from [0:8] to [0:11] and ENA bit moved to 63 + */ + if (!hw->npc_counters) + hw->npc_stat_ena = BIT_ULL(63); hw->npc_counters = (npc_const2 >> 16) & 0xFFFFULL; mcam->banksize = npc_const2 & 0xFFFFULL; } @@ -1955,7 +1978,7 @@ static void rvu_npc_setup_interfaces(struct rvu *rvu, int blkaddr) rvu_write64(rvu, blkaddr, NPC_AF_INTFX_MISS_STAT_ACT(intf), ((mcam->rx_miss_act_cntr >> 9) << 12) | - BIT_ULL(9) | mcam->rx_miss_act_cntr); + hw->npc_stat_ena | mcam->rx_miss_act_cntr); } /* Configure TX interfaces */ @@ -2147,18 +2170,16 @@ static void npc_map_mcam_entry_and_cntr(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 entry, u16 cntr) { u16 index = entry & (mcam->banksize - 1); - u16 bank = npc_get_bank(mcam, entry); + u32 bank = npc_get_bank(mcam, entry); + struct rvu_hwinfo *hw = rvu->hw; /* Set mapping and increment counter's refcnt */ mcam->entry2cntr_map[entry] = cntr; mcam->cntr_refcnt[cntr]++; - /* Enable stats - * NPC_AF_MCAMEX_BANKX_STAT_ACT[14:12] - counter[11:9] - * NPC_AF_MCAMEX_BANKX_STAT_ACT[8:0] - counter[8:0] - */ + /* Enable stats */ rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank), - ((cntr >> 9) << 12) | BIT_ULL(9) | cntr); + ((cntr >> 9) << 12) | hw->npc_stat_ena | cntr); } static void npc_unmap_mcam_entry_and_cntr(struct rvu *rvu, @@ -2414,6 +2435,17 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, goto alloc; } + /* For a VF base MCAM match rule is set by its PF. And all the + * further MCAM rules installed by VF on its own are + * concatenated with the base rule set by its PF. Hence PF entries + * should be at lower priority compared to VF entries. Otherwise + * base rule is hit always and rules installed by VF will be of + * no use. Hence if the request is from PF and NOT a priority + * allocation request then allocate low priority entries. + */ + if (!(pcifunc & RVU_PFVF_FUNC_MASK)) + goto lprio_alloc; + /* Find out the search range for non-priority allocation request * * Get MCAM free entry count in middle zone. @@ -2439,6 +2471,7 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, /* Not enough free entries, search all entries in reverse, * so that low priority ones will get used up. 
*/ +lprio_alloc: reverse = true; start = 0; end = mcam->bmap_entries; @@ -3252,7 +3285,7 @@ int rvu_mbox_handler_npc_mcam_entry_stats(struct rvu *rvu, /* read MCAM entry STAT_ACT register */ regval = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank)); - if (!(regval & BIT_ULL(9))) { + if (!(regval & rvu->hw->npc_stat_ena)) { rsp->stat_ena = 0; mutex_unlock(&mcam->lock); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 5c01cf4a9c5b..9bde1bb7e148 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -600,7 +600,7 @@ static int npc_check_unsupported_flows(struct rvu *rvu, u64 features, u8 intf) dev_info(rvu->dev, "Unsupported flow(s):\n"); for_each_set_bit(bit, (unsigned long *)&unsupported, 64) dev_info(rvu->dev, "%s ", npc_get_field_name(bit)); - return NIX_AF_ERR_NPC_KEY_NOT_SUPP; + return -EOPNOTSUPP; } return 0; @@ -995,13 +995,11 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, struct npc_mcam *mcam = &rvu->hw->mcam; struct rvu_npc_mcam_rule dummy = { 0 }; struct rvu_npc_mcam_rule *rule; - bool new = false, msg_from_vf; u16 owner = req->hdr.pcifunc; struct msg_rsp write_rsp; struct mcam_entry *entry; int entry_index, err; - - msg_from_vf = !!(owner & RVU_PFVF_FUNC_MASK); + bool new = false; installed_features = req->features; features = req->features; @@ -1027,7 +1025,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, } /* update mcam entry with default unicast rule attributes */ - if (def_ucast_rule && (msg_from_vf || (req->default_rule && req->append))) { + if (def_ucast_rule && (req->default_rule && req->append)) { missing_features = (def_ucast_rule->features ^ features) & def_ucast_rule->features; if (missing_features) @@ -1130,6 +1128,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, struct npc_install_flow_rsp *rsp) { bool from_vf = !!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK); + struct rvu_switch *rswitch = &rvu->rswitch; int blkaddr, nixlf, err; struct rvu_pfvf *pfvf; bool pf_set_vfs_mac = false; @@ -1139,14 +1138,14 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) { dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__); - return -ENODEV; + return NPC_MCAM_INVALID_REQ; } if (!is_npc_interface_valid(rvu, req->intf)) - return -EINVAL; + return NPC_FLOW_INTF_INVALID; if (from_vf && req->default_rule) - return NPC_MCAM_PERM_DENIED; + return NPC_FLOW_VF_PERM_DENIED; /* Each PF/VF info is maintained in struct rvu_pfvf. 
* rvu_pfvf for the target PF/VF needs to be retrieved @@ -1172,12 +1171,12 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, err = npc_check_unsupported_flows(rvu, req->features, req->intf); if (err) - return err; + return NPC_FLOW_NOT_SUPPORTED; /* Skip channel validation if AF is installing */ if (!is_pffunc_af(req->hdr.pcifunc) && npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) - return -EINVAL; + return NPC_FLOW_CHAN_INVALID; pfvf = rvu_get_pfvf(rvu, target); @@ -1195,7 +1194,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, /* Proceed if NIXLF is attached or not for TX rules */ err = nix_get_nixlf(rvu, target, &nixlf, NULL); if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac) - return -EINVAL; + return NPC_FLOW_NO_NIXLF; /* don't enable rule when nixlf not attached or initialized */ if (!(is_nixlf_attached(rvu, target) && @@ -1211,7 +1210,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, /* Do not allow requests from uninitialized VFs */ if (from_vf && !enable) - return -EINVAL; + return NPC_FLOW_VF_NOT_INIT; /* PF sets VF mac & VF NIXLF is not attached, update the mac addr */ if (pf_set_vfs_mac && !enable) { @@ -1221,15 +1220,12 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, return 0; } - /* If message is from VF then its flow should not overlap with - * reserved unicast flow. - */ - if (from_vf && pfvf->def_ucast_rule && is_npc_intf_rx(req->intf) && - pfvf->def_ucast_rule->features & req->features) - return -EINVAL; + mutex_lock(&rswitch->switch_lock); + err = npc_install_flow(rvu, blkaddr, target, nixlf, pfvf, + req, rsp, enable, pf_set_vfs_mac); + mutex_unlock(&rswitch->switch_lock); - return npc_install_flow(rvu, blkaddr, target, nixlf, pfvf, req, rsp, - enable, pf_set_vfs_mac); + return err; } static int npc_delete_flow(struct rvu *rvu, struct rvu_npc_mcam_rule *rule, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 3254b02205ca..fcaa7df404f3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -7,7 +7,8 @@ obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ - otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o -rvu_nicvf-y := otx2_vf.o + otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \ + otx2_devlink.o +rvu_nicvf-y := otx2_vf.o otx2_devlink.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 2a80cdc848e5..c4147b64e059 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -19,11 +19,13 @@ #include <linux/timecounter.h> #include <linux/soc/marvell/octeontx2/asm.h> #include <net/pkt_cls.h> +#include <net/devlink.h> #include <mbox.h> #include <npc.h> #include "otx2_reg.h" #include "otx2_txrx.h" +#include "otx2_devlink.h" #include <rvu_trace.h> /* PCI device IDs */ @@ -268,7 +270,6 @@ struct otx2_mac_table { }; struct otx2_flow_config { - u16 entry[NPC_MAX_NONCONTIG_ENTRIES]; u16 *flow_ent; u16 *def_ent; u16 nr_flows; @@ -279,16 +280,13 @@ struct otx2_flow_config { #define OTX2_MCAM_COUNT (OTX2_DEFAULT_FLOWCOUNT + \ OTX2_MAX_UNICAST_FLOWS + \ OTX2_MAX_VLAN_FLOWS) - u16 ntuple_offset; u16 unicast_offset; u16 rx_vlan_offset; u16 
vf_vlan_offset; #define OTX2_PER_VF_VLAN_FLOWS 2 /* Rx + Tx per VF */ #define OTX2_VF_VLAN_RX_INDEX 0 #define OTX2_VF_VLAN_TX_INDEX 1 - u16 tc_flower_offset; - u16 ntuple_max_flows; - u16 tc_max_flows; + u16 max_flows; u8 dmacflt_max_flows; u8 *bmap_to_dmacindex; unsigned long dmacflt_bmap; @@ -299,8 +297,7 @@ struct otx2_tc_info { /* hash table to store TC offloaded flows */ struct rhashtable flow_table; struct rhashtable_params flow_ht_params; - DECLARE_BITMAP(tc_entries_bitmap, OTX2_MAX_TC_FLOWS); - unsigned long num_entries; + unsigned long *tc_entries_bitmap; }; struct dev_hw_ops { @@ -353,6 +350,11 @@ struct otx2_nic { struct otx2_vf_config *vf_configs; struct cgx_link_user_info linfo; + /* NPC MCAM */ + struct otx2_flow_config *flow_cfg; + struct otx2_mac_table *mac_table; + struct otx2_tc_info tc_info; + u64 reset_count; struct work_struct reset_task; struct workqueue_struct *flr_wq; @@ -360,7 +362,6 @@ struct otx2_nic { struct refill_work *refill_wrk; struct workqueue_struct *otx2_wq; struct work_struct rx_mode_work; - struct otx2_mac_table *mac_table; /* Ethtool stuff */ u32 msg_enable; @@ -376,9 +377,10 @@ struct otx2_nic { struct otx2_ptp *ptp; struct hwtstamp_config tstamp; - struct otx2_flow_config *flow_cfg; - struct otx2_tc_info tc_info; unsigned long rq_bmap; + + /* Devlink */ + struct otx2_devlink *dl; }; static inline bool is_otx2_lbkvf(struct pci_dev *pdev) @@ -710,6 +712,11 @@ MBOX_UP_CGX_MESSAGES #define RVU_PFVF_FUNC_SHIFT 0 #define RVU_PFVF_FUNC_MASK 0x3FF +static inline bool is_otx2_vf(u16 pcifunc) +{ + return !!(pcifunc & RVU_PFVF_FUNC_MASK); +} + static inline int rvu_get_pf(u16 pcifunc) { return (pcifunc >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK; @@ -815,7 +822,8 @@ int otx2_set_real_num_queues(struct net_device *netdev, int tx_queues, int rx_queues); /* MCAM filter related APIs */ int otx2_mcam_flow_init(struct otx2_nic *pf); -int otx2_alloc_mcam_entries(struct otx2_nic *pfvf); +int otx2vf_mcam_flow_init(struct otx2_nic *pfvf); +int otx2_alloc_mcam_entries(struct otx2_nic *pfvf, u16 count); void otx2_mcam_flow_del(struct otx2_nic *pf); int otx2_destroy_ntuple_flows(struct otx2_nic *pf); int otx2_destroy_mcam_flows(struct otx2_nic *pfvf); @@ -828,6 +836,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, int otx2_remove_flow(struct otx2_nic *pfvf, u32 location); int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, struct npc_install_flow_req *req); +int otx2_get_maxflows(struct otx2_flow_config *flow_cfg); void otx2_rss_ctx_flow_del(struct otx2_nic *pfvf, int ctx_id); int otx2_del_macfilter(struct net_device *netdev, const u8 *mac); int otx2_add_macfilter(struct net_device *netdev, const u8 *mac); @@ -839,6 +848,7 @@ int otx2_init_tc(struct otx2_nic *nic); void otx2_shutdown_tc(struct otx2_nic *nic); int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data); +int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic); /* CGX/RPM DMAC filters support */ int otx2_dmacflt_get_max_cnt(struct otx2_nic *pf); int otx2_dmacflt_add(struct otx2_nic *pf, const u8 *mac, u8 bit_pos); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c new file mode 100644 index 000000000000..7ac3ef2fa06a --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU PF/VF Netdev Devlink + * + * Copyright (C) 2021 Marvell. 
+ */ + +#include "otx2_common.h" + +/* Devlink Params APIs */ +static int otx2_dl_mcam_count_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct otx2_devlink *otx2_dl = devlink_priv(devlink); + struct otx2_nic *pfvf = otx2_dl->pfvf; + struct otx2_flow_config *flow_cfg; + + if (!pfvf->flow_cfg) { + NL_SET_ERR_MSG_MOD(extack, + "pfvf->flow_cfg not initialized"); + return -EINVAL; + } + + flow_cfg = pfvf->flow_cfg; + if (flow_cfg && flow_cfg->nr_flows) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot modify count when there are active rules"); + return -EINVAL; + } + + return 0; +} + +static int otx2_dl_mcam_count_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct otx2_devlink *otx2_dl = devlink_priv(devlink); + struct otx2_nic *pfvf = otx2_dl->pfvf; + + if (!pfvf->flow_cfg) + return 0; + + otx2_alloc_mcam_entries(pfvf, ctx->val.vu16); + otx2_tc_alloc_ent_bitmap(pfvf); + + return 0; +} + +static int otx2_dl_mcam_count_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct otx2_devlink *otx2_dl = devlink_priv(devlink); + struct otx2_nic *pfvf = otx2_dl->pfvf; + struct otx2_flow_config *flow_cfg; + + if (!pfvf->flow_cfg) { + ctx->val.vu16 = 0; + return 0; + } + + flow_cfg = pfvf->flow_cfg; + ctx->val.vu16 = flow_cfg->max_flows; + + return 0; +} + +enum otx2_dl_param_id { + OTX2_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + OTX2_DEVLINK_PARAM_ID_MCAM_COUNT, +}; + +static const struct devlink_param otx2_dl_params[] = { + DEVLINK_PARAM_DRIVER(OTX2_DEVLINK_PARAM_ID_MCAM_COUNT, + "mcam_count", DEVLINK_PARAM_TYPE_U16, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + otx2_dl_mcam_count_get, otx2_dl_mcam_count_set, + otx2_dl_mcam_count_validate), +}; + +/* Devlink OPs */ +static int otx2_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct otx2_devlink *otx2_dl = devlink_priv(devlink); + struct otx2_nic *pfvf = otx2_dl->pfvf; + + if (is_otx2_vf(pfvf->pcifunc)) + return devlink_info_driver_name_put(req, "rvu_nicvf"); + + return devlink_info_driver_name_put(req, "rvu_nicpf"); +} + +static const struct devlink_ops otx2_devlink_ops = { + .info_get = otx2_devlink_info_get, +}; + +int otx2_register_dl(struct otx2_nic *pfvf) +{ + struct otx2_devlink *otx2_dl; + struct devlink *dl; + int err; + + dl = devlink_alloc(&otx2_devlink_ops, + sizeof(struct otx2_devlink), pfvf->dev); + if (!dl) { + dev_warn(pfvf->dev, "devlink_alloc failed\n"); + return -ENOMEM; + } + + err = devlink_register(dl); + if (err) { + dev_err(pfvf->dev, "devlink register failed with error %d\n", err); + devlink_free(dl); + return err; + } + + otx2_dl = devlink_priv(dl); + otx2_dl->dl = dl; + otx2_dl->pfvf = pfvf; + pfvf->dl = otx2_dl; + + err = devlink_params_register(dl, otx2_dl_params, + ARRAY_SIZE(otx2_dl_params)); + if (err) { + dev_err(pfvf->dev, + "devlink params register failed with error %d", err); + goto err_dl; + } + + devlink_params_publish(dl); + + return 0; + +err_dl: + devlink_unregister(dl); + devlink_free(dl); + return err; +} + +void otx2_unregister_dl(struct otx2_nic *pfvf) +{ + struct otx2_devlink *otx2_dl = pfvf->dl; + struct devlink *dl; + + if (!otx2_dl || !otx2_dl->dl) + return; + + dl = otx2_dl->dl; + + devlink_params_unregister(dl, otx2_dl_params, + ARRAY_SIZE(otx2_dl_params)); + + devlink_unregister(dl); + devlink_free(dl); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.h 
b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.h new file mode 100644 index 000000000000..c7bd4f3c6c6b --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell RVU PF/VF Netdev Devlink + * + * Copyright (C) 2021 Marvell. + * + */ + +#ifndef OTX2_DEVLINK_H +#define OTX2_DEVLINK_H + +struct otx2_devlink { + struct devlink *dl; + struct otx2_nic *pfvf; +}; + +/* Devlink APIs */ +int otx2_register_dl(struct otx2_nic *pfvf); +void otx2_unregister_dl(struct otx2_nic *pfvf); + +#endif /* RVU_DEVLINK_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index b906a0eb6e0d..620da08db317 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -645,6 +645,7 @@ static int otx2_set_rss_hash_opts(struct otx2_nic *pfvf, static int otx2_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc, u32 *rules) { + bool ntuple = !!(dev->features & NETIF_F_NTUPLE); struct otx2_nic *pfvf = netdev_priv(dev); int ret = -EOPNOTSUPP; @@ -654,14 +655,18 @@ static int otx2_get_rxnfc(struct net_device *dev, ret = 0; break; case ETHTOOL_GRXCLSRLCNT: - nfc->rule_cnt = pfvf->flow_cfg->nr_flows; - ret = 0; + if (netif_running(dev) && ntuple) { + nfc->rule_cnt = pfvf->flow_cfg->nr_flows; + ret = 0; + } break; case ETHTOOL_GRXCLSRULE: - ret = otx2_get_flow(pfvf, nfc, nfc->fs.location); + if (netif_running(dev) && ntuple) + ret = otx2_get_flow(pfvf, nfc, nfc->fs.location); break; case ETHTOOL_GRXCLSRLALL: - ret = otx2_get_all_flows(pfvf, nfc, rules); + if (netif_running(dev) && ntuple) + ret = otx2_get_all_flows(pfvf, nfc, rules); break; case ETHTOOL_GRXFH: return otx2_get_rss_hash_opts(pfvf, nfc); @@ -696,41 +701,6 @@ static int otx2_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc) return ret; } -static int otx2vf_get_rxnfc(struct net_device *dev, - struct ethtool_rxnfc *nfc, u32 *rules) -{ - struct otx2_nic *pfvf = netdev_priv(dev); - int ret = -EOPNOTSUPP; - - switch (nfc->cmd) { - case ETHTOOL_GRXRINGS: - nfc->data = pfvf->hw.rx_queues; - ret = 0; - break; - case ETHTOOL_GRXFH: - return otx2_get_rss_hash_opts(pfvf, nfc); - default: - break; - } - return ret; -} - -static int otx2vf_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc) -{ - struct otx2_nic *pfvf = netdev_priv(dev); - int ret = -EOPNOTSUPP; - - switch (nfc->cmd) { - case ETHTOOL_SRXFH: - ret = otx2_set_rss_hash_opts(pfvf, nfc); - break; - default: - break; - } - - return ret; -} - static u32 otx2_get_rxfh_key_size(struct net_device *netdev) { struct otx2_nic *pfvf = netdev_priv(netdev); @@ -1357,8 +1327,8 @@ static const struct ethtool_ops otx2vf_ethtool_ops = { .get_sset_count = otx2vf_get_sset_count, .set_channels = otx2_set_channels, .get_channels = otx2_get_channels, - .get_rxnfc = otx2vf_get_rxnfc, - .set_rxnfc = otx2vf_set_rxnfc, + .get_rxnfc = otx2_get_rxnfc, + .set_rxnfc = otx2_set_rxnfc, .get_rxfh_key_size = otx2_get_rxfh_key_size, .get_rxfh_indir_size = otx2_get_rxfh_indir_size, .get_rxfh = otx2_get_rxfh, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 4d9de525802d..2a25588a01ed 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -5,11 +5,14 @@ */ #include <net/ipv6.h> +#include <linux/sort.h> #include 
"otx2_common.h" #define OTX2_DEFAULT_ACTION 0x1 +static int otx2_mcam_entry_init(struct otx2_nic *pfvf); + struct otx2_flow { struct ethtool_rx_flow_spec flow_spec; struct list_head list; @@ -30,8 +33,7 @@ static void otx2_clear_ntuple_flow_info(struct otx2_nic *pfvf, struct otx2_flow_ { devm_kfree(pfvf->dev, flow_cfg->flow_ent); flow_cfg->flow_ent = NULL; - flow_cfg->ntuple_max_flows = 0; - flow_cfg->tc_max_flows = 0; + flow_cfg->max_flows = 0; } static int otx2_free_ntuple_mcam_entries(struct otx2_nic *pfvf) @@ -40,11 +42,11 @@ static int otx2_free_ntuple_mcam_entries(struct otx2_nic *pfvf) struct npc_mcam_free_entry_req *req; int ent, err; - if (!flow_cfg->ntuple_max_flows) + if (!flow_cfg->max_flows) return 0; mutex_lock(&pfvf->mbox.lock); - for (ent = 0; ent < flow_cfg->ntuple_max_flows; ent++) { + for (ent = 0; ent < flow_cfg->max_flows; ent++) { req = otx2_mbox_alloc_msg_npc_mcam_free_entry(&pfvf->mbox); if (!req) break; @@ -61,7 +63,12 @@ static int otx2_free_ntuple_mcam_entries(struct otx2_nic *pfvf) return 0; } -static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count) +static int mcam_entry_cmp(const void *a, const void *b) +{ + return *(u16 *)a - *(u16 *)b; +} + +int otx2_alloc_mcam_entries(struct otx2_nic *pfvf, u16 count) { struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; struct npc_mcam_alloc_entry_req *req; @@ -76,8 +83,12 @@ static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count) flow_cfg->flow_ent = devm_kmalloc_array(pfvf->dev, count, sizeof(u16), GFP_KERNEL); - if (!flow_cfg->flow_ent) + if (!flow_cfg->flow_ent) { + netdev_err(pfvf->netdev, + "%s: Unable to allocate memory for flow entries\n", + __func__); return -ENOMEM; + } mutex_lock(&pfvf->mbox.lock); @@ -92,8 +103,14 @@ static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count) req->contig = false; req->count = (count - allocated) > NPC_MAX_NONCONTIG_ENTRIES ? NPC_MAX_NONCONTIG_ENTRIES : count - allocated; - req->priority = NPC_MCAM_HIGHER_PRIO; - req->ref_entry = flow_cfg->def_ent[0]; + + /* Allocate higher priority entries for PFs, so that VF's entries + * will be on top of PF. + */ + if (!is_otx2_vf(pfvf->pcifunc)) { + req->priority = NPC_MCAM_HIGHER_PRIO; + req->ref_entry = flow_cfg->def_ent[0]; + } /* Send message to AF */ if (otx2_sync_mbox_msg(&pfvf->mbox)) @@ -114,22 +131,34 @@ static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count) break; } + /* Multiple MCAM entry alloc requests could result in non-sequential + * MCAM entries in the flow_ent[] array. Sort them in an ascending order, + * otherwise user installed ntuple filter index and MCAM entry index will + * not be in sync. 
+ */ + if (allocated) + sort(&flow_cfg->flow_ent[0], allocated, + sizeof(flow_cfg->flow_ent[0]), mcam_entry_cmp, NULL); + exit: mutex_unlock(&pfvf->mbox.lock); - flow_cfg->ntuple_offset = 0; - flow_cfg->ntuple_max_flows = allocated; - flow_cfg->tc_max_flows = allocated; + flow_cfg->max_flows = allocated; + + if (allocated) { + pfvf->flags |= OTX2_FLAG_MCAM_ENTRIES_ALLOC; + pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT; + } if (allocated != count) netdev_info(pfvf->netdev, - "Unable to allocate %d MCAM entries for ntuple, got %d\n", + "Unable to allocate %d MCAM entries, got only %d\n", count, allocated); - return allocated; } +EXPORT_SYMBOL(otx2_alloc_mcam_entries); -int otx2_alloc_mcam_entries(struct otx2_nic *pfvf) +static int otx2_mcam_entry_init(struct otx2_nic *pfvf) { struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; struct npc_mcam_alloc_entry_req *req; @@ -189,18 +218,35 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf) mutex_unlock(&pfvf->mbox.lock); /* Allocate entries for Ntuple filters */ - count = otx2_alloc_ntuple_mcam_entries(pfvf, OTX2_DEFAULT_FLOWCOUNT); + count = otx2_alloc_mcam_entries(pfvf, OTX2_DEFAULT_FLOWCOUNT); if (count <= 0) { otx2_clear_ntuple_flow_info(pfvf, flow_cfg); return 0; } - pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT; pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT; return 0; } +int otx2vf_mcam_flow_init(struct otx2_nic *pfvf) +{ + struct otx2_flow_config *flow_cfg; + + pfvf->flow_cfg = devm_kzalloc(pfvf->dev, + sizeof(struct otx2_flow_config), + GFP_KERNEL); + if (!pfvf->flow_cfg) + return -ENOMEM; + + flow_cfg = pfvf->flow_cfg; + INIT_LIST_HEAD(&flow_cfg->flow_list); + flow_cfg->max_flows = 0; + + return 0; +} +EXPORT_SYMBOL(otx2vf_mcam_flow_init); + int otx2_mcam_flow_init(struct otx2_nic *pf) { int err; @@ -212,7 +258,10 @@ int otx2_mcam_flow_init(struct otx2_nic *pf) INIT_LIST_HEAD(&pf->flow_cfg->flow_list); - err = otx2_alloc_mcam_entries(pf); + /* Allocate bare minimum number of MCAM entries needed for + * unicast and ntuple filters. 
+ */ + err = otx2_mcam_entry_init(pf); if (err) return err; @@ -248,6 +297,7 @@ void otx2_mcam_flow_del(struct otx2_nic *pf) { otx2_destroy_mcam_flows(pf); } +EXPORT_SYMBOL(otx2_mcam_flow_del); /* On success adds mcam entry * On failure enable promisous mode @@ -379,15 +429,19 @@ static void otx2_add_flow_to_list(struct otx2_nic *pfvf, struct otx2_flow *flow) list_add(&flow->list, head); } -static int otx2_get_maxflows(struct otx2_flow_config *flow_cfg) +int otx2_get_maxflows(struct otx2_flow_config *flow_cfg) { - if (flow_cfg->nr_flows == flow_cfg->ntuple_max_flows || + if (!flow_cfg) + return 0; + + if (flow_cfg->nr_flows == flow_cfg->max_flows || bitmap_weight(&flow_cfg->dmacflt_bmap, flow_cfg->dmacflt_max_flows)) - return flow_cfg->ntuple_max_flows + flow_cfg->dmacflt_max_flows; + return flow_cfg->max_flows + flow_cfg->dmacflt_max_flows; else - return flow_cfg->ntuple_max_flows; + return flow_cfg->max_flows; } +EXPORT_SYMBOL(otx2_get_maxflows); int otx2_get_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc, u32 location) @@ -732,7 +786,7 @@ int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, ether_addr_copy(pmask->dmac, eth_mask->h_dest); req->features |= BIT_ULL(NPC_DMAC); } - if (eth_mask->h_proto) { + if (eth_hdr->h_proto) { memcpy(&pkt->etype, ð_hdr->h_proto, sizeof(pkt->etype)); memcpy(&pmask->etype, ð_mask->h_proto, @@ -767,11 +821,6 @@ int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, if (fsp->m_ext.vlan_etype) return -EINVAL; if (fsp->m_ext.vlan_tci) { - if (fsp->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK)) - return -EINVAL; - if (be16_to_cpu(fsp->h_ext.vlan_tci) >= VLAN_N_VID) - return -EINVAL; - memcpy(&pkt->vlan_tci, &fsp->h_ext.vlan_tci, sizeof(pkt->vlan_tci)); memcpy(&pmask->vlan_tci, &fsp->m_ext.vlan_tci, @@ -894,7 +943,7 @@ static int otx2_add_flow_with_pfmac(struct otx2_nic *pfvf, pf_mac->entry = 0; pf_mac->dmac_filter = true; - pf_mac->location = pfvf->flow_cfg->ntuple_max_flows; + pf_mac->location = pfvf->flow_cfg->max_flows; memcpy(&pf_mac->flow_spec, &flow->flow_spec, sizeof(struct ethtool_rx_flow_spec)); pf_mac->flow_spec.location = pf_mac->location; @@ -975,7 +1024,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) flow->dmac_filter = true; flow->entry = find_first_zero_bit(&flow_cfg->dmacflt_bmap, flow_cfg->dmacflt_max_flows); - fsp->location = flow_cfg->ntuple_max_flows + flow->entry; + fsp->location = flow_cfg->max_flows + flow->entry; flow->flow_spec.location = fsp->location; flow->location = fsp->location; @@ -983,11 +1032,11 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) otx2_dmacflt_add(pfvf, eth_hdr->h_dest, flow->entry); } else { - if (flow->location >= pfvf->flow_cfg->ntuple_max_flows) { + if (flow->location >= pfvf->flow_cfg->max_flows) { netdev_warn(pfvf->netdev, "Can't insert non dmac ntuple rule at %d, allowed range %d-0\n", flow->location, - flow_cfg->ntuple_max_flows - 1); + flow_cfg->max_flows - 1); err = -EINVAL; } else { flow->entry = flow_cfg->flow_ent[flow->location]; @@ -996,6 +1045,8 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) } if (err) { + if (err == MBOX_MSG_INVALID) + err = -EINVAL; if (new) kfree(flow); return err; @@ -1140,7 +1191,7 @@ int otx2_destroy_ntuple_flows(struct otx2_nic *pfvf) } req->start = flow_cfg->flow_ent[0]; - req->end = flow_cfg->flow_ent[flow_cfg->ntuple_max_flows - 1]; + req->end = flow_cfg->flow_ent[flow_cfg->max_flows - 1]; err = otx2_sync_mbox_msg(&pfvf->mbox); mutex_unlock(&pfvf->mbox.lock); diff --git 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 22b7af029ebf..7dd56c9392ab 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1787,17 +1787,10 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) static netdev_features_t otx2_fix_features(struct net_device *dev, netdev_features_t features) { - /* check if n-tuple filters are ON */ - if ((features & NETIF_F_HW_TC) && (dev->features & NETIF_F_NTUPLE)) { - netdev_info(dev, "Disabling n-tuple filters\n"); - features &= ~NETIF_F_NTUPLE; - } - - /* check if tc hw offload is ON */ - if ((features & NETIF_F_NTUPLE) && (dev->features & NETIF_F_HW_TC)) { - netdev_info(dev, "Disabling TC hardware offload\n"); - features &= ~NETIF_F_HW_TC; - } + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_STAG_RX; + else + features &= ~NETIF_F_HW_VLAN_STAG_RX; return features; } @@ -1854,6 +1847,7 @@ static int otx2_set_features(struct net_device *netdev, netdev_features_t changed = features ^ netdev->features; bool ntuple = !!(features & NETIF_F_NTUPLE); struct otx2_nic *pf = netdev_priv(netdev); + bool tc = !!(features & NETIF_F_HW_TC); if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev)) return otx2_cgx_config_loopback(pf, @@ -1866,12 +1860,26 @@ static int otx2_set_features(struct net_device *netdev, if ((changed & NETIF_F_NTUPLE) && !ntuple) otx2_destroy_ntuple_flows(pf); - if ((netdev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) && - pf->tc_info.num_entries) { + if ((changed & NETIF_F_HW_TC) && !tc && + pf->flow_cfg && pf->flow_cfg->nr_flows) { netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n"); return -EBUSY; } + if ((changed & NETIF_F_NTUPLE) && ntuple && + (netdev->features & NETIF_F_HW_TC) && !(changed & NETIF_F_HW_TC)) { + netdev_err(netdev, + "Can't enable NTUPLE when TC is active, disable TC and retry\n"); + return -EINVAL; + } + + if ((changed & NETIF_F_HW_TC) && tc && + (netdev->features & NETIF_F_NTUPLE) && !(changed & NETIF_F_NTUPLE)) { + netdev_err(netdev, + "Can't enable TC when NTUPLE is active, disable NTUPLE and retry\n"); + return -EINVAL; + } + return 0; } @@ -2569,8 +2577,6 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) NETIF_F_GSO_UDP_L4); netdev->features |= netdev->hw_features; - netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL; - err = otx2_mcam_flow_init(pf); if (err) goto err_ptp_destroy; @@ -2594,12 +2600,13 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (pf->flags & OTX2_FLAG_TC_FLOWER_SUPPORT) netdev->hw_features |= NETIF_F_HW_TC; + netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL; + netdev->gso_max_segs = OTX2_MAX_GSO_SEGS; netdev->watchdog_timeo = OTX2_TX_TIMEOUT; netdev->netdev_ops = &otx2_netdev_ops; - /* MTU range: 64 - 9190 */ netdev->min_mtu = OTX2_MIN_MTU; netdev->max_mtu = otx2_get_max_mtu(pf); @@ -2619,6 +2626,10 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_mcam_flow_del; + err = otx2_register_dl(pf); + if (err) + goto err_mcam_flow_del; + /* Initialize SR-IOV resources */ err = otx2_sriov_vfcfg_init(pf); if (err) @@ -2776,6 +2787,7 @@ static void otx2_remove(struct pci_dev *pdev) /* Disable link notifications */ otx2_cgx_config_linkevents(pf, false); + otx2_unregister_dl(pf); unregister_netdev(netdev); otx2_sriov_disable(pf->pdev); 
otx2_sriov_vfcfg_cleanup(pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 972b202b9884..81840b625c68 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -52,6 +52,29 @@ struct otx2_tc_flow { bool is_act_police; }; +int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic) +{ + struct otx2_tc_info *tc = &nic->tc_info; + + if (!nic->flow_cfg->max_flows || is_otx2_vf(nic->pcifunc)) + return 0; + + /* Max flows changed, free the existing bitmap */ + kfree(tc->tc_entries_bitmap); + + tc->tc_entries_bitmap = + kcalloc(BITS_TO_LONGS(nic->flow_cfg->max_flows), + sizeof(long), GFP_KERNEL); + if (!tc->tc_entries_bitmap) { + netdev_err(nic->netdev, + "Unable to alloc TC flow entries bitmap\n"); + return -ENOMEM; + } + + return 0; +} +EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap); + static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp, u32 *burst_mantissa) { @@ -596,6 +619,7 @@ static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry) static int otx2_tc_del_flow(struct otx2_nic *nic, struct flow_cls_offload *tc_flow_cmd) { + struct otx2_flow_config *flow_cfg = nic->flow_cfg; struct otx2_tc_info *tc_info = &nic->tc_info; struct otx2_tc_flow *flow_node; int err; @@ -638,7 +662,7 @@ static int otx2_tc_del_flow(struct otx2_nic *nic, kfree_rcu(flow_node, rcu); clear_bit(flow_node->bitpos, tc_info->tc_entries_bitmap); - tc_info->num_entries--; + flow_cfg->nr_flows--; return 0; } @@ -647,6 +671,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, struct flow_cls_offload *tc_flow_cmd) { struct netlink_ext_ack *extack = tc_flow_cmd->common.extack; + struct otx2_flow_config *flow_cfg = nic->flow_cfg; struct otx2_tc_info *tc_info = &nic->tc_info; struct otx2_tc_flow *new_node, *old_node; struct npc_install_flow_req *req, dummy; @@ -655,9 +680,9 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT)) return -ENOMEM; - if (bitmap_full(tc_info->tc_entries_bitmap, nic->flow_cfg->tc_max_flows)) { + if (bitmap_full(tc_info->tc_entries_bitmap, flow_cfg->max_flows)) { NL_SET_ERR_MSG_MOD(extack, - "Not enough MCAM space to add the flow"); + "Free MCAM entry not available to add the flow"); return -ENOMEM; } @@ -695,10 +720,9 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, memcpy(req, &dummy, sizeof(struct npc_install_flow_req)); new_node->bitpos = find_first_zero_bit(tc_info->tc_entries_bitmap, - nic->flow_cfg->tc_max_flows); + flow_cfg->max_flows); req->channel = nic->hw.rx_chan_base; - req->entry = nic->flow_cfg->flow_ent[nic->flow_cfg->tc_flower_offset + - nic->flow_cfg->tc_max_flows - new_node->bitpos]; + req->entry = flow_cfg->flow_ent[flow_cfg->max_flows - new_node->bitpos - 1]; req->intf = NIX_INTF_RX; req->set_cntr = 1; new_node->entry = req->entry; @@ -723,7 +747,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, } set_bit(new_node->bitpos, tc_info->tc_entries_bitmap); - tc_info->num_entries++; + flow_cfg->nr_flows++; return 0; @@ -1008,10 +1032,21 @@ static const struct rhashtable_params tc_flow_ht_params = { int otx2_init_tc(struct otx2_nic *nic) { struct otx2_tc_info *tc = &nic->tc_info; + int err; /* Exclude receive queue 0 being used for police action */ set_bit(0, &nic->rq_bmap); + if (!nic->flow_cfg) { + netdev_err(nic->netdev, + "Can't init TC, nic->flow_cfg is not setup\n"); + return -EINVAL; + } + + err = otx2_tc_alloc_ent_bitmap(nic); + if (err) + return err; + tc->flow_ht_params 
= tc_flow_ht_params; return rhashtable_init(&tc->flow_table, &tc->flow_ht_params); } @@ -1020,5 +1055,6 @@ void otx2_shutdown_tc(struct otx2_nic *nic) { struct otx2_tc_info *tc = &nic->tc_info; + kfree(tc->tc_entries_bitmap); rhashtable_destroy(&tc->flow_table); } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index a8bee5aefec1..58b912653ac2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -464,6 +464,28 @@ static void otx2vf_reset_task(struct work_struct *work) rtnl_unlock(); } +static int otx2vf_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = features ^ netdev->features; + bool ntuple_enabled = !!(features & NETIF_F_NTUPLE); + struct otx2_nic *vf = netdev_priv(netdev); + + if (changed & NETIF_F_NTUPLE) { + if (!ntuple_enabled) { + otx2_mcam_flow_del(vf); + return 0; + } + + if (!otx2_get_maxflows(vf->flow_cfg)) { + netdev_err(netdev, + "Can't enable NTUPLE, MCAM entries not allocated\n"); + return -EINVAL; + } + } + return 0; +} + static const struct net_device_ops otx2vf_netdev_ops = { .ndo_open = otx2vf_open, .ndo_stop = otx2vf_stop, @@ -471,6 +493,7 @@ static const struct net_device_ops otx2vf_netdev_ops = { .ndo_set_rx_mode = otx2vf_set_rx_mode, .ndo_set_mac_address = otx2_set_mac_address, .ndo_change_mtu = otx2vf_change_mtu, + .ndo_set_features = otx2vf_set_features, .ndo_get_stats64 = otx2_get_stats64, .ndo_tx_timeout = otx2_tx_timeout, }; @@ -627,12 +650,14 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) NETIF_F_HW_VLAN_STAG_TX; netdev->features |= netdev->hw_features; + netdev->hw_features |= NETIF_F_NTUPLE; + netdev->hw_features |= NETIF_F_RXALL; + netdev->gso_max_segs = OTX2_MAX_GSO_SEGS; netdev->watchdog_timeo = OTX2_TX_TIMEOUT; netdev->netdev_ops = &otx2vf_netdev_ops; - /* MTU range: 68 - 9190 */ netdev->min_mtu = OTX2_MIN_MTU; netdev->max_mtu = otx2_get_max_mtu(vf); @@ -658,6 +683,14 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) otx2vf_set_ethtool_ops(netdev); + err = otx2vf_mcam_flow_init(vf); + if (err) + goto err_unreg_netdev; + + err = otx2_register_dl(vf); + if (err) + goto err_unreg_netdev; + /* Enable pause frames by default */ vf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED; vf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED; @@ -695,6 +728,7 @@ static void otx2vf_remove(struct pci_dev *pdev) vf = netdev_priv(netdev); cancel_work_sync(&vf->reset_task); + otx2_unregister_dl(vf); unregister_netdev(netdev); if (vf->otx2_wq) destroy_workqueue(vf->otx2_wq); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c index d12e21db9fd6..68b442eb6d69 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -390,11 +390,12 @@ static const struct devlink_ops prestera_dl_ops = { .trap_drop_counter_get = prestera_drop_counter_get, }; -struct prestera_switch *prestera_devlink_alloc(void) +struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev) { struct devlink *dl; - dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch)); + dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch), + dev->dev); return devlink_priv(dl); } @@ -411,7 +412,7 @@ int prestera_devlink_register(struct prestera_switch *sw) struct devlink *dl = 
priv_to_devlink(sw); int err; - err = devlink_register(dl, sw->dev->dev); + err = devlink_register(dl); if (err) { dev_err(prestera_dev(sw), "devlink_register failed: %d\n", err); return err; @@ -530,6 +531,8 @@ err_trap_register: prestera_trap = &prestera_trap_items_arr[i]; devlink_traps_unregister(devlink, &prestera_trap->trap, 1); } + devlink_trap_groups_unregister(devlink, prestera_trap_groups_arr, + groups_count); err_groups_register: kfree(trap_data->trap_items_arr); err_trap_items_alloc: diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h index 5d73aa9db897..cc34c3db13a2 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h @@ -6,7 +6,7 @@ #include "prestera.h" -struct prestera_switch *prestera_devlink_alloc(void); +struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev); void prestera_devlink_free(struct prestera_switch *sw); int prestera_devlink_register(struct prestera_switch *sw); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 7c569c1abefc..44c670807fb3 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -905,7 +905,7 @@ int prestera_device_register(struct prestera_device *dev) struct prestera_switch *sw; int err; - sw = prestera_devlink_alloc(); + sw = prestera_devlink_alloc(dev); if (!sw) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index be01ec8284e6..3ce6ccd0f539 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -758,7 +758,7 @@ static void prestera_fdb_offload_notify(struct prestera_port *port, struct switchdev_notifier_fdb_info *info) { - struct switchdev_notifier_fdb_info send_info; + struct switchdev_notifier_fdb_info send_info = {}; send_info.addr = info->addr; send_info.vid = info->vid; @@ -1133,7 +1133,7 @@ static int prestera_switchdev_blk_event(struct notifier_block *unused, static void prestera_fdb_event(struct prestera_switch *sw, struct prestera_event *evt, void *arg) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; struct net_device *dev = NULL; struct prestera_port *port; struct prestera_lag *lag; diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 400e611ba041..1b4b1f642317 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -6,8 +6,8 @@ config MLX4_EN tristate "Mellanox Technologies 1/10/40Gbit Ethernet support" depends on PCI && NETDEVICES && ETHERNET && INET + depends on PTP_1588_CLOCK_OPTIONAL select MLX4_CORE - imply PTP_1588_CLOCK help This driver supports Mellanox Technologies ConnectX Ethernet devices. 
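[Editor's note, not part of the patch] Several hunks in this section (prestera, mlx4, mlx5) migrate drivers to the devlink calling convention where the backing struct device is supplied at allocation time and devlink_register() takes only the devlink instance. A minimal, hedged sketch of that flow follows; my_ops, struct my_priv and my_probe() are illustrative placeholders and not taken from the patch.

/* Sketch of the post-series devlink flow, assuming a PCI driver:
 * the struct device moves from devlink_register() to devlink_alloc().
 */
#include <linux/pci.h>
#include <net/devlink.h>

struct my_priv { int placeholder; };           /* hypothetical driver private data */
static const struct devlink_ops my_ops;        /* empty ops, for illustration only */

static int my_probe(struct pci_dev *pdev)
{
	struct devlink *dl;
	int err;

	/* device pointer is now given at allocation time */
	dl = devlink_alloc(&my_ops, sizeof(struct my_priv), &pdev->dev);
	if (!dl)
		return -ENOMEM;

	/* ... driver init using devlink_priv(dl) ... */

	err = devlink_register(dl);            /* no struct device argument anymore */
	if (err)
		devlink_free(dl);
	return err;
}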
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 28ac4693da3c..7267c6c6d2e2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4005,7 +4005,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) printk_once(KERN_INFO "%s", mlx4_version); - devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv)); + devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); @@ -4024,7 +4024,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mutex_init(&dev->persist->interface_state_mutex); mutex_init(&dev->persist->pci_status_mutex); - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) goto err_persist_free; ret = devlink_params_register(devlink, mlx4_devlink_params, diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 2584bc038f94..b149e601f673 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -739,7 +739,7 @@ static void mlx4_cleanup_qp_zones(struct mlx4_dev *dev) int i; for (i = 0; - i < sizeof(qp_table->zones_uids)/sizeof(qp_table->zones_uids[0]); + i < ARRAY_SIZE(qp_table->zones_uids); i++) { struct mlx4_bitmap *bitmap = mlx4_zone_get_bitmap(qp_table->zones, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index e1a5a79e27c7..92056452a9e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -10,7 +10,7 @@ config MLX5_CORE select NET_DEVLINK depends on VXLAN || !VXLAN depends on MLXFW || !MLXFW - depends on PTP_1588_CLOCK || !PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE help Core driver for low level functionality of the ConnectX-4 and diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 33e550d77fa6..63032cd6efb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -22,13 +22,13 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ # # Netdev basic # -mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ +mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \ + en/channels.o en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ - en/qos.o en/trap.o en/fs_tt_redirect.o en/rqt.o en/tir.o \ - en/rx_res.o en/channels.o + en/qos.o en/trap.o en/fs_tt_redirect.o # # Netdev extra @@ -44,19 +44,22 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ lib/fs_chains.o en/tc_tun.o \ esw/indir_table.o en/tc_tun_encap.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ - en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o + en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \ + en/tc/post_act.o mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o +mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o # # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o 
rdma.o esw/legacy.o + ecpf.o rdma.o esw/legacy.o \ + esw/devlink_port.o esw/vporttbl.o esw/qos.o + mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ - esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \ - esw/devlink_port.o esw/vporttbl.o -mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += esw/sample.o + esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o + mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 9d79c5ec31e9..db5dfff585c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -877,7 +877,7 @@ static void cb_timeout_handler(struct work_struct *work) ent->ret = -ETIMEDOUT; mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n", ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); out: cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */ @@ -994,7 +994,7 @@ static void cmd_work_handler(struct work_struct *work) MLX5_SET(mbox_out, ent->out, status, status); MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); return; } @@ -1008,7 +1008,7 @@ static void cmd_work_handler(struct work_struct *work) poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT)); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT)); } } @@ -1068,7 +1068,7 @@ static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); ent->ret = -ETIMEDOUT; - mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); } static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index df3e4938ecdd..cf97985628ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -89,7 +89,8 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen) { - int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn); + int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), + c_eqn_or_apu_element); u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {}; struct mlx5_eq_comp *eq; int err; @@ -134,6 +135,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cqn); cq->uar = dev->priv.uar; + cq->irqn = eq->core.irqn; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index def2156e50ee..ff6b03dc7e32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -53,7 +53,7 @@ static bool is_eth_rep_supported(struct mlx5_core_dev *dev) return true; } -static bool is_eth_supported(struct mlx5_core_dev *dev) +bool mlx5_eth_supported(struct mlx5_core_dev *dev) { if (!IS_ENABLED(CONFIG_MLX5_CORE_EN)) return false; 
@@ -105,7 +105,18 @@ static bool is_eth_supported(struct mlx5_core_dev *dev) return true; } -static bool is_vnet_supported(struct mlx5_core_dev *dev) +static bool is_eth_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + &val); + return err ? false : val.vbool; +} + +bool mlx5_vnet_supported(struct mlx5_core_dev *dev) { if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET)) return false; @@ -127,6 +138,17 @@ static bool is_vnet_supported(struct mlx5_core_dev *dev) return true; } +static bool is_vnet_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + &val); + return err ? false : val.vbool; +} + static bool is_ib_rep_supported(struct mlx5_core_dev *dev) { if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) @@ -170,7 +192,7 @@ static bool is_mp_supported(struct mlx5_core_dev *dev) return true; } -static bool is_ib_supported(struct mlx5_core_dev *dev) +bool mlx5_rdma_supported(struct mlx5_core_dev *dev) { if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) return false; @@ -187,6 +209,17 @@ static bool is_ib_supported(struct mlx5_core_dev *dev) return true; } +static bool is_ib_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + &val); + return err ? false : val.vbool; +} + enum { MLX5_INTERFACE_PROTOCOL_ETH, MLX5_INTERFACE_PROTOCOL_ETH_REP, @@ -201,13 +234,17 @@ enum { static const struct mlx5_adev_device { const char *suffix; bool (*is_supported)(struct mlx5_core_dev *dev); + bool (*is_enabled)(struct mlx5_core_dev *dev); } mlx5_adev_devices[] = { [MLX5_INTERFACE_PROTOCOL_VNET] = { .suffix = "vnet", - .is_supported = &is_vnet_supported }, + .is_supported = &mlx5_vnet_supported, + .is_enabled = &is_vnet_enabled }, [MLX5_INTERFACE_PROTOCOL_IB] = { .suffix = "rdma", - .is_supported = &is_ib_supported }, + .is_supported = &mlx5_rdma_supported, + .is_enabled = &is_ib_enabled }, [MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth", - .is_supported = &is_eth_supported }, + .is_supported = &mlx5_eth_supported, + .is_enabled = &is_eth_enabled }, [MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep", .is_supported = &is_eth_rep_supported }, [MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep", @@ -308,6 +345,14 @@ int mlx5_attach_device(struct mlx5_core_dev *dev) if (!priv->adev[i]) { bool is_supported = false; + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + continue; + } + if (mlx5_adev_devices[i].is_supported) is_supported = mlx5_adev_devices[i].is_supported(dev); @@ -360,6 +405,14 @@ void mlx5_detach_device(struct mlx5_core_dev *dev) if (!priv->adev[i]) continue; + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + goto skip_suspend; + } + adev = &priv->adev[i]->adev; /* Auxiliary driver was unbind manually through sysfs */ if (!adev->dev.driver) @@ -447,12 +500,21 @@ static void delete_drivers(struct mlx5_core_dev *dev) if (!priv->adev[i]) continue; + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + goto del_adev; + } + if (mlx5_adev_devices[i].is_supported && !delete_all) is_supported = 
mlx5_adev_devices[i].is_supported(dev); if (is_supported) continue; +del_adev: del_adev(&priv->adev[i]->adev); priv->adev[i] = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index d791d351b489..e84287ffc7ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -7,6 +7,7 @@ #include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" +#include "esw/qos.h" #include "sf/dev/dev.h" #include "sf/sf.h" @@ -292,6 +293,13 @@ static const struct devlink_ops mlx5_devlink_ops = { .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, .port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get, .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set, + .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set, + .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set, + .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set, + .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set, + .rate_node_new = mlx5_esw_devlink_rate_node_new, + .rate_node_del = mlx5_esw_devlink_rate_node_del, + .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set, #endif #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, @@ -359,9 +367,10 @@ int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, return 0; } -struct devlink *mlx5_devlink_alloc(void) +struct devlink *mlx5_devlink_alloc(struct device *dev) { - return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev)); + return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev), + dev); } void mlx5_devlink_free(struct devlink *devlink) @@ -595,6 +604,157 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) #endif } +static const struct devlink_param enable_eth_param = + DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL); + +static int mlx5_devlink_eth_param_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!mlx5_eth_supported(dev)) + return 0; + + err = devlink_param_register(devlink, &enable_eth_param); + if (err) + return err; + + value.vbool = true; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + value); + devlink_param_publish(devlink, &enable_eth_param); + return 0; +} + +static void mlx5_devlink_eth_param_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!mlx5_eth_supported(dev)) + return; + + devlink_param_unpublish(devlink, &enable_eth_param); + devlink_param_unregister(devlink, &enable_eth_param); +} + +static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + bool new_state = val.vbool; + + if (new_state && !mlx5_rdma_supported(dev)) + return -EOPNOTSUPP; + return 0; +} + +static const struct devlink_param enable_rdma_param = + DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_enable_rdma_validate); + +static int mlx5_devlink_rdma_param_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND) || MLX5_ESWITCH_MANAGER(dev)) + return 0; + + err = 
devlink_param_register(devlink, &enable_rdma_param); + if (err) + return err; + + value.vbool = true; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + value); + devlink_param_publish(devlink, &enable_rdma_param); + return 0; +} + +static void mlx5_devlink_rdma_param_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND) || MLX5_ESWITCH_MANAGER(dev)) + return; + + devlink_param_unpublish(devlink, &enable_rdma_param); + devlink_param_unregister(devlink, &enable_rdma_param); +} + +static const struct devlink_param enable_vnet_param = + DEVLINK_PARAM_GENERIC(ENABLE_VNET, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL); + +static int mlx5_devlink_vnet_param_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!mlx5_vnet_supported(dev)) + return 0; + + err = devlink_param_register(devlink, &enable_vnet_param); + if (err) + return err; + + value.vbool = true; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + value); + devlink_param_publish(devlink, &enable_rdma_param); + return 0; +} + +static void mlx5_devlink_vnet_param_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!mlx5_vnet_supported(dev)) + return; + + devlink_param_unpublish(devlink, &enable_vnet_param); + devlink_param_unregister(devlink, &enable_vnet_param); +} + +static int mlx5_devlink_auxdev_params_register(struct devlink *devlink) +{ + int err; + + err = mlx5_devlink_eth_param_register(devlink); + if (err) + return err; + + err = mlx5_devlink_rdma_param_register(devlink); + if (err) + goto rdma_err; + + err = mlx5_devlink_vnet_param_register(devlink); + if (err) + goto vnet_err; + return 0; + +vnet_err: + mlx5_devlink_rdma_param_unregister(devlink); +rdma_err: + mlx5_devlink_eth_param_unregister(devlink); + return err; +} + +static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink) +{ + mlx5_devlink_vnet_param_unregister(devlink); + mlx5_devlink_rdma_param_unregister(devlink); + mlx5_devlink_eth_param_unregister(devlink); +} + #define MLX5_TRAP_DROP(_id, _group_id) \ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ @@ -638,11 +798,11 @@ static void mlx5_devlink_traps_unregister(struct devlink *devlink) ARRAY_SIZE(mlx5_trap_groups_arr)); } -int mlx5_devlink_register(struct devlink *devlink, struct device *dev) +int mlx5_devlink_register(struct devlink *devlink) { int err; - err = devlink_register(devlink, dev); + err = devlink_register(devlink); if (err) return err; @@ -653,6 +813,10 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev) mlx5_devlink_set_params_init_values(devlink); devlink_params_publish(devlink); + err = mlx5_devlink_auxdev_params_register(devlink); + if (err) + goto auxdev_reg_err; + err = mlx5_devlink_traps_register(devlink); if (err) goto traps_reg_err; @@ -660,6 +824,8 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev) return 0; traps_reg_err: + mlx5_devlink_auxdev_params_unregister(devlink); +auxdev_reg_err: devlink_params_unregister(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); params_reg_err: @@ -670,6 +836,8 @@ params_reg_err: void mlx5_devlink_unregister(struct devlink *devlink) { mlx5_devlink_traps_unregister(devlink); + mlx5_devlink_auxdev_params_unregister(devlink); + 
devlink_params_unpublish(devlink); devlink_params_unregister(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); devlink_unregister(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index 7318d44b774b..30bf4882779b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -31,9 +31,9 @@ int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev); int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, enum devlink_trap_action *action); -struct devlink *mlx5_devlink_alloc(void); +struct devlink *mlx5_devlink_alloc(struct device *dev); void mlx5_devlink_free(struct devlink *devlink); -int mlx5_devlink_register(struct devlink *devlink, struct device *dev); +int mlx5_devlink_register(struct devlink *devlink); void mlx5_devlink_unregister(struct devlink *devlink); #endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 01a1d02dcf15..3f8a98093f8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -1019,12 +1019,19 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER); mlx5_eq_notifier_register(dev, &tracer->nb); - mlx5_fw_tracer_start(tracer); - + err = mlx5_fw_tracer_start(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to start tracer %d\n", err); + goto err_notifier_unregister; + } return 0; +err_notifier_unregister: + mlx5_eq_notifier_unregister(dev, &tracer->nb); + mlx5_core_destroy_mkey(dev, &tracer->buff.mkey); err_dealloc_pd: mlx5_core_dealloc_pd(dev, tracer->buff.pdn); + cancel_work_sync(&tracer->read_fw_strings_work); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4f6897c1ea8d..669a75f3537a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -72,6 +72,7 @@ struct page_pool; #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu)) #define MLX5E_MAX_NUM_TC 8 +#define MLX5E_MAX_NUM_MQPRIO_CH_TC TC_QOPT_MAX_QUEUE #define MLX5_RX_HEADROOM NET_SKB_PAD #define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \ @@ -248,7 +249,10 @@ struct mlx5e_params { u8 rq_wq_type; u8 log_rq_mtu_frames; u16 num_channels; - u8 num_tc; + struct { + u16 mode; + u8 num_tc; + } mqprio; bool rx_cqe_compress_def; bool tunneled_offload_en; struct dim_cq_moder rx_cq_moderation; @@ -268,6 +272,12 @@ struct mlx5e_params { bool ptp_rx; }; +static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params) +{ + return params->mqprio.mode == TC_MQPRIO_MODE_DCB ? 
+ params->mqprio.num_tc : 1; +} + enum { MLX5E_RQ_STATE_ENABLED, MLX5E_RQ_STATE_RECOVERING, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index e348c276eaa1..41684a6c44e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -7,6 +7,8 @@ #include "mod_hdr.h" #include "lib/fs_ttc.h" +struct mlx5e_post_act; + enum { MLX5E_TC_FT_LEVEL = 0, MLX5E_TC_TTC_FT_LEVEL, @@ -19,6 +21,7 @@ struct mlx5e_tc_table { struct mutex t_lock; struct mlx5_flow_table *t; struct mlx5_fs_chains *chains; + struct mlx5e_post_act *post_act; struct rhashtable ht; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c index ea321e528749..4e72ca8070e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c @@ -5,11 +5,15 @@ #include <linux/slab.h> #include <linux/xarray.h> #include <linux/hashtable.h> +#include <linux/refcount.h> #include "mapping.h" #define MAPPING_GRACE_PERIOD 2000 +static LIST_HEAD(shared_ctx_list); +static DEFINE_MUTEX(shared_ctx_lock); + struct mapping_ctx { struct xarray xarray; DECLARE_HASHTABLE(ht, 8); @@ -20,6 +24,10 @@ struct mapping_ctx { struct delayed_work dwork; struct list_head pending_list; spinlock_t pending_list_lock; /* Guards pending list */ + u64 id; + u8 type; + struct list_head list; + refcount_t refcount; }; struct mapping_item { @@ -205,11 +213,48 @@ mapping_create(size_t data_size, u32 max_id, bool delayed_removal) mutex_init(&ctx->lock); xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1); + refcount_set(&ctx->refcount, 1); + INIT_LIST_HEAD(&ctx->list); + + return ctx; +} + +struct mapping_ctx * +mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal) +{ + struct mapping_ctx *ctx; + + mutex_lock(&shared_ctx_lock); + list_for_each_entry(ctx, &shared_ctx_list, list) { + if (ctx->id == id && ctx->type == type) { + if (refcount_inc_not_zero(&ctx->refcount)) + goto unlock; + break; + } + } + + ctx = mapping_create(data_size, max_id, delayed_removal); + if (IS_ERR(ctx)) + goto unlock; + + ctx->id = id; + ctx->type = type; + list_add(&ctx->list, &shared_ctx_list); + +unlock: + mutex_unlock(&shared_ctx_lock); return ctx; } void mapping_destroy(struct mapping_ctx *ctx) { + if (!refcount_dec_and_test(&ctx->refcount)) + return; + + mutex_lock(&shared_ctx_lock); + list_del(&ctx->list); + mutex_unlock(&shared_ctx_lock); + mapping_flush_work(ctx); xa_destroy(&ctx->xarray); mutex_destroy(&ctx->lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h index 285525cc5470..4e2119f0f4c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h @@ -24,4 +24,9 @@ struct mapping_ctx *mapping_create(size_t data_size, u32 max_id, bool delayed_removal); void mapping_destroy(struct mapping_ctx *ctx); +/* adds mapping with an id or get an existing mapping with the same id + */ +struct mapping_ctx * +mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal); + #endif /* __MLX5_MAPPING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index f479ef31ca40..ee688dec67a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ 
-326,13 +326,14 @@ static int mlx5e_ptp_open_txqsqs(struct mlx5e_ptp *c, struct mlx5e_ptp_params *cparams) { struct mlx5e_params *params = &cparams->params; + u8 num_tc = mlx5e_get_dcb_num_tc(params); int ix_base; int err; int tc; - ix_base = params->num_tc * params->num_channels; + ix_base = num_tc * params->num_channels; - for (tc = 0; tc < params->num_tc; tc++) { + for (tc = 0; tc < num_tc; tc++) { int txq_ix = ix_base + tc; err = mlx5e_ptp_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, @@ -365,9 +366,12 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c, struct mlx5e_create_cq_param ccp = {}; struct dim_cq_moder ptp_moder = {}; struct mlx5e_cq_param *cq_param; + u8 num_tc; int err; int tc; + num_tc = mlx5e_get_dcb_num_tc(params); + ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev)); ccp.ch_stats = c->stats; ccp.napi = &c->napi; @@ -375,7 +379,7 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c, cq_param = &cparams->txq_sq_param.cqp; - for (tc = 0; tc < params->num_tc; tc++) { + for (tc = 0; tc < num_tc; tc++) { struct mlx5e_cq *cq = &c->ptpsq[tc].txqsq.cq; err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq); @@ -383,7 +387,7 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c, goto out_err_txqsq_cq; } - for (tc = 0; tc < params->num_tc; tc++) { + for (tc = 0; tc < num_tc; tc++) { struct mlx5e_cq *cq = &c->ptpsq[tc].ts_cq; struct mlx5e_ptpsq *ptpsq = &c->ptpsq[tc]; @@ -399,7 +403,7 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c, out_err_ts_cq: for (--tc; tc >= 0; tc--) mlx5e_close_cq(&c->ptpsq[tc].ts_cq); - tc = params->num_tc; + tc = num_tc; out_err_txqsq_cq: for (--tc; tc >= 0; tc--) mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq); @@ -475,7 +479,7 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, params->num_channels = orig->num_channels; params->hard_mtu = orig->hard_mtu; params->sw_mtu = orig->sw_mtu; - params->num_tc = orig->num_tc; + params->mqprio = orig->mqprio; /* SQ */ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { @@ -680,7 +684,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); - c->num_tc = params->num_tc; + c->num_tc = mlx5e_get_dcb_num_tc(params); c->stats = &priv->ptp_stats.ch; c->lag_port = lag_port; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 5efe3278b0f6..c9ac69f62f21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -132,7 +132,7 @@ static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid) */ bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS); - return (chs->params.num_channels + is_ptp) * chs->params.num_tc + qid; + return (chs->params.num_channels + is_ptp) * mlx5e_get_dcb_num_tc(&chs->params) + qid; } int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 3c0032c9647c..0c38c2e319be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -15,9 +15,116 @@ struct mlx5_bridge_switchdev_fdb_work { struct work_struct work; struct switchdev_notifier_fdb_info fdb_info; struct net_device *dev; + struct mlx5_esw_bridge_offloads *br_offloads; bool add; }; +static bool 
mlx5_esw_bridge_dev_same_esw(struct net_device *dev, struct mlx5_eswitch *esw) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return esw == priv->mdev->priv.eswitch; +} + +static bool mlx5_esw_bridge_dev_same_hw(struct net_device *dev, struct mlx5_eswitch *esw) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev, *esw_mdev; + u64 system_guid, esw_system_guid; + + mdev = priv->mdev; + esw_mdev = esw->dev; + + system_guid = mlx5_query_nic_system_image_guid(mdev); + esw_system_guid = mlx5_query_nic_system_image_guid(esw_mdev); + + return system_guid == esw_system_guid; +} + +static struct net_device * +mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw) +{ + struct net_device *lower; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower, iter) { + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + if (!mlx5e_eswitch_rep(lower)) + continue; + + priv = netdev_priv(lower); + mdev = priv->mdev; + if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw)) + return lower; + } + + return NULL; +} + +static struct net_device * +mlx5_esw_bridge_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw, + u16 *vport_num, u16 *esw_owner_vhca_id) +{ + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; + + if (netif_is_lag_master(dev)) + dev = mlx5_esw_bridge_lag_rep_get(dev, esw); + + if (!dev || !mlx5e_eswitch_rep(dev) || !mlx5_esw_bridge_dev_same_hw(dev, esw)) + return NULL; + + priv = netdev_priv(dev); + rpriv = priv->ppriv; + *vport_num = rpriv->rep->vport; + *esw_owner_vhca_id = MLX5_CAP_GEN(priv->mdev, vhca_id); + return dev; +} + +static struct net_device * +mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw, + u16 *vport_num, u16 *esw_owner_vhca_id) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (netif_is_lag_master(dev) || mlx5e_eswitch_rep(dev)) + return mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, vport_num, + esw_owner_vhca_id); + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + struct net_device *rep; + + if (netif_is_bridge_master(lower_dev)) + continue; + + rep = mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(lower_dev, esw, vport_num, + esw_owner_vhca_id); + if (rep) + return rep; + } + + return NULL; +} + +static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device *rep, + struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + if (!mlx5_esw_bridge_dev_same_esw(rep, esw)) + return false; + + priv = netdev_priv(rep); + mdev = priv->mdev; + if (netif_is_lag_master(dev)) + return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev); + return true; +} + static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr) { struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb, @@ -25,37 +132,36 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr netdev_nb); struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info = ptr; + struct net_device *upper = info->upper_dev, *rep; + struct mlx5_eswitch *esw = br_offloads->esw; + u16 vport_num, esw_owner_vhca_id; struct netlink_ext_ack *extack; - struct mlx5e_rep_priv *rpriv; - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - struct net_device *upper; - struct mlx5e_priv *priv; - u16 vport_num; - - if (!mlx5e_eswitch_rep(dev)) - return 0; + int ifindex = upper->ifindex; + int err; - upper = 
info->upper_dev; if (!netif_is_bridge_master(upper)) return 0; - esw = br_offloads->esw; - priv = netdev_priv(dev); - if (esw != priv->mdev->priv.eswitch) + rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id); + if (!rep) return 0; - rpriv = priv->ppriv; - vport_num = rpriv->rep->vport; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return PTR_ERR(vport); - extack = netdev_notifier_info_to_extack(&info->info); - return info->linking ? - mlx5_esw_bridge_vport_link(upper->ifindex, br_offloads, vport, extack) : - mlx5_esw_bridge_vport_unlink(upper->ifindex, br_offloads, vport, extack); + if (mlx5_esw_bridge_is_local(dev, rep, esw)) + err = info->linking ? + mlx5_esw_bridge_vport_link(ifindex, vport_num, esw_owner_vhca_id, + br_offloads, extack) : + mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, + br_offloads, extack); + else if (mlx5_esw_bridge_dev_same_hw(rep, esw)) + err = info->linking ? + mlx5_esw_bridge_vport_peer_link(ifindex, vport_num, esw_owner_vhca_id, + br_offloads, extack) : + mlx5_esw_bridge_vport_peer_unlink(ifindex, vport_num, esw_owner_vhca_id, + br_offloads, extack); + + return err; } static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb, @@ -75,31 +181,28 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb, return notifier_from_errno(err); } -static int mlx5_esw_bridge_port_obj_add(struct net_device *dev, - const void *ctx, - const struct switchdev_obj *obj, - struct netlink_ext_ack *extack) +static int +mlx5_esw_bridge_port_obj_add(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + struct mlx5_esw_bridge_offloads *br_offloads) { + struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_obj_info->info); + const struct switchdev_obj *obj = port_obj_info->obj; const struct switchdev_obj_port_vlan *vlan; - struct mlx5e_rep_priv *rpriv; - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - struct mlx5e_priv *priv; - u16 vport_num; - int err = 0; + u16 vport_num, esw_owner_vhca_id; + int err; - priv = netdev_priv(dev); - rpriv = priv->ppriv; - vport_num = rpriv->rep->vport; - esw = priv->mdev->priv.eswitch; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return PTR_ERR(vport); + if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) + return 0; + + port_obj_info->handled = true; switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); - err = mlx5_esw_bridge_port_vlan_add(vlan->vid, vlan->flags, esw, vport, extack); + err = mlx5_esw_bridge_port_vlan_add(vport_num, esw_owner_vhca_id, vlan->vid, + vlan->flags, br_offloads, extack); break; default: return -EOPNOTSUPP; @@ -107,29 +210,25 @@ static int mlx5_esw_bridge_port_obj_add(struct net_device *dev, return err; } -static int mlx5_esw_bridge_port_obj_del(struct net_device *dev, - const void *ctx, - const struct switchdev_obj *obj) +static int +mlx5_esw_bridge_port_obj_del(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + struct mlx5_esw_bridge_offloads *br_offloads) { + const struct switchdev_obj *obj = port_obj_info->obj; const struct switchdev_obj_port_vlan *vlan; - struct mlx5e_rep_priv *rpriv; - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - struct mlx5e_priv *priv; - u16 vport_num; + u16 vport_num, esw_owner_vhca_id; - priv = netdev_priv(dev); - rpriv = priv->ppriv; - vport_num = rpriv->rep->vport; 
- esw = priv->mdev->priv.eswitch; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return PTR_ERR(vport); + if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) + return 0; + + port_obj_info->handled = true; switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); - mlx5_esw_bridge_port_vlan_del(vlan->vid, esw, vport); + mlx5_esw_bridge_port_vlan_del(vport_num, esw_owner_vhca_id, vlan->vid, br_offloads); break; default: return -EOPNOTSUPP; @@ -137,25 +236,21 @@ static int mlx5_esw_bridge_port_obj_del(struct net_device *dev, return 0; } -static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev, - const void *ctx, - const struct switchdev_attr *attr, - struct netlink_ext_ack *extack) +static int +mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev, + struct switchdev_notifier_port_attr_info *port_attr_info, + struct mlx5_esw_bridge_offloads *br_offloads) { - struct mlx5e_rep_priv *rpriv; - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - struct mlx5e_priv *priv; - u16 vport_num; - int err = 0; + struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_attr_info->info); + const struct switchdev_attr *attr = port_attr_info->attr; + u16 vport_num, esw_owner_vhca_id; + int err; - priv = netdev_priv(dev); - rpriv = priv->ppriv; - vport_num = rpriv->rep->vport; - esw = priv->mdev->priv.eswitch; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return PTR_ERR(vport); + if (!mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) + return 0; + + port_attr_info->handled = true; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: @@ -167,10 +262,12 @@ static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev, case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: - err = mlx5_esw_bridge_ageing_time_set(attr->u.ageing_time, esw, vport); + err = mlx5_esw_bridge_ageing_time_set(vport_num, esw_owner_vhca_id, + attr->u.ageing_time, br_offloads); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: - err = mlx5_esw_bridge_vlan_filtering_set(attr->u.vlan_filtering, esw, vport); + err = mlx5_esw_bridge_vlan_filtering_set(vport_num, esw_owner_vhca_id, + attr->u.vlan_filtering, br_offloads); break; default: err = -EOPNOTSUPP; @@ -179,27 +276,24 @@ static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev, return err; } -static int mlx5_esw_bridge_event_blocking(struct notifier_block *unused, +static int mlx5_esw_bridge_event_blocking(struct notifier_block *nb, unsigned long event, void *ptr) { + struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb, + struct mlx5_esw_bridge_offloads, + nb_blk); struct net_device *dev = switchdev_notifier_info_to_dev(ptr); int err; switch (event) { case SWITCHDEV_PORT_OBJ_ADD: - err = switchdev_handle_port_obj_add(dev, ptr, - mlx5e_eswitch_rep, - mlx5_esw_bridge_port_obj_add); + err = mlx5_esw_bridge_port_obj_add(dev, ptr, br_offloads); break; case SWITCHDEV_PORT_OBJ_DEL: - err = switchdev_handle_port_obj_del(dev, ptr, - mlx5e_eswitch_rep, - mlx5_esw_bridge_port_obj_del); + err = mlx5_esw_bridge_port_obj_del(dev, ptr, br_offloads); break; case SWITCHDEV_PORT_ATTR_SET: - err = switchdev_handle_port_attr_set(dev, ptr, - mlx5e_eswitch_rep, - mlx5_esw_bridge_port_obj_attr_set); + err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads); break; default: err = 0; @@ -222,27 +316,23 @@ 
static void mlx5_esw_bridge_switchdev_fdb_event_work(struct work_struct *work) container_of(work, struct mlx5_bridge_switchdev_fdb_work, work); struct switchdev_notifier_fdb_info *fdb_info = &fdb_work->fdb_info; + struct mlx5_esw_bridge_offloads *br_offloads = + fdb_work->br_offloads; struct net_device *dev = fdb_work->dev; - struct mlx5e_rep_priv *rpriv; - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - struct mlx5e_priv *priv; - u16 vport_num; + u16 vport_num, esw_owner_vhca_id; rtnl_lock(); - priv = netdev_priv(dev); - rpriv = priv->ppriv; - vport_num = rpriv->rep->vport; - esw = priv->mdev->priv.eswitch; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) + if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) goto out; if (fdb_work->add) - mlx5_esw_bridge_fdb_create(dev, esw, vport, fdb_info); + mlx5_esw_bridge_fdb_create(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); else - mlx5_esw_bridge_fdb_remove(dev, esw, vport, fdb_info); + mlx5_esw_bridge_fdb_remove(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); out: rtnl_unlock(); @@ -251,7 +341,8 @@ out: static struct mlx5_bridge_switchdev_fdb_work * mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add, - struct switchdev_notifier_fdb_info *fdb_info) + struct switchdev_notifier_fdb_info *fdb_info, + struct mlx5_esw_bridge_offloads *br_offloads) { struct mlx5_bridge_switchdev_fdb_work *work; u8 *addr; @@ -273,6 +364,7 @@ mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add, dev_hold(dev); work->dev = dev; + work->br_offloads = br_offloads; work->add = add; return work; } @@ -286,20 +378,14 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, struct net_device *dev = switchdev_notifier_info_to_dev(ptr); struct switchdev_notifier_fdb_info *fdb_info; struct mlx5_bridge_switchdev_fdb_work *work; + struct mlx5_eswitch *esw = br_offloads->esw; struct switchdev_notifier_info *info = ptr; - struct net_device *upper; - struct mlx5e_priv *priv; - - if (!mlx5e_eswitch_rep(dev)) - return NOTIFY_DONE; - priv = netdev_priv(dev); - if (priv->mdev->priv.eswitch != br_offloads->esw) - return NOTIFY_DONE; + u16 vport_num, esw_owner_vhca_id; + struct net_device *upper, *rep; if (event == SWITCHDEV_PORT_ATTR_SET) { - int err = switchdev_handle_port_attr_set(dev, ptr, - mlx5e_eswitch_rep, - mlx5_esw_bridge_port_obj_attr_set); + int err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads); + return notifier_from_errno(err); } @@ -309,7 +395,27 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, if (!netif_is_bridge_master(upper)) return NOTIFY_DONE; + rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id); + if (!rep) + return NOTIFY_DONE; + switch (event) { + case SWITCHDEV_FDB_ADD_TO_BRIDGE: + /* only handle the event on native eswtich of representor */ + if (!mlx5_esw_bridge_is_local(dev, rep, esw)) + break; + + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + mlx5_esw_bridge_fdb_update_used(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); + break; + case SWITCHDEV_FDB_DEL_TO_BRIDGE: + /* only handle the event on peers */ + if (mlx5_esw_bridge_is_local(dev, rep, esw)) + break; + fallthrough; case SWITCHDEV_FDB_ADD_TO_DEVICE: case SWITCHDEV_FDB_DEL_TO_DEVICE: fdb_info = container_of(info, @@ -318,7 +424,8 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, work = 
mlx5_esw_bridge_init_switchdev_fdb_work(dev, event == SWITCHDEV_FDB_ADD_TO_DEVICE, - fdb_info); + fdb_info, + br_offloads); if (IS_ERR(work)) { WARN_ONCE(1, "Failed to init switchdev work, err=%ld", PTR_ERR(work)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 059799e4f483..51a4d80f7fa3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -17,7 +17,7 @@ #include "en/mapping.h" #include "en/tc_tun.h" #include "lib/port_tun.h" -#include "esw/sample.h" +#include "en/tc/sample.h" struct mlx5e_rep_indr_block_priv { struct net_device *netdev; @@ -516,7 +516,6 @@ void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) mlx5e_rep_indr_block_unbind); } -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, struct mlx5e_tc_update_priv *tc_priv, u32 tunnel_id) @@ -609,12 +608,13 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, return true; } -static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1, - struct mlx5e_tc_update_priv *tc_priv) +static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1, + struct mlx5e_tc_update_priv *tc_priv) { struct mlx5e_priv *priv = netdev_priv(skb->dev); u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) if (chain) { struct mlx5_rep_uplink_priv *uplink_priv; struct mlx5e_rep_priv *uplink_rpriv; @@ -636,9 +636,25 @@ static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1, zone_restore_id)) return false; } +#endif /* CONFIG_NET_TC_SKB_EXT */ + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); } -#endif /* CONFIG_NET_TC_SKB_EXT */ + +static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_mapped_obj *mapped_obj, + struct mlx5e_tc_update_priv *tc_priv) +{ + if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) { + netdev_dbg(priv->netdev, + "Failed to restore tunnel info for sampled packet\n"); + return; + } +#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + mlx5e_tc_sample_skb(skb, mapped_obj); +#endif /* CONFIG_MLX5_TC_SAMPLE */ + mlx5_rep_tc_post_napi_receive(tc_priv); +} bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, @@ -647,7 +663,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, struct mlx5_mapped_obj mapped_obj; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; - u32 reg_c0, reg_c1; + u32 reg_c0; int err; reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); @@ -659,8 +675,6 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, */ skb->mark = 0; - reg_c1 = be32_to_cpu(cqe->ft_metadata); - priv = netdev_priv(skb->dev); esw = priv->mdev->priv.eswitch; err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj); @@ -671,18 +685,14 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, return false; } -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) - return mlx5e_restore_skb(skb, mapped_obj.chain, reg_c1, tc_priv); -#endif /* CONFIG_NET_TC_SKB_EXT */ -#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) - if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) { - mlx5_esw_sample_skb(skb, &mapped_obj); + if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { + u32 reg_c1 = be32_to_cpu(cqe->ft_metadata); + + return mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, 
tc_priv); + } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) { + mlx5e_restore_skb_sample(priv, skb, &mapped_obj, tc_priv); return false; - } -#endif /* CONFIG_MLX5_TC_SAMPLE */ - if (mapped_obj.type != MLX5_MAPPED_OBJ_SAMPLE && - mapped_obj.type != MLX5_MAPPED_OBJ_CHAIN) { + } else { netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 9d361efd5ff7..bb682fd751c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -372,7 +372,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, for (i = 0; i < priv->channels.num; i++) { struct mlx5e_channel *c = priv->channels.c[i]; - for (tc = 0; tc < priv->channels.params.num_tc; tc++) { + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { struct mlx5e_txqsq *sq = &c->sq[tc]; err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); @@ -384,7 +384,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) goto close_sqs_nest; - for (tc = 0; tc < priv->channels.params.num_tc; tc++) { + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg, &ptp_ch->ptpsq[tc], tc); @@ -494,7 +494,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, for (i = 0; i < priv->channels.num; i++) { struct mlx5e_channel *c = priv->channels.c[i]; - for (tc = 0; tc < priv->channels.params.num_tc; tc++) { + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { struct mlx5e_txqsq *sq = &c->sq[tc]; err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); @@ -504,7 +504,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, } if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) { - for (tc = 0; tc < priv->channels.params.num_tc; tc++) { + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c new file mode 100644 index 000000000000..625cd49ef96c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -0,0 +1,588 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. + +#include "rss.h" + +#define mlx5e_rss_warn(__dev, format, ...) 
\ + dev_warn((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = { + [MLX5_TT_IPV4_TCP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV6_TCP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV4_UDP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV6_UDP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV4_IPSEC_AH] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV6_IPSEC_AH] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV4_IPSEC_ESP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV6_IPSEC_ESP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV4] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, + [MLX5_TT_IPV6] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, +}; + +struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt) +{ + return rss_default_config[tt]; +} + +struct mlx5e_rss { + struct mlx5e_rss_params_hash hash; + struct mlx5e_rss_params_indir indir; + u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir *tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir *inner_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_rqt rqt; + struct mlx5_core_dev *mdev; + u32 drop_rqn; + bool inner_ft_support; + bool enabled; + refcount_t refcnt; +}; + +struct mlx5e_rss *mlx5e_rss_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_rss), GFP_KERNEL); +} + +void mlx5e_rss_free(struct mlx5e_rss *rss) +{ + kvfree(rss); +} + +static void mlx5e_rss_params_init(struct mlx5e_rss *rss) +{ + enum mlx5_traffic_types tt; + + rss->hash.hfunc = ETH_RSS_HASH_TOP; + netdev_rss_key_fill(rss->hash.toeplitz_hash_key, + sizeof(rss->hash.toeplitz_hash_key)); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + rss->rx_hash_fields[tt] = + mlx5e_rss_get_default_tt_config(tt).rx_hash_fields; +} + +static struct mlx5e_tir **rss_get_tirp(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + return inner ? 
&rss->inner_tir[tt] : &rss->tir[tt]; +} + +static struct mlx5e_tir *rss_get_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + return *rss_get_tirp(rss, tt, inner); +} + +static struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + + rss_tt = mlx5e_rss_get_default_tt_config(tt); + rss_tt.rx_hash_fields = rss->rx_hash_fields[tt]; + return rss_tt; +} + +static int mlx5e_rss_create_tir(struct mlx5e_rss *rss, + enum mlx5_traffic_types tt, + const struct mlx5e_lro_param *init_lro_param, + bool inner) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + struct mlx5e_tir_builder *builder; + struct mlx5e_tir **tir_p; + struct mlx5e_tir *tir; + u32 rqtn; + int err; + + if (inner && !rss->inner_ft_support) { + mlx5e_rss_warn(rss->mdev, + "Cannot create inner indirect TIR[%d], RSS inner FT is not supported.\n", + tt); + return -EINVAL; + } + + tir_p = rss_get_tirp(rss, tt, inner); + if (*tir_p) + return -EINVAL; + + tir = kvzalloc(sizeof(*tir), GFP_KERNEL); + if (!tir) + return -ENOMEM; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) { + err = -ENOMEM; + goto free_tir; + } + + rqtn = mlx5e_rqt_get_rqtn(&rss->rqt); + mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn, + rqtn, rss->inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + rss_tt = mlx5e_rss_get_tt_config(rss, tt); + mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner); + + err = mlx5e_tir_init(tir, builder, rss->mdev, true); + mlx5e_tir_builder_free(builder); + if (err) { + mlx5e_rss_warn(rss->mdev, "Failed to create %sindirect TIR: err = %d, tt = %d\n", + inner ? "inner " : "", err, tt); + goto free_tir; + } + + *tir_p = tir; + return 0; + +free_tir: + kvfree(tir); + return err; +} + +static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_tir **tir_p; + struct mlx5e_tir *tir; + + tir_p = rss_get_tirp(rss, tt, inner); + if (!*tir_p) + return; + + tir = *tir_p; + mlx5e_tir_destroy(tir); + kvfree(tir); + *tir_p = NULL; +} + +static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss, + const struct mlx5e_lro_param *init_lro_param, + bool inner) +{ + enum mlx5_traffic_types tt, max_tt; + int err; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner); + if (err) + goto err_destroy_tirs; + } + + return 0; + +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_rss_destroy_tir(rss, tt, inner); + return err; +} + +static void mlx5e_rss_destroy_tirs(struct mlx5e_rss *rss, bool inner) +{ + enum mlx5_traffic_types tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_rss_destroy_tir(rss, tt, inner); +} + +static int mlx5e_rss_update_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + struct mlx5e_tir_builder *builder; + struct mlx5e_tir *tir; + int err; + + tir = rss_get_tir(rss, tt, inner); + if (!tir) + return 0; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + rss_tt = mlx5e_rss_get_tt_config(rss, tt); + + mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner); + err = mlx5e_tir_modify(tir, builder); + + mlx5e_tir_builder_free(builder); + return err; +} + +static int mlx5e_rss_update_tirs(struct mlx5e_rss *rss) +{ + enum mlx5_traffic_types tt; + int err, retval; + + retval = 0; + + for (tt = 
0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_rss_update_tir(rss, tt, false); + if (err) { + retval = retval ? : err; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n", + tt, err); + } + + if (!rss->inner_ft_support) + continue; + + err = mlx5e_rss_update_tir(rss, tt, true); + if (err) { + retval = retval ? : err; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + } + } + return retval; +} + +int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn) +{ + rss->mdev = mdev; + rss->inner_ft_support = inner_ft_support; + rss->drop_rqn = drop_rqn; + + mlx5e_rss_params_init(rss); + refcount_set(&rss->refcnt, 1); + + return mlx5e_rqt_init_direct(&rss->rqt, mdev, true, drop_rqn); +} + +int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn, + const struct mlx5e_lro_param *init_lro_param) +{ + int err; + + err = mlx5e_rss_init_no_tirs(rss, mdev, inner_ft_support, drop_rqn); + if (err) + goto err_out; + + err = mlx5e_rss_create_tirs(rss, init_lro_param, false); + if (err) + goto err_destroy_rqt; + + if (inner_ft_support) { + err = mlx5e_rss_create_tirs(rss, init_lro_param, true); + if (err) + goto err_destroy_tirs; + } + + return 0; + +err_destroy_tirs: + mlx5e_rss_destroy_tirs(rss, false); +err_destroy_rqt: + mlx5e_rqt_destroy(&rss->rqt); +err_out: + return err; +} + +int mlx5e_rss_cleanup(struct mlx5e_rss *rss) +{ + if (!refcount_dec_if_one(&rss->refcnt)) + return -EBUSY; + + mlx5e_rss_destroy_tirs(rss, false); + + if (rss->inner_ft_support) + mlx5e_rss_destroy_tirs(rss, true); + + mlx5e_rqt_destroy(&rss->rqt); + + return 0; +} + +void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss) +{ + refcount_inc(&rss->refcnt); +} + +void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss) +{ + refcount_dec(&rss->refcnt); +} + +unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss) +{ + return refcount_read(&rss->refcnt); +} + +u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_tir *tir; + + WARN_ON(inner && !rss->inner_ft_support); + tir = rss_get_tir(rss, tt, inner); + WARN_ON(!tir); + + return mlx5e_tir_get_tirn(tir); +} + +/* Fill the "tirn" output parameter. + * Create the requested TIR if it's its first usage. 
+ */ +int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, + enum mlx5_traffic_types tt, + const struct mlx5e_lro_param *init_lro_param, + bool inner, u32 *tirn) +{ + struct mlx5e_tir *tir; + + tir = rss_get_tir(rss, tt, inner); + if (!tir) { /* TIR doesn't exist, create one */ + int err; + + err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner); + if (err) + return err; + tir = rss_get_tir(rss, tt, inner); + } + + *tirn = mlx5e_tir_get_tirn(tir); + return 0; +} + +static void mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns) +{ + int err; + + err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, num_rqns, rss->hash.hfunc, &rss->indir); + if (err) + mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to channels: err = %d\n", + mlx5e_rqt_get_rqtn(&rss->rqt), err); +} + +void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns) +{ + rss->enabled = true; + mlx5e_rss_apply(rss, rqns, num_rqns); +} + +void mlx5e_rss_disable(struct mlx5e_rss *rss) +{ + int err; + + rss->enabled = false; + err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn); + if (err) + mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to drop RQ %#x: err = %d\n", + mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err); +} + +int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param) +{ + struct mlx5e_tir_builder *builder; + enum mlx5_traffic_types tt; + int err, final_err; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + mlx5e_tir_builder_build_lro(builder, lro_param); + + final_err = 0; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_tir *tir; + + tir = rss_get_tir(rss, tt, false); + if (!tir) + goto inner_tir; + err = mlx5e_tir_modify(tir, builder); + if (err) { + mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(tir), tt, err); + if (!final_err) + final_err = err; + } + +inner_tir: + if (!rss->inner_ft_support) + continue; + + tir = rss_get_tir(rss, tt, true); + if (!tir) + continue; + err = mlx5e_tir_modify(tir, builder); + if (err) { + mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(tir), tt, err); + if (!final_err) + final_err = err; + } + } + + mlx5e_tir_builder_free(builder); + return final_err; +} + +int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc) +{ + unsigned int i; + + if (indir) + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + indir[i] = rss->indir.table[i]; + + if (key) + memcpy(key, rss->hash.toeplitz_hash_key, + sizeof(rss->hash.toeplitz_hash_key)); + + if (hfunc) + *hfunc = rss->hash.hfunc; + + return 0; +} + +int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, + const u8 *key, const u8 *hfunc, + u32 *rqns, unsigned int num_rqns) +{ + bool changed_indir = false; + bool changed_hash = false; + + if (hfunc && *hfunc != rss->hash.hfunc) { + switch (*hfunc) { + case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_TOP: + break; + default: + return -EINVAL; + } + changed_hash = true; + changed_indir = true; + rss->hash.hfunc = *hfunc; + } + + if (key) { + if (rss->hash.hfunc == ETH_RSS_HASH_TOP) + changed_hash = true; + memcpy(rss->hash.toeplitz_hash_key, key, + sizeof(rss->hash.toeplitz_hash_key)); + } + + if (indir) { + unsigned int i; + + changed_indir = true; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + rss->indir.table[i] = indir[i]; + } + + if (changed_indir && rss->enabled) + 
mlx5e_rss_apply(rss, rqns, num_rqns); + + if (changed_hash) + mlx5e_rss_update_tirs(rss); + + return 0; +} + +struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss) +{ + return rss->hash; +} + +u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt) +{ + return rss->rx_hash_fields[tt]; +} + +int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + u8 rx_hash_fields) +{ + u8 old_rx_hash_fields; + int err; + + old_rx_hash_fields = rss->rx_hash_fields[tt]; + + if (old_rx_hash_fields == rx_hash_fields) + return 0; + + rss->rx_hash_fields[tt] = rx_hash_fields; + + err = mlx5e_rss_update_tir(rss, tt, false); + if (err) { + rss->rx_hash_fields[tt] = old_rx_hash_fields; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n", + tt, err); + return err; + } + + if (!(rss->inner_ft_support)) + return 0; + + err = mlx5e_rss_update_tir(rss, tt, true); + if (err) { + /* Partial update happened. Try to revert - it may fail too, but + * there is nothing more we can do. + */ + rss->rx_hash_fields[tt] = old_rx_hash_fields; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + if (mlx5e_rss_update_tir(rss, tt, false)) + mlx5e_rss_warn(rss->mdev, + "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n", + tt); + } + + return err; +} + +void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch) +{ + mlx5e_rss_params_indir_init_uniform(&rss->indir, nch); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h new file mode 100644 index 000000000000..d522a10dadf3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. 
*/ + +#ifndef __MLX5_EN_RSS_H__ +#define __MLX5_EN_RSS_H__ + +#include "rqt.h" +#include "tir.h" +#include "fs.h" + +struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt); + +struct mlx5e_rss; + +struct mlx5e_rss *mlx5e_rss_alloc(void); +void mlx5e_rss_free(struct mlx5e_rss *rss); +int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn, + const struct mlx5e_lro_param *init_lro_param); +int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn); +int mlx5e_rss_cleanup(struct mlx5e_rss *rss); + +void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss); +void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss); +unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss); + +u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner); +int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, + enum mlx5_traffic_types tt, + const struct mlx5e_lro_param *init_lro_param, + bool inner, u32 *tirn); + +void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns); +void mlx5e_rss_disable(struct mlx5e_rss *rss); + +int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param); +int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, + const u8 *key, const u8 *hfunc, + u32 *rqns, unsigned int num_rqns); +struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss); +u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt); +int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + u8 rx_hash_fields); +void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch); +#endif /* __MLX5_EN_RSS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index e2a8fe13f29d..bf0313e2682b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -5,64 +5,7 @@ #include "channels.h" #include "params.h" -static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = { - [MLX5_TT_IPV4_TCP] = { - .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5_TT_IPV6_TCP] = { - .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5_TT_IPV4_UDP] = { - .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5_TT_IPV6_UDP] = { - .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5_TT_IPV4_IPSEC_AH] = { - .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5_TT_IPV6_IPSEC_AH] = { - .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5_TT_IPV4_IPSEC_ESP] = { - .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5_TT_IPV6_IPSEC_ESP] = { - .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5_TT_IPV4] = { - .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - 
.l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP, - }, - [MLX5_TT_IPV6] = { - .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP, - }, -}; - -struct mlx5e_rss_params_traffic_type -mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt) -{ - return rss_default_config[tt]; -} +#define MLX5E_MAX_NUM_RSS 16 struct mlx5e_rx_res { struct mlx5_core_dev *mdev; @@ -70,18 +13,7 @@ struct mlx5e_rx_res { unsigned int max_nch; u32 drop_rqn; - struct { - struct mlx5e_rss_params_hash hash; - struct mlx5e_rss_params_indir indir; - u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; - } rss_params; - - struct mlx5e_rqt indir_rqt; - struct { - struct mlx5e_tir indir_tir; - struct mlx5e_tir inner_indir_tir; - } rss[MLX5E_NUM_INDIR_TIRS]; - + struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS]; bool rss_active; u32 rss_rqns[MLX5E_INDIR_RQT_SIZE]; unsigned int rss_nch; @@ -99,103 +31,247 @@ struct mlx5e_rx_res { } ptp; }; -struct mlx5e_rx_res *mlx5e_rx_res_alloc(void) -{ - return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL); -} +/* API for rx_res_rss_* */ -static void mlx5e_rx_res_rss_params_init(struct mlx5e_rx_res *res, unsigned int init_nch) +static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, + const struct mlx5e_lro_param *init_lro_param, + unsigned int init_nch) { - enum mlx5_traffic_types tt; + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_rss *rss; + int err; + + if (WARN_ON(res->rss[0])) + return -EINVAL; - res->rss_params.hash.hfunc = ETH_RSS_HASH_TOP; - netdev_rss_key_fill(res->rss_params.hash.toeplitz_hash_key, - sizeof(res->rss_params.hash.toeplitz_hash_key)); - mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, init_nch); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - res->rss_params.rx_hash_fields[tt] = - mlx5e_rss_get_default_tt_config(tt).rx_hash_fields; + rss = mlx5e_rss_alloc(); + if (!rss) + return -ENOMEM; + + err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn, + init_lro_param); + if (err) + goto err_rss_free; + + mlx5e_rss_set_indir_uniform(rss, init_nch); + + res->rss[0] = rss; + + return 0; + +err_rss_free: + mlx5e_rss_free(rss); + return err; } -static int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, - const struct mlx5e_lro_param *init_lro_param) +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; - enum mlx5_traffic_types tt, max_tt; - struct mlx5e_tir_builder *builder; - u32 indir_rqtn; - int err; + struct mlx5e_rss *rss; + int err, i; - builder = mlx5e_tir_builder_alloc(false); - if (!builder) + for (i = 1; i < MLX5E_MAX_NUM_RSS; i++) + if (!res->rss[i]) + break; + + if (i == MLX5E_MAX_NUM_RSS) + return -ENOSPC; + + rss = mlx5e_rss_alloc(); + if (!rss) return -ENOMEM; - err = mlx5e_rqt_init_direct(&res->indir_rqt, res->mdev, true, res->drop_rqn); + err = mlx5e_rss_init_no_tirs(rss, res->mdev, inner_ft_support, res->drop_rqn); if (err) - goto out; + goto err_rss_free; - indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt); + mlx5e_rss_set_indir_uniform(rss, init_nch); + if (res->rss_active) + mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - struct mlx5e_rss_params_traffic_type rss_tt; + res->rss[i] = rss; + *rss_idx = i; - mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, - indir_rqtn, inner_ft_support); - mlx5e_tir_builder_build_lro(builder, init_lro_param); - rss_tt = 
mlx5e_rx_res_rss_get_current_tt_config(res, tt); - mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, false); + return 0; - err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, res->mdev, true); - if (err) { - mlx5_core_warn(res->mdev, "Failed to create an indirect TIR: err = %d, tt = %d\n", - err, tt); - goto err_destroy_tirs; - } +err_rss_free: + mlx5e_rss_free(rss); + return err; +} - mlx5e_tir_builder_clear(builder); - } +static int __mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx) +{ + struct mlx5e_rss *rss = res->rss[rss_idx]; + int err; - if (!inner_ft_support) - goto out; + err = mlx5e_rss_cleanup(rss); + if (err) + return err; - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - struct mlx5e_rss_params_traffic_type rss_tt; + mlx5e_rss_free(rss); + res->rss[rss_idx] = NULL; - mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, - indir_rqtn, inner_ft_support); - mlx5e_tir_builder_build_lro(builder, init_lro_param); - rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); - mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, true); + return 0; +} + +int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx) +{ + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -EINVAL; + + return __mlx5e_rx_res_rss_destroy(res, rss_idx); +} + +static void mlx5e_rx_res_rss_destroy_all(struct mlx5e_rx_res *res) +{ + int i; + + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) { + struct mlx5e_rss *rss = res->rss[i]; + int err; - err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, res->mdev, true); + if (!rss) + continue; + + err = __mlx5e_rx_res_rss_destroy(res, i); if (err) { - mlx5_core_warn(res->mdev, "Failed to create an inner indirect TIR: err = %d, tt = %d\n", - err, tt); - goto err_destroy_inner_tirs; + unsigned int refcount; + + refcount = mlx5e_rss_refcnt_read(rss); + mlx5_core_warn(res->mdev, + "Failed to destroy RSS context %d, refcount = %u, err = %d\n", + i, refcount, err); } + } +} - mlx5e_tir_builder_clear(builder); +static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res) +{ + int i; + + res->rss_active = true; + + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) { + struct mlx5e_rss *rss = res->rss[i]; + + if (!rss) + continue; + mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch); } +} - goto out; +static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res) +{ + int i; -err_destroy_inner_tirs: - max_tt = tt; - for (tt = 0; tt < max_tt; tt++) - mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); + res->rss_active = false; - tt = MLX5E_NUM_INDIR_TIRS; -err_destroy_tirs: - max_tt = tt; - for (tt = 0; tt < max_tt; tt++) - mlx5e_tir_destroy(&res->rss[tt].indir_tir); + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) { + struct mlx5e_rss *rss = res->rss[i]; - mlx5e_rqt_destroy(&res->indir_rqt); + if (!rss) + continue; + mlx5e_rss_disable(rss); + } +} -out: - mlx5e_tir_builder_free(builder); +/* Updates the indirection table SW shadow, does not update the HW resources yet */ +void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch) +{ + WARN_ON_ONCE(res->rss_active); + mlx5e_rss_set_indir_uniform(res->rss[0], nch); +} - return err; +int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc) +{ + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -ENOENT; + + return mlx5e_rss_get_rxfh(rss, indir, 
key, hfunc); +} + +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + const u32 *indir, const u8 *key, const u8 *hfunc) +{ + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -ENOENT; + + return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, res->rss_nch); +} + +u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + struct mlx5e_rss *rss = res->rss[0]; + + return mlx5e_rss_get_hash_fields(rss, tt); +} + +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + u8 rx_hash_fields) +{ + struct mlx5e_rss *rss = res->rss[0]; + + return mlx5e_rss_set_hash_fields(rss, tt, rx_hash_fields); +} + +int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res) +{ + int i, cnt; + + cnt = 0; + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) + if (res->rss[i]) + cnt++; + + return cnt; +} + +int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss) +{ + int i; + + if (!rss) + return -EINVAL; + + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) + if (rss == res->rss[i]) + return i; + + return -ENOENT; +} + +struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx) +{ + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return NULL; + + return res->rss[rss_idx]; +} + +/* End of API rx_res_rss_* */ + +struct mlx5e_rx_res *mlx5e_rx_res_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL); } static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, @@ -335,20 +411,6 @@ out: return err; } -static void mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res) -{ - enum mlx5_traffic_types tt; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - mlx5e_tir_destroy(&res->rss[tt].indir_tir); - - if (res->features & MLX5E_RX_RES_FEATURE_INNER_FT) - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); - - mlx5e_rqt_destroy(&res->indir_rqt); -} - static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res) { unsigned int ix; @@ -385,11 +447,9 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, res->max_nch = max_nch; res->drop_rqn = drop_rqn; - mlx5e_rx_res_rss_params_init(res, init_nch); - - err = mlx5e_rx_res_rss_init(res, init_lro_param); + err = mlx5e_rx_res_rss_init_def(res, init_lro_param, init_nch); if (err) - return err; + goto err_out; err = mlx5e_rx_res_channels_init(res, init_lro_param); if (err) @@ -404,7 +464,8 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, err_channels_destroy: mlx5e_rx_res_channels_destroy(res); err_rss_destroy: - mlx5e_rx_res_rss_destroy(res); + __mlx5e_rx_res_rss_destroy(res, 0); +err_out: return err; } @@ -412,7 +473,7 @@ void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res) { mlx5e_rx_res_ptp_destroy(res); mlx5e_rx_res_channels_destroy(res); - mlx5e_rx_res_rss_destroy(res); + mlx5e_rx_res_rss_destroy_all(res); } void mlx5e_rx_res_free(struct mlx5e_rx_res *res) @@ -434,13 +495,16 @@ u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix) u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) { - return mlx5e_tir_get_tirn(&res->rss[tt].indir_tir); + struct mlx5e_rss *rss = res->rss[0]; + + return mlx5e_rss_get_tirn(rss, tt, false); } u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) { - WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)); - return 
mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir); + struct mlx5e_rss *rss = res->rss[0]; + + return mlx5e_rss_get_tirn(rss, tt, true); } u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) @@ -454,32 +518,6 @@ u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); } -static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res) -{ - int err; - - res->rss_active = true; - - err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch, - res->rss_params.hash.hfunc, - &res->rss_params.indir); - if (err) - mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n", - mlx5e_rqt_get_rqtn(&res->indir_rqt), err); -} - -static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res) -{ - int err; - - res->rss_active = false; - - err = mlx5e_rqt_redirect_direct(&res->indir_rqt, res->drop_rqn); - if (err) - mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to drop RQ %#x: err = %d\n", - mlx5e_rqt_get_rqtn(&res->indir_rqt), res->drop_rqn, err); -} - void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs) { unsigned int nch, ix; @@ -607,178 +645,9 @@ int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix) return err; } -struct mlx5e_rss_params_traffic_type -mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) -{ - struct mlx5e_rss_params_traffic_type rss_tt; - - rss_tt = mlx5e_rss_get_default_tt_config(tt); - rss_tt.rx_hash_fields = res->rss_params.rx_hash_fields[tt]; - return rss_tt; -} - -void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch) -{ - mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, nch); - - if (!res->rss_active) - return; - - mlx5e_rx_res_rss_enable(res); -} - -void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc) -{ - unsigned int i; - - if (indir) - for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) - indir[i] = res->rss_params.indir.table[i]; - - if (key) - memcpy(key, res->rss_params.hash.toeplitz_hash_key, - sizeof(res->rss_params.hash.toeplitz_hash_key)); - - if (hfunc) - *hfunc = res->rss_params.hash.hfunc; -} - -static int mlx5e_rx_res_rss_update_tir(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, - bool inner) -{ - struct mlx5e_rss_params_traffic_type rss_tt; - struct mlx5e_tir_builder *builder; - struct mlx5e_tir *tir; - int err; - - builder = mlx5e_tir_builder_alloc(true); - if (!builder) - return -ENOMEM; - - rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); - - mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, inner); - tir = inner ? 
&res->rss[tt].inner_indir_tir : &res->rss[tt].indir_tir; - err = mlx5e_tir_modify(tir, builder); - - mlx5e_tir_builder_free(builder); - return err; -} - -int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir, - const u8 *key, const u8 *hfunc) -{ - enum mlx5_traffic_types tt; - bool changed_indir = false; - bool changed_hash = false; - int err; - - if (hfunc && *hfunc != res->rss_params.hash.hfunc) { - switch (*hfunc) { - case ETH_RSS_HASH_XOR: - case ETH_RSS_HASH_TOP: - break; - default: - return -EINVAL; - } - changed_hash = true; - changed_indir = true; - res->rss_params.hash.hfunc = *hfunc; - } - - if (key) { - if (res->rss_params.hash.hfunc == ETH_RSS_HASH_TOP) - changed_hash = true; - memcpy(res->rss_params.hash.toeplitz_hash_key, key, - sizeof(res->rss_params.hash.toeplitz_hash_key)); - } - - if (indir) { - unsigned int i; - - changed_indir = true; - - for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) - res->rss_params.indir.table[i] = indir[i]; - } - - if (changed_indir && res->rss_active) { - err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch, - res->rss_params.hash.hfunc, - &res->rss_params.indir); - if (err) - mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n", - mlx5e_rqt_get_rqtn(&res->indir_rqt), err); - } - - if (changed_hash) - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - err = mlx5e_rx_res_rss_update_tir(res, tt, false); - if (err) - mlx5_core_warn(res->mdev, "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n", - tt, err); - - if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) - continue; - - err = mlx5e_rx_res_rss_update_tir(res, tt, true); - if (err) - mlx5_core_warn(res->mdev, "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n", - tt, err); - } - - return 0; -} - -u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) -{ - return res->rss_params.rx_hash_fields[tt]; -} - -int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, - u8 rx_hash_fields) -{ - u8 old_rx_hash_fields; - int err; - - old_rx_hash_fields = res->rss_params.rx_hash_fields[tt]; - - if (old_rx_hash_fields == rx_hash_fields) - return 0; - - res->rss_params.rx_hash_fields[tt] = rx_hash_fields; - - err = mlx5e_rx_res_rss_update_tir(res, tt, false); - if (err) { - res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields; - mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n", - tt, err); - return err; - } - - if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) - return 0; - - err = mlx5e_rx_res_rss_update_tir(res, tt, true); - if (err) { - /* Partial update happened. Try to revert - it may fail too, but - * there is nothing more we can do. 
- */ - res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields; - mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n", - tt, err); - if (mlx5e_rx_res_rss_update_tir(res, tt, false)) - mlx5_core_warn(res->mdev, "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n", - tt); - } - - return err; -} - int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param) { struct mlx5e_tir_builder *builder; - enum mlx5_traffic_types tt; int err, final_err; unsigned int ix; @@ -790,25 +659,15 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param final_err = 0; - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); - if (err) { - mlx5_core_warn(res->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n", - mlx5e_tir_get_tirn(&res->rss[tt].indir_tir), tt, err); - if (!final_err) - final_err = err; - } + for (ix = 0; ix < MLX5E_MAX_NUM_RSS; ix++) { + struct mlx5e_rss *rss = res->rss[ix]; - if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) + if (!rss) continue; - err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder); - if (err) { - mlx5_core_warn(res->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n", - mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir), tt, err); - if (!final_err) - final_err = err; - } + err = mlx5e_rss_lro_set_param(rss, lro_param); + if (err) + final_err = final_err ? : err; } for (ix = 0; ix < res->max_nch; ix++) { @@ -827,5 +686,5 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res) { - return res->rss_params.hash; + return mlx5e_rss_get_hash(res->rss[0]); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 1baeec5158a3..4a15942d79f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -8,6 +8,7 @@ #include "rqt.h" #include "tir.h" #include "fs.h" +#include "rss.h" struct mlx5e_rx_res; @@ -20,9 +21,6 @@ enum mlx5e_rx_res_features { MLX5E_RX_RES_FEATURE_PTP = BIT(2), }; -struct mlx5e_rss_params_traffic_type -mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt); - /* Setup */ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void); int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, @@ -50,17 +48,23 @@ int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *c int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix); /* Configuration API */ -struct mlx5e_rss_params_traffic_type -mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch); -void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc); -int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir, - const u8 *key, const u8 *hfunc); +int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + const u32 *indir, const u8 *key, const u8 *hfunc); + u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum 
mlx5_traffic_types tt); int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, u8 rx_hash_fields); int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param); +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch); +int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx); +int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res); +int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss); +struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx); + /* Workaround for hairpin */ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c new file mode 100644 index 000000000000..a3e43e898a56 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "en_tc.h" +#include "post_act.h" +#include "mlx5_core.h" + +struct mlx5e_post_act { + enum mlx5_flow_namespace_type ns_type; + struct mlx5_fs_chains *chains; + struct mlx5_flow_table *ft; + struct mlx5e_priv *priv; + struct xarray ids; +}; + +struct mlx5e_post_act_handle { + enum mlx5_flow_namespace_type ns_type; + struct mlx5_flow_attr *attr; + struct mlx5_flow_handle *rule; + u32 id; +}; + +#define MLX5_POST_ACTION_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen) +#define MLX5_POST_ACTION_MAX GENMASK(MLX5_POST_ACTION_BITS - 1, 0) +#define MLX5_POST_ACTION_MASK MLX5_POST_ACTION_MAX + +struct mlx5e_post_act * +mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5e_post_act *post_act; + int err; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ignore_flow_level)) { + mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); + err = -EOPNOTSUPP; + goto err_check; + } else if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { + mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); + err = -EOPNOTSUPP; + goto err_check; + } + + post_act = kzalloc(sizeof(*post_act), GFP_KERNEL); + if (!post_act) { + err = -ENOMEM; + goto err_check; + } + post_act->ft = mlx5_chains_create_global_table(chains); + if (IS_ERR(post_act->ft)) { + err = PTR_ERR(post_act->ft); + mlx5_core_warn(priv->mdev, "failed to create post action table, err: %d\n", err); + goto err_ft; + } + post_act->chains = chains; + post_act->ns_type = ns_type; + post_act->priv = priv; + xa_init_flags(&post_act->ids, XA_FLAGS_ALLOC1); + return post_act; + +err_ft: + kfree(post_act); +err_check: + return ERR_PTR(err); +} + +void +mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act) +{ + if (IS_ERR_OR_NULL(post_act)) + return; + + xa_destroy(&post_act->ids); + mlx5_chains_destroy_global_table(post_act->chains, post_act->ft); + kfree(post_act); +} + +struct mlx5e_post_act_handle * +mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr) +{ + u32 attr_sz = ns_to_attr_sz(post_act->ns_type); + struct mlx5e_post_act_handle *handle = NULL; + struct mlx5_flow_attr *post_attr = NULL; + struct mlx5_flow_spec *spec = NULL; + int err; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + post_attr = 
mlx5_alloc_flow_attr(post_act->ns_type); + if (!handle || !spec || !post_attr) { + kfree(post_attr); + kvfree(spec); + kfree(handle); + return ERR_PTR(-ENOMEM); + } + + memcpy(post_attr, attr, attr_sz); + post_attr->chain = 0; + post_attr->prio = 0; + post_attr->ft = post_act->ft; + post_attr->inner_match_level = MLX5_MATCH_NONE; + post_attr->outer_match_level = MLX5_MATCH_NONE; + post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); + + handle->ns_type = post_act->ns_type; + /* Splits were handled before post action */ + if (handle->ns_type == MLX5_FLOW_NAMESPACE_FDB) + post_attr->esw_attr->split_count = 0; + + err = xa_alloc(&post_act->ids, &handle->id, post_attr, + XA_LIMIT(1, MLX5_POST_ACTION_MAX), GFP_KERNEL); + if (err) + goto err_xarray; + + /* Post action rule matches on fte_id and executes original rule's + * tc rule action + */ + mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, + handle->id, MLX5_POST_ACTION_MASK); + + handle->rule = mlx5_tc_rule_insert(post_act->priv, spec, post_attr); + if (IS_ERR(handle->rule)) { + err = PTR_ERR(handle->rule); + netdev_warn(post_act->priv->netdev, "Failed to add post action rule"); + goto err_rule; + } + handle->attr = post_attr; + + kvfree(spec); + return handle; + +err_rule: + xa_erase(&post_act->ids, handle->id); +err_xarray: + kfree(post_attr); + kvfree(spec); + kfree(handle); + return ERR_PTR(err); +} + +void +mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle) +{ + mlx5_tc_rule_delete(post_act->priv, handle->rule, handle->attr); + xa_erase(&post_act->ids, handle->id); + kfree(handle->attr); + kfree(handle); +} + +struct mlx5_flow_table * +mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act) +{ + return post_act->ft; +} + +/* Allocate a header modify action to write the post action handle fte id to a register. */ +int +mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev, + struct mlx5e_post_act_handle *handle, + struct mlx5e_tc_mod_hdr_acts *acts) +{ + return mlx5e_tc_match_to_reg_set(dev, acts, handle->ns_type, FTEID_TO_REG, handle->id); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h new file mode 100644 index 000000000000..b530ec1981a5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_POST_ACTION_H__ +#define __MLX5_POST_ACTION_H__ + +#include "en.h" +#include "lib/fs_chains.h" + +struct mlx5_flow_attr; +struct mlx5e_priv; +struct mlx5e_tc_mod_hdr_acts; + +struct mlx5e_post_act * +mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + enum mlx5_flow_namespace_type ns_type); + +void +mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act); + +struct mlx5e_post_act_handle * +mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr); + +void +mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle); + +struct mlx5_flow_table * +mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act); + +int +mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev, + struct mlx5e_post_act_handle *handle, + struct mlx5e_tc_mod_hdr_acts *acts); + +#endif /* __MLX5_POST_ACTION_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index 794012c5c476..6552ecee3f9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -4,7 +4,8 @@ #include <linux/skbuff.h> #include <net/psample.h> #include "en/mapping.h" -#include "esw/sample.h" +#include "en/tc/post_act.h" +#include "sample.h" #include "eswitch.h" #include "en_tc.h" #include "fs_core.h" @@ -17,17 +18,18 @@ static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = { .flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP, }; -struct mlx5_esw_psample { - struct mlx5e_priv *priv; +struct mlx5e_tc_psample { + struct mlx5_eswitch *esw; struct mlx5_flow_table *termtbl; struct mlx5_flow_handle *termtbl_rule; DECLARE_HASHTABLE(hashtbl, 8); struct mutex ht_lock; /* protect hashtbl */ DECLARE_HASHTABLE(restore_hashtbl, 8); struct mutex restore_lock; /* protect restore_hashtbl */ + struct mlx5e_post_act *post_act; }; -struct mlx5_sampler { +struct mlx5e_sampler { struct hlist_node hlist; u32 sampler_id; u32 sample_ratio; @@ -36,29 +38,32 @@ struct mlx5_sampler { int count; }; -struct mlx5_sample_flow { - struct mlx5_sampler *sampler; - struct mlx5_sample_restore *restore; +struct mlx5e_sample_flow { + struct mlx5e_sampler *sampler; + struct mlx5e_sample_restore *restore; struct mlx5_flow_attr *pre_attr; struct mlx5_flow_handle *pre_rule; - struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *post_attr; + struct mlx5_flow_handle *post_rule; + struct mlx5e_post_act_handle *post_act_handle; }; -struct mlx5_sample_restore { +struct mlx5e_sample_restore { struct hlist_node hlist; struct mlx5_modify_hdr *modify_hdr; struct mlx5_flow_handle *rule; + struct mlx5e_post_act_handle *post_act_handle; u32 obj_id; int count; }; static int -sampler_termtbl_create(struct mlx5_esw_psample *esw_psample) +sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample) { - struct mlx5_core_dev *dev = esw_psample->priv->mdev; - struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_eswitch *esw = tc_psample->esw; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_destination dest = {}; + struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_act act = {}; int err; @@ -79,20 +84,20 @@ sampler_termtbl_create(struct mlx5_esw_psample *esw_psample) ft_attr.prio = FDB_SLOW_PATH; ft_attr.max_fte = 1; ft_attr.level = 1; - esw_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); - if (IS_ERR(esw_psample->termtbl)) { - 
err = PTR_ERR(esw_psample->termtbl); + tc_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(tc_psample->termtbl)) { + err = PTR_ERR(tc_psample->termtbl); mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err); return err; } act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; dest.vport.num = esw->manager_vport; - esw_psample->termtbl_rule = mlx5_add_flow_rules(esw_psample->termtbl, NULL, &act, &dest, 1); - if (IS_ERR(esw_psample->termtbl_rule)) { - err = PTR_ERR(esw_psample->termtbl_rule); + tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1); + if (IS_ERR(tc_psample->termtbl_rule)) { + err = PTR_ERR(tc_psample->termtbl_rule); mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err); - mlx5_destroy_flow_table(esw_psample->termtbl); + mlx5_destroy_flow_table(tc_psample->termtbl); return err; } @@ -100,14 +105,14 @@ sampler_termtbl_create(struct mlx5_esw_psample *esw_psample) } static void -sampler_termtbl_destroy(struct mlx5_esw_psample *esw_psample) +sampler_termtbl_destroy(struct mlx5e_tc_psample *tc_psample) { - mlx5_del_flow_rules(esw_psample->termtbl_rule); - mlx5_destroy_flow_table(esw_psample->termtbl); + mlx5_del_flow_rules(tc_psample->termtbl_rule); + mlx5_destroy_flow_table(tc_psample->termtbl); } static int -sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5_sampler *sampler) +sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5e_sampler *sampler) { u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; @@ -163,16 +168,16 @@ sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 def return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2; } -static struct mlx5_sampler * -sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_table_id) +static struct mlx5e_sampler * +sampler_get(struct mlx5e_tc_psample *tc_psample, u32 sample_ratio, u32 default_table_id) { - struct mlx5_sampler *sampler; + struct mlx5e_sampler *sampler; u32 hash_key; int err; - mutex_lock(&esw_psample->ht_lock); + mutex_lock(&tc_psample->ht_lock); hash_key = sampler_hash(sample_ratio, default_table_id); - hash_for_each_possible(esw_psample->hashtbl, sampler, hlist, hash_key) + hash_for_each_possible(tc_psample->hashtbl, sampler, hlist, hash_key) if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id, sample_ratio, default_table_id)) goto add_ref; @@ -183,42 +188,49 @@ sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_ goto err_alloc; } - sampler->sample_table_id = esw_psample->termtbl->id; + sampler->sample_table_id = tc_psample->termtbl->id; sampler->default_table_id = default_table_id; sampler->sample_ratio = sample_ratio; - err = sampler_obj_create(esw_psample->priv->mdev, sampler); + err = sampler_obj_create(tc_psample->esw->dev, sampler); if (err) goto err_create; - hash_add(esw_psample->hashtbl, &sampler->hlist, hash_key); + hash_add(tc_psample->hashtbl, &sampler->hlist, hash_key); add_ref: sampler->count++; - mutex_unlock(&esw_psample->ht_lock); + mutex_unlock(&tc_psample->ht_lock); return sampler; err_create: kfree(sampler); err_alloc: - mutex_unlock(&esw_psample->ht_lock); + mutex_unlock(&tc_psample->ht_lock); return ERR_PTR(err); } static void -sampler_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sampler *sampler) +sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler) { - mutex_lock(&esw_psample->ht_lock); + 
mutex_lock(&tc_psample->ht_lock); if (--sampler->count == 0) { hash_del(&sampler->hlist); - sampler_obj_destroy(esw_psample->priv->mdev, sampler->sampler_id); + sampler_obj_destroy(tc_psample->esw->dev, sampler->sampler_id); kfree(sampler); } - mutex_unlock(&esw_psample->ht_lock); + mutex_unlock(&tc_psample->ht_lock); } +/* obj_id is used to restore the sample parameters. + * Set fte_id in original flow table, then match it in the default table. + * Only set it for NICs can preserve reg_c or decap action. For other cases, + * use the same match in the default table. + * Use one header rewrite for both obj_id and fte_id. + */ static struct mlx5_modify_hdr * -sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id) +sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id, + struct mlx5e_post_act_handle *handle) { struct mlx5e_tc_mod_hdr_acts mod_acts = {}; struct mlx5_modify_hdr *modify_hdr; @@ -229,6 +241,12 @@ sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id) if (err) goto err_set_regc0; + if (handle) { + err = mlx5e_tc_post_act_set_handle(mdev, handle, &mod_acts); + if (err) + goto err_post_act; + } + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB, mod_acts.num_actions, mod_acts.actions); @@ -241,23 +259,40 @@ sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id) return modify_hdr; err_modify_hdr: +err_post_act: dealloc_mod_hdr_actions(&mod_acts); err_set_regc0: return ERR_PTR(err); } -static struct mlx5_sample_restore * -sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id) +static u32 +restore_hash(u32 obj_id, struct mlx5e_post_act_handle *post_act_handle) { - struct mlx5_core_dev *mdev = esw_psample->priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_sample_restore *restore; + return jhash_2words(obj_id, hash32_ptr(post_act_handle), 0); +} + +static bool +restore_equal(struct mlx5e_sample_restore *restore, u32 obj_id, + struct mlx5e_post_act_handle *post_act_handle) +{ + return restore->obj_id == obj_id && restore->post_act_handle == post_act_handle; +} + +static struct mlx5e_sample_restore * +sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, + struct mlx5e_post_act_handle *post_act_handle) +{ + struct mlx5_eswitch *esw = tc_psample->esw; + struct mlx5_core_dev *mdev = esw->dev; + struct mlx5e_sample_restore *restore; struct mlx5_modify_hdr *modify_hdr; + u32 hash_key; int err; - mutex_lock(&esw_psample->restore_lock); - hash_for_each_possible(esw_psample->restore_hashtbl, restore, hlist, obj_id) - if (restore->obj_id == obj_id) + mutex_lock(&tc_psample->restore_lock); + hash_key = restore_hash(obj_id, post_act_handle); + hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, hash_key) + if (restore_equal(restore, obj_id, post_act_handle)) goto add_ref; restore = kzalloc(sizeof(*restore), GFP_KERNEL); @@ -266,8 +301,9 @@ sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id) goto err_alloc; } restore->obj_id = obj_id; + restore->post_act_handle = post_act_handle; - modify_hdr = sample_metadata_rule_get(mdev, obj_id); + modify_hdr = sample_modify_hdr_get(mdev, obj_id, post_act_handle); if (IS_ERR(modify_hdr)) { err = PTR_ERR(modify_hdr); goto err_modify_hdr; @@ -280,10 +316,10 @@ sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id) goto err_restore; } - hash_add(esw_psample->restore_hashtbl, &restore->hlist, obj_id); + hash_add(tc_psample->restore_hashtbl, &restore->hlist, hash_key); add_ref: restore->count++; - 
mutex_unlock(&esw_psample->restore_lock); + mutex_unlock(&tc_psample->restore_lock); return restore; err_restore: @@ -291,26 +327,26 @@ err_restore: err_modify_hdr: kfree(restore); err_alloc: - mutex_unlock(&esw_psample->restore_lock); + mutex_unlock(&tc_psample->restore_lock); return ERR_PTR(err); } static void -sample_restore_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sample_restore *restore) +sample_restore_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sample_restore *restore) { - mutex_lock(&esw_psample->restore_lock); + mutex_lock(&tc_psample->restore_lock); if (--restore->count == 0) hash_del(&restore->hlist); - mutex_unlock(&esw_psample->restore_lock); + mutex_unlock(&tc_psample->restore_lock); if (!restore->count) { mlx5_del_flow_rules(restore->rule); - mlx5_modify_header_dealloc(esw_psample->priv->mdev, restore->modify_hdr); + mlx5_modify_header_dealloc(tc_psample->esw->dev, restore->modify_hdr); kfree(restore); } } -void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) +void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) { u32 trunc_size = mapped_obj->sample.trunc_size; struct psample_group psample_group = {}; @@ -325,6 +361,87 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md); } +static int +add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, + struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, + u32 *default_tbl_id) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + u32 attr_sz = ns_to_attr_sz(MLX5_FLOW_NAMESPACE_FDB); + struct mlx5_vport_tbl_attr per_vport_tbl_attr; + struct mlx5_flow_table *default_tbl; + struct mlx5_flow_attr *post_attr; + int err; + + /* Allocate default table per vport, chain and prio. Otherwise, there is + * only one default table for the same sampler object. Rules with different + * prio and chain may overlap. For CT sample action, per vport default + * table is needed to resotre the metadata. + */ + per_vport_tbl_attr.chain = attr->chain; + per_vport_tbl_attr.prio = attr->prio; + per_vport_tbl_attr.vport = esw_attr->in_rep->vport; + per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr); + if (IS_ERR(default_tbl)) { + err = PTR_ERR(default_tbl); + goto err_default_tbl; + } + *default_tbl_id = default_tbl->id; + + post_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!post_attr) { + err = -ENOMEM; + goto err_attr; + } + sample_flow->post_attr = post_attr; + memcpy(post_attr, attr, attr_sz); + /* Perform the original matches on the default table. + * Offload all actions except the sample action. + */ + post_attr->chain = 0; + post_attr->prio = 0; + post_attr->ft = default_tbl; + post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + + /* When offloading sample and encap action, if there is no valid + * neigh data struct, a slow path rule is offloaded first. Source + * port metadata match is set at that time. A per vport table is + * already allocated. No need to match it again. So clear the source + * port metadata match. 
+ */ + mlx5_eswitch_clear_rule_source_port(esw, spec); + sample_flow->post_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, post_attr); + if (IS_ERR(sample_flow->post_rule)) { + err = PTR_ERR(sample_flow->post_rule); + goto err_rule; + } + return 0; + +err_rule: + kfree(post_attr); +err_attr: + mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr); +err_default_tbl: + return err; +} + +static void +del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_vport_tbl_attr tbl_attr; + + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, sample_flow->post_attr); + kfree(sample_flow->post_attr); + tbl_attr.chain = attr->chain; + tbl_attr.prio = attr->prio; + tbl_attr.vport = esw_attr->in_rep->vport; + tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + mlx5_esw_vporttbl_put(esw, &tbl_attr); +} + /* For the following typical flow table: * * +-------------------------------+ @@ -342,8 +459,9 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj * +---------------------+ * + original match + * +---------------------+ - * | - * v + * | set fte_id (if reg_c preserve cap) + * | do decap (if required) + * v * +------------------------------------------------+ * + Flow Sampler Object + * +------------------------------------------------+ @@ -353,80 +471,82 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj * +------------------------------------------------+ * | | * v v - * +-----------------------------+ +----------------------------------------+ - * + sample table + + default table per <vport, chain, prio> + - * +-----------------------------+ +----------------------------------------+ - * + forward to management vport + + original match + - * +-----------------------------+ +----------------------------------------+ - * + other actions + - * +----------------------------------------+ + * +-----------------------------+ +-------------------+ + * + sample table + + default table + + * +-----------------------------+ +-------------------+ + * + forward to management vport + | + * +-----------------------------+ | + * +-------+------+ + * | |reg_c preserve cap + * | |or decap action + * v v + * +-----------------+ +-------------+ + * + per vport table + + post action + + * +-----------------+ +-------------+ + * + original match + + * +-----------------+ + * + other actions + + * +-----------------+ */ struct mlx5_flow_handle * -mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample, +mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr) + struct mlx5_flow_attr *attr, + u32 tunnel_id) { + struct mlx5e_post_act_handle *post_act_handle = NULL; struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - struct mlx5_vport_tbl_attr per_vport_tbl_attr; struct mlx5_esw_flow_attr *pre_esw_attr; struct mlx5_mapped_obj restore_obj = {}; - struct mlx5_sample_flow *sample_flow; - struct mlx5_sample_attr *sample_attr; - struct mlx5_flow_table *default_tbl; + struct mlx5e_sample_flow *sample_flow; + struct mlx5e_sample_attr *sample_attr; struct mlx5_flow_attr *pre_attr; struct mlx5_eswitch *esw; + u32 default_tbl_id; u32 obj_id; int err; - if (IS_ERR_OR_NULL(esw_psample)) + if (IS_ERR_OR_NULL(tc_psample)) return ERR_PTR(-EOPNOTSUPP); /* If slow path flag is set, eg. when the neigh is invalid for encap, * don't offload sample action. 
*/ - esw = esw_psample->priv->mdev->priv.eswitch; + esw = tc_psample->esw; if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL); if (!sample_flow) return ERR_PTR(-ENOMEM); - esw_attr->sample->sample_flow = sample_flow; + sample_attr = attr->sample_attr; + sample_attr->sample_flow = sample_flow; - /* Allocate default table per vport, chain and prio. Otherwise, there is - * only one default table for the same sampler object. Rules with different - * prio and chain may overlap. For CT sample action, per vport default - * table is needed to resotre the metadata. + /* For NICs with reg_c_preserve support or decap action, use + * post action instead of the per vport, chain and prio table. + * Only match the fte id instead of the same match in the + * original flow table. */ - per_vport_tbl_attr.chain = attr->chain; - per_vport_tbl_attr.prio = attr->prio; - per_vport_tbl_attr.vport = esw_attr->in_rep->vport; - per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; - default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr); - if (IS_ERR(default_tbl)) { - err = PTR_ERR(default_tbl); - goto err_default_tbl; - } - - /* Perform the original matches on the default table. - * Offload all actions except the sample action. - */ - esw_attr->sample->sample_default_tbl = default_tbl; - /* When offloading sample and encap action, if there is no valid - * neigh data struct, a slow path rule is offloaded first. Source - * port metadata match is set at that time. A per vport table is - * already allocated. No need to match it again. So clear the source - * port metadata match. - */ - mlx5_eswitch_clear_rule_source_port(esw, spec); - sample_flow->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); - if (IS_ERR(sample_flow->rule)) { - err = PTR_ERR(sample_flow->rule); - goto err_offload_rule; + if (MLX5_CAP_GEN(esw->dev, reg_c_preserve) || + attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + struct mlx5_flow_table *ft; + + ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act); + default_tbl_id = ft->id; + post_act_handle = mlx5e_tc_post_act_add(tc_psample->post_act, attr); + if (IS_ERR(post_act_handle)) { + err = PTR_ERR(post_act_handle); + goto err_post_act; + } + sample_flow->post_act_handle = post_act_handle; + } else { + err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id); + if (err) + goto err_post_rule; } /* Create sampler object. */ - sample_flow->sampler = sampler_get(esw_psample, esw_attr->sample->rate, default_tbl->id); + sample_flow->sampler = sampler_get(tc_psample, sample_attr->rate, default_tbl_id); if (IS_ERR(sample_flow->sampler)) { err = PTR_ERR(sample_flow->sampler); goto err_sampler; @@ -434,16 +554,17 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample, /* Create an id mapping reg_c0 value to sample object. */ restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE; - restore_obj.sample.group_id = esw_attr->sample->group_num; - restore_obj.sample.rate = esw_attr->sample->rate; - restore_obj.sample.trunc_size = esw_attr->sample->trunc_size; + restore_obj.sample.group_id = sample_attr->group_num; + restore_obj.sample.rate = sample_attr->rate; + restore_obj.sample.trunc_size = sample_attr->trunc_size; + restore_obj.sample.tunnel_id = tunnel_id; err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id); if (err) goto err_obj_id; - esw_attr->sample->restore_obj_id = obj_id; + sample_attr->restore_obj_id = obj_id; /* Create sample restore context. 
*/ - sample_flow->restore = sample_restore_get(esw_psample, obj_id); + sample_flow->restore = sample_restore_get(tc_psample, obj_id, post_act_handle); if (IS_ERR(sample_flow->restore)) { err = PTR_ERR(sample_flow->restore); goto err_sample_restore; @@ -455,21 +576,23 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample, pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); if (!pre_attr) { err = -ENOMEM; - goto err_alloc_flow_attr; - } - sample_attr = kzalloc(sizeof(*sample_attr), GFP_KERNEL); - if (!sample_attr) { - err = -ENOMEM; - goto err_alloc_sample_attr; + goto err_alloc_pre_flow_attr; } - pre_esw_attr = pre_attr->esw_attr; pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + /* For decap action, do decap in the original flow table instead of the + * default flow table. + */ + if (tunnel_id) + pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; pre_attr->modify_hdr = sample_flow->restore->modify_hdr; pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE; + pre_attr->inner_match_level = attr->inner_match_level; + pre_attr->outer_match_level = attr->outer_match_level; pre_attr->chain = attr->chain; pre_attr->prio = attr->prio; - pre_esw_attr->sample = sample_attr; - pre_esw_attr->sample->sampler_id = sample_flow->sampler->sampler_id; + pre_attr->sample_attr = attr->sample_attr; + sample_attr->sampler_id = sample_flow->sampler->sampler_id; + pre_esw_attr = pre_attr->esw_attr; pre_esw_attr->in_mdev = esw_attr->in_mdev; pre_esw_attr->in_rep = esw_attr->in_rep; sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr); @@ -479,107 +602,113 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample, } sample_flow->pre_attr = pre_attr; - return sample_flow->rule; + return sample_flow->post_rule; err_pre_offload_rule: - kfree(sample_attr); -err_alloc_sample_attr: kfree(pre_attr); -err_alloc_flow_attr: - sample_restore_put(esw_psample, sample_flow->restore); +err_alloc_pre_flow_attr: + sample_restore_put(tc_psample, sample_flow->restore); err_sample_restore: mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id); err_obj_id: - sampler_put(esw_psample, sample_flow->sampler); + sampler_put(tc_psample, sample_flow->sampler); err_sampler: - /* For sample offload, rule is added in default_tbl. No need to call - * mlx5_esw_chains_put_table() - */ - attr->prio = 0; - attr->chain = 0; - mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr); -err_offload_rule: - mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr); -err_default_tbl: + if (!post_act_handle) + del_post_rule(esw, sample_flow, attr); +err_post_rule: + if (post_act_handle) + mlx5e_tc_post_act_del(tc_psample->post_act, post_act_handle); +err_post_act: + kfree(sample_flow); return ERR_PTR(err); } void -mlx5_esw_sample_unoffload(struct mlx5_esw_psample *esw_psample, +mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_handle *rule, struct mlx5_flow_attr *attr) { struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - struct mlx5_sample_flow *sample_flow; + struct mlx5e_sample_flow *sample_flow; struct mlx5_vport_tbl_attr tbl_attr; - struct mlx5_flow_attr *pre_attr; struct mlx5_eswitch *esw; - if (IS_ERR_OR_NULL(esw_psample)) + if (IS_ERR_OR_NULL(tc_psample)) return; /* If slow path flag is set, sample action is not offloaded. * No need to delete sample rule. 
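That completes the setup side of mlx5e_tc_sample_offload(). Condensed as a reading aid before the teardown path below (the function and field names are taken from the hunks above; the numbered outline itself is editorial, not code from the patch):

	/*
	 * 1. post-action handle (reg_c_preserve/decap) or per-vport post rule
	 *    via add_post_rule()                      -> default_tbl_id
	 * 2. sampler_get(tc_psample, rate, default_tbl_id)
	 * 3. mapping_add(reg_c0_obj_pool, {group_num, rate, trunc_size,
	 *    tunnel_id})                               -> obj_id
	 * 4. sample_restore_get(tc_psample, obj_id, post_act_handle)
	 *                                              -> modify_hdr for reg_c0
	 * 5. pre rule in the original <chain, prio>: MOD_HDR (+ DECAP when
	 *    tunnel_id != 0), MLX5_ESW_ATTR_FLAG_SAMPLE, sampler_id
	 */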
*/ - esw = esw_psample->priv->mdev->priv.eswitch; + esw = tc_psample->esw; if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { mlx5_eswitch_del_offloaded_rule(esw, rule, attr); return; } - sample_flow = esw_attr->sample->sample_flow; - pre_attr = sample_flow->pre_attr; - memset(pre_attr, 0, sizeof(*pre_attr)); - esw = esw_psample->priv->mdev->priv.eswitch; - mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, pre_attr); - mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr); - - sample_restore_put(esw_psample, sample_flow->restore); - mapping_remove(esw->offloads.reg_c0_obj_pool, esw_attr->sample->restore_obj_id); - sampler_put(esw_psample, sample_flow->sampler); - tbl_attr.chain = attr->chain; - tbl_attr.prio = attr->prio; - tbl_attr.vport = esw_attr->in_rep->vport; - tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; - mlx5_esw_vporttbl_put(esw, &tbl_attr); + /* The following delete order can't be changed, otherwise, + * will hit fw syndromes. + */ + sample_flow = attr->sample_attr->sample_flow; + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); + if (!sample_flow->post_act_handle) + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, + sample_flow->post_attr); + + sample_restore_put(tc_psample, sample_flow->restore); + mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id); + sampler_put(tc_psample, sample_flow->sampler); + if (sample_flow->post_act_handle) { + mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle); + } else { + tbl_attr.chain = attr->chain; + tbl_attr.prio = attr->prio; + tbl_attr.vport = esw_attr->in_rep->vport; + tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + mlx5_esw_vporttbl_put(esw, &tbl_attr); + kfree(sample_flow->post_attr); + } - kfree(pre_attr->esw_attr->sample); - kfree(pre_attr); + kfree(sample_flow->pre_attr); kfree(sample_flow); } -struct mlx5_esw_psample * -mlx5_esw_sample_init(struct mlx5e_priv *priv) +struct mlx5e_tc_psample * +mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act) { - struct mlx5_esw_psample *esw_psample; + struct mlx5e_tc_psample *tc_psample; int err; - esw_psample = kzalloc(sizeof(*esw_psample), GFP_KERNEL); - if (!esw_psample) + tc_psample = kzalloc(sizeof(*tc_psample), GFP_KERNEL); + if (!tc_psample) return ERR_PTR(-ENOMEM); - esw_psample->priv = priv; - err = sampler_termtbl_create(esw_psample); + if (IS_ERR_OR_NULL(post_act)) { + err = PTR_ERR(post_act); + goto err_post_act; + } + tc_psample->post_act = post_act; + tc_psample->esw = esw; + err = sampler_termtbl_create(tc_psample); if (err) - goto err_termtbl; + goto err_post_act; - mutex_init(&esw_psample->ht_lock); - mutex_init(&esw_psample->restore_lock); + mutex_init(&tc_psample->ht_lock); + mutex_init(&tc_psample->restore_lock); - return esw_psample; + return tc_psample; -err_termtbl: - kfree(esw_psample); +err_post_act: + kfree(tc_psample); return ERR_PTR(err); } void -mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample) +mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) { - if (IS_ERR_OR_NULL(esw_psample)) + if (IS_ERR_OR_NULL(tc_psample)) return; - mutex_destroy(&esw_psample->restore_lock); - mutex_destroy(&esw_psample->ht_lock); - sampler_termtbl_destroy(esw_psample); - kfree(esw_psample); + mutex_destroy(&tc_psample->restore_lock); + mutex_destroy(&tc_psample->ht_lock); + sampler_termtbl_destroy(tc_psample); + kfree(tc_psample); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h new file mode 100644 index 000000000000..db0146df9b30 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_SAMPLE_H__ +#define __MLX5_EN_TC_SAMPLE_H__ + +#include "eswitch.h" + +struct mlx5_flow_attr; +struct mlx5e_tc_psample; +struct mlx5e_post_act; + +struct mlx5e_sample_attr { + u32 group_num; + u32 rate; + u32 trunc_size; + u32 restore_obj_id; + u32 sampler_id; + struct mlx5e_sample_flow *sample_flow; +}; + +void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj); + +struct mlx5_flow_handle * +mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + u32 tunnel_id); + +void +mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +struct mlx5e_tc_psample * +mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act); + +void +mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample); + +#endif /* __MLX5_EN_TC_SAMPLE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 91e7a01e32be..6c949abcd2e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -19,6 +19,7 @@ #include "en/tc_ct.h" #include "en/mod_hdr.h" #include "en/mapping.h" +#include "en/tc/post_act.h" #include "en.h" #include "en_tc.h" #include "en_rep.h" @@ -32,10 +33,6 @@ #define MLX5_CT_STATE_RELATED_BIT BIT(5) #define MLX5_CT_STATE_INVALID_BIT BIT(6) -#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen) -#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) -#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX - #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen) #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0) @@ -46,14 +43,13 @@ struct mlx5_tc_ct_priv { struct mlx5_core_dev *dev; const struct net_device *netdev; struct mod_hdr_tbl *mod_hdr_tbl; - struct idr fte_ids; struct xarray tuple_ids; struct rhashtable zone_ht; struct rhashtable ct_tuples_ht; struct rhashtable ct_tuples_nat_ht; struct mlx5_flow_table *ct; struct mlx5_flow_table *ct_nat; - struct mlx5_flow_table *post_ct; + struct mlx5e_post_act *post_act; struct mutex control_lock; /* guards parallel adds/dels */ struct mapping_ctx *zone_mapping; struct mapping_ctx *labels_mapping; @@ -64,11 +60,9 @@ struct mlx5_tc_ct_priv { struct mlx5_ct_flow { struct mlx5_flow_attr *pre_ct_attr; - struct mlx5_flow_attr *post_ct_attr; struct mlx5_flow_handle *pre_ct_rule; - struct mlx5_flow_handle *post_ct_rule; + struct mlx5e_post_act_handle *post_act_handle; struct mlx5_ct_ft *ft; - u32 fte_id; u32 chain_mapping; }; @@ -768,7 +762,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = 0; - attr->dest_ft = ct_priv->post_ct; + attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); attr->ft = nat ? 
ct_priv->ct_nat : ct_priv->ct; attr->outer_match_level = MLX5_MATCH_L4; attr->counter = entry->counter->counter; @@ -1432,7 +1426,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, ctstate |= MLX5_CT_STATE_NAT_BIT; mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); - dest.ft = ct_priv->post_ct; + dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1716,9 +1710,9 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * | do decap * v * +---------------------+ - * + pre_ct/pre_ct_nat + if matches +---------------------+ - * + zone+nat match +---------------->+ post_ct (see below) + - * +---------------------+ set zone +---------------------+ + * + pre_ct/pre_ct_nat + if matches +-------------------------+ + * + zone+nat match +---------------->+ post_act (see below) + + * +---------------------+ set zone +-------------------------+ * | set zone * v * +--------------------+ @@ -1732,7 +1726,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * | do nat (if needed) * v * +--------------+ - * + post_ct + original filter actions + * + post_act + original filter actions * + fte_id match +------------------------> * +--------------+ */ @@ -1746,19 +1740,15 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); - struct mlx5_flow_spec *post_ct_spec = NULL; + struct mlx5e_post_act_handle *handle; struct mlx5_flow_attr *pre_ct_attr; struct mlx5_modify_hdr *mod_hdr; - struct mlx5_flow_handle *rule; struct mlx5_ct_flow *ct_flow; int chain_mapping = 0, err; struct mlx5_ct_ft *ft; - u32 fte_id = 1; - post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL); ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); - if (!post_ct_spec || !ct_flow) { - kvfree(post_ct_spec); + if (!ct_flow) { kfree(ct_flow); return ERR_PTR(-ENOMEM); } @@ -1773,14 +1763,13 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } ct_flow->ft = ft; - err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id, - MLX5_FTE_ID_MAX, GFP_KERNEL); - if (err) { - netdev_warn(priv->netdev, - "Failed to allocate fte id, err: %d\n", err); - goto err_idr; + handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + ct_dbg("Failed to allocate post action handle"); + goto err_post_act_handle; } - ct_flow->fte_id = fte_id; + ct_flow->post_act_handle = handle; /* Base flow attributes of both rules on original rule attribute */ ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); @@ -1789,15 +1778,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, goto err_alloc_pre; } - ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); - if (!ct_flow->post_ct_attr) { - err = -ENOMEM; - goto err_alloc_post; - } - pre_ct_attr = ct_flow->pre_ct_attr; memcpy(pre_ct_attr, attr, attr_sz); - memcpy(ct_flow->post_ct_attr, attr, attr_sz); /* Modify the original rule's action to fwd and modify, leave decap */ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; @@ -1823,10 +1805,9 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, goto err_mapping; } - err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, - FTEID_TO_REG, fte_id); + err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, 
&pre_mod_acts); if (err) { - ct_dbg("Failed to set fte_id register mapping"); + ct_dbg("Failed to set post action handle"); goto err_mapping; } @@ -1857,33 +1838,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } pre_ct_attr->modify_hdr = mod_hdr; - /* Post ct rule matches on fte_id and executes original rule's - * tc rule action - */ - mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG, - fte_id, MLX5_FTE_ID_MASK); - - /* Put post_ct rule on post_ct flow table */ - ct_flow->post_ct_attr->chain = 0; - ct_flow->post_ct_attr->prio = 0; - ct_flow->post_ct_attr->ft = ct_priv->post_ct; - - /* Splits were handled before CT */ - if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) - ct_flow->post_ct_attr->esw_attr->split_count = 0; - - ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE; - ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE; - ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); - rule = mlx5_tc_rule_insert(priv, post_ct_spec, - ct_flow->post_ct_attr); - ct_flow->post_ct_rule = rule; - if (IS_ERR(ct_flow->post_ct_rule)) { - err = PTR_ERR(ct_flow->post_ct_rule); - ct_dbg("Failed to add post ct rule"); - goto err_insert_post_ct; - } - /* Change original rule point to ct table */ pre_ct_attr->dest_chain = 0; pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft; @@ -1897,28 +1851,21 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, attr->ct_attr.ct_flow = ct_flow; dealloc_mod_hdr_actions(&pre_mod_acts); - kvfree(post_ct_spec); - return rule; + return ct_flow->pre_ct_rule; err_insert_orig: - mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule, - ct_flow->post_ct_attr); -err_insert_post_ct: mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); err_mapping: dealloc_mod_hdr_actions(&pre_mod_acts); mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); err_get_chain: - kfree(ct_flow->post_ct_attr); -err_alloc_post: kfree(ct_flow->pre_ct_attr); err_alloc_pre: - idr_remove(&ct_priv->fte_ids, fte_id); -err_idr: + mlx5e_tc_post_act_del(ct_priv->post_act, handle); +err_post_act_handle: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: - kvfree(post_ct_spec); kfree(ct_flow); netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); return ERR_PTR(err); @@ -2029,16 +1976,13 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, pre_ct_attr); mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); - if (ct_flow->post_ct_rule) { - mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule, - ct_flow->post_ct_attr); + if (ct_flow->post_act_handle) { mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); - idr_remove(&ct_priv->fte_ids, ct_flow->fte_id); + mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle); mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); } kfree(ct_flow->pre_ct_attr); - kfree(ct_flow->post_ct_attr); kfree(ct_flow); } @@ -2064,11 +2008,6 @@ static int mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, const char **err_msg) { - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) { - *err_msg = "firmware level support is missing"; - return -EOPNOTSUPP; - } - if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { /* vlan workaround should be avoided for multi chain rules. 
* This is just a sanity check as pop vlan action should @@ -2098,20 +2037,9 @@ mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, } static int -mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv, - const char **err_msg) -{ - if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { - *err_msg = "firmware level support is missing"; - return -EOPNOTSUPP; - } - - return 0; -} - -static int mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, const char **err_msg) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -2122,10 +2050,14 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, *err_msg = "tc skb extension missing"; return -EOPNOTSUPP; #endif + if (IS_ERR_OR_NULL(post_act)) { + *err_msg = "tc ct offload not supported, post action is missing"; + return -EOPNOTSUPP; + } + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) return mlx5_tc_ct_init_check_esw_support(esw, err_msg); - else - return mlx5_tc_ct_init_check_nic_support(priv, err_msg); + return 0; } #define INIT_ERR_PREFIX "tc ct offload init failed" @@ -2133,19 +2065,19 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, struct mlx5_tc_ct_priv * mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, struct mod_hdr_tbl *mod_hdr, - enum mlx5_flow_namespace_type ns_type) + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) { struct mlx5_tc_ct_priv *ct_priv; struct mlx5_core_dev *dev; const char *msg; + u64 mapping_id; int err; dev = priv->mdev; - err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg); + err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act, &msg); if (err) { - mlx5_core_warn(dev, - "tc ct offload not supported, %s\n", - msg); + mlx5_core_warn(dev, "tc ct offload not supported, %s\n", msg); goto err_support; } @@ -2153,13 +2085,17 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (!ct_priv) goto err_alloc; - ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true); + mapping_id = mlx5_query_nic_system_image_guid(dev); + + ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE, + sizeof(u16), 0, true); if (IS_ERR(ct_priv->zone_mapping)) { err = PTR_ERR(ct_priv->zone_mapping); goto err_mapping_zone; } - ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true); + ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS, + sizeof(u32) * 4, 0, true); if (IS_ERR(ct_priv->labels_mapping)) { err = PTR_ERR(ct_priv->labels_mapping); goto err_mapping_labels; @@ -2189,16 +2125,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, goto err_ct_nat_tbl; } - ct_priv->post_ct = mlx5_chains_create_global_table(chains); - if (IS_ERR(ct_priv->post_ct)) { - err = PTR_ERR(ct_priv->post_ct); - mlx5_core_warn(dev, - "%s, failed to create post ct table err: %d\n", - INIT_ERR_PREFIX, err); - goto err_post_ct_tbl; - } - - idr_init(&ct_priv->fte_ids); + ct_priv->post_act = post_act; mutex_init(&ct_priv->control_lock); rhashtable_init(&ct_priv->zone_ht, &zone_params); rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); @@ -2206,8 +2133,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, return ct_priv; -err_post_ct_tbl: - mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); err_ct_nat_tbl: mlx5_chains_destroy_global_table(chains, ct_priv->ct); err_ct_tbl: @@ -2232,7 +2157,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) chains = ct_priv->chains; 
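Seen together with the sampling changes, the common thread is the new post-action table: instead of CT keeping a private fte_id IDR plus post_ct table (and sampling keeping a per-vport default table in every case), both now register the tail of the rule with mlx5e_post_act. The calls visible in these hunks suggest roughly the following contract; this outline is assembled from the patch, and the post_act implementation itself (en/tc/post_act.c) is not part of this excerpt:

	/* Post-action table usage, as CT and sample use it above:
	 *
	 *   handle = mlx5e_tc_post_act_add(post_act, attr);
	 *       installs a rule that carries the original (post-)actions
	 *   mlx5e_tc_post_act_set_handle(mdev, handle, &mod_acts);
	 *       appends the modify-header that tags packets with the
	 *       handle's id (replacing the old FTEID_TO_REG mapping)
	 *   ft = mlx5e_tc_post_act_get_ft(post_act);
	 *       destination table to forward to once packets are tagged
	 *   ...
	 *   mlx5e_tc_post_act_del(post_act, handle);
	 */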
- mlx5_chains_destroy_global_table(chains, ct_priv->post_ct); mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); mlx5_chains_destroy_global_table(chains, ct_priv->ct); mapping_destroy(ct_priv->zone_mapping); @@ -2242,7 +2166,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); rhashtable_destroy(&ct_priv->zone_ht); mutex_destroy(&ct_priv->control_lock); - idr_destroy(&ct_priv->fte_ids); kfree(ct_priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 644cf1641cde..363329f4aac6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -92,7 +92,8 @@ struct mlx5_ct_attr { struct mlx5_tc_ct_priv * mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, struct mod_hdr_tbl *mod_hdr, - enum mlx5_flow_namespace_type ns_type); + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act); void mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv); @@ -132,7 +133,8 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, static inline struct mlx5_tc_ct_priv * mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, struct mod_hdr_tbl *mod_hdr, - enum mlx5_flow_namespace_type ns_type) + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) { return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 8f79f04eccd6..b4e986818794 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -124,6 +124,11 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, if (IS_ERR(rt)) return PTR_ERR(rt); + if (rt->rt_type != RTN_UNICAST) { + ret = -ENETUNREACH; + goto err_rt_release; + } + if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) { ret = -ENETUNREACH; goto err_rt_release; @@ -520,7 +525,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, e->out_dev = attr.out_dev; e->route_dev_ifindex = attr.route_dev->ifindex; - /* It's importent to add the neigh to the hash table before checking + /* It's important to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the * neigh changes it's validity state, we would find the relevant neigh * in the hash. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index c06267477b27..538bc2419bd8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -126,7 +126,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, /* Create a separate SQ, so that when the buff pool is disabled, we could * close this SQ safely and stop receiving CQEs. In other case, e.g., if * the XDPSQ was used instead, we might run into trouble when the buff pool - * is disabled and then reenabled, but the SQ continues receiving CQEs + * is disabled and then re-enabled, but the SQ continues receiving CQEs * from the old buff pool. 
*/ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index c4db367d4baf..84eb7201c142 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -33,7 +33,7 @@ #include "en.h" /* mlx5e global resources should be placed in this file. - * Global resources are common to all the netdevices crated on the same nic. + * Global resources are common to all the netdevices created on the same nic. */ void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2cf59bb5f898..5696d3f1baaf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -420,6 +420,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, unsigned int count = ch->combined_count; struct mlx5e_params new_params; bool arfs_enabled; + int rss_cnt; bool opened; int err = 0; @@ -455,6 +456,27 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, goto out; } + /* Don't allow changing the number of channels if non-default RSS contexts exist, + * the kernel doesn't protect against set_channels operations that break them. + */ + rss_cnt = mlx5e_rx_res_rss_cnt(priv->rx_res) - 1; + if (rss_cnt) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: Non-default RSS contexts exist (%d), cannot change the number of channels\n", + __func__, rss_cnt); + goto out; + } + + /* Don't allow changing the number of channels if MQPRIO mode channel offload is active, + * because it defines a partition over the channels queues. + */ + if (cur_params->mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: MQPRIO mode channel offload is active, cannot change the number of channels\n", + __func__); + goto out; + } + new_params = *cur_params; new_params.num_channels = count; @@ -1194,16 +1216,51 @@ static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) return mlx5e_ethtool_get_rxfh_indir_size(priv); } -int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, - u8 *hfunc) +static int mlx5e_get_rxfh_context(struct net_device *dev, u32 *indir, + u8 *key, u8 *hfunc, u32 rss_context) { - struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_priv *priv = netdev_priv(dev); + int err; mutex_lock(&priv->state_lock); - mlx5e_rx_res_rss_get_rxfh(priv->rx_res, indir, key, hfunc); + err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context, indir, key, hfunc); mutex_unlock(&priv->state_lock); + return err; +} - return 0; +static int mlx5e_set_rxfh_context(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc, + u32 *rss_context, bool delete) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + if (delete) { + err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context); + goto unlock; + } + + if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { + unsigned int count = priv->channels.params.num_channels; + + err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count); + if (err) + goto unlock; + } + + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context, indir, key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? 
NULL : &hfunc); + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + return mlx5e_get_rxfh_context(netdev, indir, key, hfunc, 0); } int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, @@ -1213,7 +1270,7 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, int err; mutex_lock(&priv->state_lock); - err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, indir, key, + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, 0, indir, key, hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc); mutex_unlock(&priv->state_lock); return err; @@ -2299,6 +2356,8 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, + .get_rxfh_context = mlx5e_get_rxfh_context, + .set_rxfh_context = mlx5e_set_rxfh_context, .get_rxnfc = mlx5e_get_rxnfc, .set_rxnfc = mlx5e_set_rxnfc, .get_tunable = mlx5e_get_tunable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 5c754e9af669..c06b4b938ae7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -1255,7 +1255,8 @@ static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) return 0; mlx5e_set_inner_ttc_params(priv, &ttc_params); - priv->fs.inner_ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + priv->fs.inner_ttc = mlx5_create_inner_ttc_table(priv->mdev, + &ttc_params); if (IS_ERR(priv->fs.inner_ttc)) return PTR_ERR(priv->fs.inner_ttc); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 3d8918f9399e..03693fa74a70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -35,11 +35,19 @@ #include "en/params.h" #include "en/xsk/pool.h" +static int flow_type_to_traffic_type(u32 flow_type); + +static u32 flow_type_mask(u32 flow_type) +{ + return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); +} + struct mlx5e_ethtool_rule { struct list_head list; struct ethtool_rx_flow_spec flow_spec; struct mlx5_flow_handle *rule; struct mlx5e_ethtool_table *eth_ft; + struct mlx5e_rss *rss; }; static void put_flow_table(struct mlx5e_ethtool_table *eth_ft) @@ -66,7 +74,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, int table_size; int prio; - switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + switch (flow_type_mask(fs->flow_type)) { case TCP_V4_FLOW: case UDP_V4_FLOW: case TCP_V6_FLOW: @@ -329,7 +337,7 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v, outer_headers); void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers); - u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + u32 flow_type = flow_type_mask(fs->flow_type); switch (flow_type) { case TCP_V4_FLOW: @@ -397,10 +405,53 @@ static bool outer_header_zero(u32 *match_criteria) size - 1); } +static int flow_get_tirn(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *eth_rule, + struct ethtool_rx_flow_spec *fs, + u32 rss_context, u32 *tirn) +{ + if (fs->flow_type & FLOW_RSS) { + struct mlx5e_lro_param lro_param; + struct mlx5e_rss *rss; + u32 flow_type; + int err; + int tt; + + rss = mlx5e_rx_res_rss_get(priv->rx_res, rss_context); + if (!rss) + return -ENOENT; + + flow_type = flow_type_mask(fs->flow_type); + tt = flow_type_to_traffic_type(flow_type); + if (tt 
< 0) + return -EINVAL; + + lro_param = mlx5e_get_lro_param(&priv->channels.params); + err = mlx5e_rss_obtain_tirn(rss, tt, &lro_param, false, tirn); + if (err) + return err; + eth_rule->rss = rss; + mlx5e_rss_refcnt_inc(eth_rule->rss); + } else { + struct mlx5e_params *params = &priv->channels.params; + enum mlx5e_rq_group group; + u16 ix; + + mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group); + + *tirn = group == MLX5E_RQ_GROUP_XSK ? + mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix) : + mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix); + } + + return 0; +} + static struct mlx5_flow_handle * add_ethtool_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *eth_rule, struct mlx5_flow_table *ft, - struct ethtool_rx_flow_spec *fs) + struct ethtool_rx_flow_spec *fs, u32 rss_context) { struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND }; struct mlx5_flow_destination *dst = NULL; @@ -419,23 +470,17 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, if (fs->ring_cookie == RX_CLS_FLOW_DISC) { flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; } else { - struct mlx5e_params *params = &priv->channels.params; - enum mlx5e_rq_group group; - u16 ix; - - mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group); - dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) { err = -ENOMEM; goto free; } + err = flow_get_tirn(priv, eth_rule, fs, rss_context, &dst->tir_num); + if (err) + goto free; + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - if (group == MLX5E_RQ_GROUP_XSK) - dst->tir_num = mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix); - else - dst->tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } @@ -459,6 +504,8 @@ static void del_ethtool_rule(struct mlx5e_priv *priv, { if (eth_rule->rule) mlx5_del_flow_rules(eth_rule->rule); + if (eth_rule->rss) + mlx5e_rss_refcnt_dec(eth_rule->rss); list_del(ð_rule->list); priv->fs.ethtool.tot_num_rules--; put_flow_table(eth_rule->eth_ft); @@ -619,7 +666,7 @@ static int validate_flow(struct mlx5e_priv *priv, fs->ring_cookie)) return -EINVAL; - switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + switch (flow_type_mask(fs->flow_type)) { case ETHER_FLOW: num_tuples += validate_ethter(fs); break; @@ -668,7 +715,7 @@ static int validate_flow(struct mlx5e_priv *priv, static int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, - struct ethtool_rx_flow_spec *fs) + struct ethtool_rx_flow_spec *fs, u32 rss_context) { struct mlx5e_ethtool_table *eth_ft; struct mlx5e_ethtool_rule *eth_rule; @@ -699,7 +746,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, err = -EINVAL; goto del_ethtool_rule; } - rule = add_ethtool_flow_rule(priv, eth_ft->ft, fs); + rule = add_ethtool_flow_rule(priv, eth_rule, eth_ft->ft, fs, rss_context); if (IS_ERR(rule)) { err = PTR_ERR(rule); goto del_ethtool_rule; @@ -745,10 +792,20 @@ mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, return -EINVAL; list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) { - if (eth_rule->flow_spec.location == location) { - info->fs = eth_rule->flow_spec; + int index; + + if (eth_rule->flow_spec.location != location) + continue; + if (!info) return 0; - } + info->fs = eth_rule->flow_spec; + if (!eth_rule->rss) + return 0; + index = mlx5e_rx_res_rss_index(priv->rx_res, eth_rule->rss); + if (index < 0) + return index; + info->rss_context = index; + return 0; } return -ENOENT; @@ -764,7 +821,7 @@ mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, info->data = MAX_NUM_OF_ETHTOOL_RULES; while ((!err || err == -ENOENT) && 
idx < info->rule_cnt) { - err = mlx5e_ethtool_get_flow(priv, info, location); + err = mlx5e_ethtool_get_flow(priv, NULL, location); if (!err) rule_locs[idx++] = location; location++; @@ -887,7 +944,7 @@ int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) switch (cmd->cmd) { case ETHTOOL_SRXCLSRLINS: - err = mlx5e_ethtool_flow_replace(priv, &cmd->fs); + err = mlx5e_ethtool_flow_replace(priv, &cmd->fs, cmd->rss_context); break; case ETHTOOL_SRXCLSRLDEL: err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 25a0b5f0984a..26d2f78c7706 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1535,15 +1535,9 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv, { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = &cq->mcq; - int eqn_not_used; - unsigned int irqn; int err; u32 i; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - if (err) - return err; - err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) @@ -1557,7 +1551,6 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv, mcq->vector = param->eq_ix; mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; - mcq->irqn = irqn; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); @@ -1605,11 +1598,10 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) void *in; void *cqc; int inlen; - unsigned int irqn_not_used; int eqn; int err; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn); if (err) return err; @@ -1627,7 +1619,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); - MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -1719,7 +1711,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, { int err, tc; - for (tc = 0; tc < params->num_tc; tc++) { + for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) { int txq_ix = c->ix + tc * params->num_channels; err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, @@ -1891,30 +1883,30 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, if (err) goto err_close_icosq; + err = mlx5e_open_rxq_rq(c, params, &cparam->rq); + if (err) + goto err_close_sqs; + if (c->xdp) { err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->rq_xdpsq, false); if (err) - goto err_close_sqs; + goto err_close_rq; } - err = mlx5e_open_rxq_rq(c, params, &cparam->rq); - if (err) - goto err_close_xdp_sq; - err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true); if (err) - goto err_close_rq; + goto err_close_xdp_sq; return 0; -err_close_rq: - mlx5e_close_rq(&c->rq); - err_close_xdp_sq: if (c->xdp) mlx5e_close_xdpsq(&c->rq_xdpsq); +err_close_rq: + mlx5e_close_rq(&c->rq); + err_close_sqs: mlx5e_close_sqs(c); @@ -1949,9 +1941,9 @@ err_close_async_icosq_cq: static void mlx5e_close_queues(struct mlx5e_channel *c) { mlx5e_close_xdpsq(&c->xdpsq); - mlx5e_close_rq(&c->rq); if (c->xdp) mlx5e_close_xdpsq(&c->rq_xdpsq); + mlx5e_close_rq(&c->rq); 
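With .get_rxfh_context/.set_rxfh_context wired into mlx5e_ethtool_ops and FLOW_RSS accepted by the ntuple path (flow_get_tirn() resolving the TIR from the chosen context), a classification rule can now spread matching traffic over a non-default RSS context rather than pin it to one ring. A hypothetical userspace sketch of inserting such a rule; the structures are the standard ethtool uAPI, but the snippet is illustrative and not taken from the patch:

	#include <arpa/inet.h>
	#include <linux/ethtool.h>
	#include <linux/sockios.h>
	#include <net/if.h>
	#include <string.h>
	#include <sys/ioctl.h>

	/* Steer TCP dport 443 into RSS context 'ctx' (created beforehand,
	 * e.g. "ethtool -X <dev> context new"). Sketch only, no error checks.
	 */
	static int steer_to_rss_context(int sk, const char *dev, __u32 ctx)
	{
		struct ethtool_rxnfc nfc = { .cmd = ETHTOOL_SRXCLSRLINS };
		struct ifreq ifr = {0};

		nfc.fs.flow_type = TCP_V4_FLOW | FLOW_RSS;
		nfc.fs.h_u.tcp_ip4_spec.pdst = htons(443);
		nfc.fs.m_u.tcp_ip4_spec.pdst = 0xffff;	/* match the full port */
		nfc.fs.location = 0;			/* explicit rule index */
		nfc.rss_context = ctx;

		strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
		ifr.ifr_data = (void *)&nfc;
		return ioctl(sk, SIOCETHTOOL, &ifr);
	}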
mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); mlx5e_close_icosq(&c->async_icosq); @@ -1983,9 +1975,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel *c; unsigned int irq; int err; - int eqn; - err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + err = mlx5_vector2irqn(priv->mdev, ix, &irq); if (err) return err; @@ -2001,7 +1992,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); - c->num_tc = params->num_tc; + c->num_tc = mlx5e_get_dcb_num_tc(params); c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; c->aff_mask = irq_get_effective_affinity_mask(irq); @@ -2272,22 +2263,34 @@ void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv) ETH_MAX_MTU); } -static void mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc) +static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc, + struct tc_mqprio_qopt_offload *mqprio) { - int tc; + int tc, err; netdev_reset_tc(netdev); if (ntc == 1) - return; + return 0; - netdev_set_num_tc(netdev, ntc); + err = netdev_set_num_tc(netdev, ntc); + if (err) { + netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc); + return err; + } - /* Map netdev TCs to offset 0 - * We have our own UP to TXQ mapping for QoS - */ - for (tc = 0; tc < ntc; tc++) - netdev_set_tc_queue(netdev, tc, nch, 0); + for (tc = 0; tc < ntc; tc++) { + u16 count, offset; + + /* For DCB mode, map netdev TCs to offset 0 + * We have our own UP to TXQ mapping for QoS + */ + count = mqprio ? mqprio->qopt.count[tc] : nch; + offset = mqprio ? mqprio->qopt.offset[tc] : 0; + netdev_set_tc_queue(netdev, tc, count, offset); + } + + return 0; } int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) @@ -2297,7 +2300,7 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) qos_queues = mlx5e_qos_cur_leaf_nodes(priv); nch = priv->channels.params.num_channels; - ntc = priv->channels.params.num_tc; + ntc = mlx5e_get_dcb_num_tc(&priv->channels.params); num_txqs = nch * ntc + qos_queues; if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) num_txqs += ntc; @@ -2321,11 +2324,12 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) old_ntc = netdev->num_tc ? : 1; nch = priv->channels.params.num_channels; - ntc = priv->channels.params.num_tc; + ntc = mlx5e_get_dcb_num_tc(&priv->channels.params); num_rxqs = nch * priv->profile->rq_groups; - mlx5e_netdev_set_tcs(netdev, nch, ntc); - + err = mlx5e_netdev_set_tcs(netdev, nch, ntc, NULL); + if (err) + goto err_out; err = mlx5e_update_tx_netdev_queues(priv); if (err) goto err_tcs; @@ -2346,7 +2350,8 @@ err_txqs: WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs)); err_tcs: - mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc); + mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc, NULL); +err_out: return err; } @@ -2396,7 +2401,7 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) int i, ch, tc, num_tc; ch = priv->channels.num; - num_tc = priv->channels.params.num_tc; + num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params); for (i = 0; i < ch; i++) { for (tc = 0; tc < num_tc; tc++) { @@ -2427,7 +2432,7 @@ static void mlx5e_update_num_tc_x_num_ch(struct mlx5e_priv *priv) { /* Sync with mlx5e_select_queue. 
*/ WRITE_ONCE(priv->num_tc_x_num_ch, - priv->channels.params.num_tc * priv->channels.num); + mlx5e_get_dcb_num_tc(&priv->channels.params) * priv->channels.num); } void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) @@ -2856,41 +2861,124 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) return 0; } -static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, - struct tc_mqprio_qopt *mqprio) +static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv, + struct tc_mqprio_qopt *mqprio) { struct mlx5e_params new_params; u8 tc = mqprio->num_tc; - int err = 0; + int err; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; if (tc && tc != MLX5E_MAX_NUM_TC) return -EINVAL; - mutex_lock(&priv->state_lock); - - /* MQPRIO is another toplevel qdisc that can't be attached - * simultaneously with the offloaded HTB. - */ - if (WARN_ON(priv->htb.maj_id)) { - err = -EINVAL; - goto out; - } - new_params = priv->channels.params; - new_params.num_tc = tc ? tc : 1; + new_params.mqprio.mode = TC_MQPRIO_MODE_DCB; + new_params.mqprio.num_tc = tc ? tc : 1; err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_num_channels_changed_ctx, NULL, true); -out: priv->max_opened_tc = max_t(u8, priv->max_opened_tc, - priv->channels.params.num_tc); - mutex_unlock(&priv->state_lock); + mlx5e_get_dcb_num_tc(&priv->channels.params)); return err; } +static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + struct net_device *netdev = priv->netdev; + int agg_count = 0; + int i; + + if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 || + mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC) + return -EINVAL; + + for (i = 0; i < mqprio->qopt.num_tc; i++) { + if (!mqprio->qopt.count[i]) { + netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i); + return -EINVAL; + } + if (mqprio->min_rate[i]) { + netdev_err(netdev, "Min tx rate is not supported\n"); + return -EINVAL; + } + if (mqprio->max_rate[i]) { + netdev_err(netdev, "Max tx rate is not supported\n"); + return -EINVAL; + } + + if (mqprio->qopt.offset[i] != agg_count) { + netdev_err(netdev, "Discontinuous queues config is not supported\n"); + return -EINVAL; + } + agg_count += mqprio->qopt.count[i]; + } + + if (priv->channels.params.num_channels < agg_count) { + netdev_err(netdev, "Num of queues (%d) exceeds available (%d)\n", + agg_count, priv->channels.params.num_channels); + return -EINVAL; + } + + return 0; +} + +static int mlx5e_mqprio_channel_set_tcs_ctx(struct mlx5e_priv *priv, void *ctx) +{ + struct tc_mqprio_qopt_offload *mqprio = (struct tc_mqprio_qopt_offload *)ctx; + struct net_device *netdev = priv->netdev; + u8 num_tc; + + if (priv->channels.params.mqprio.mode != TC_MQPRIO_MODE_CHANNEL) + return -EINVAL; + + num_tc = priv->channels.params.mqprio.num_tc; + mlx5e_netdev_set_tcs(netdev, 0, num_tc, mqprio); + + return 0; +} + +static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + struct mlx5e_params new_params; + int err; + + err = mlx5e_mqprio_channel_validate(priv, mqprio); + if (err) + return err; + + new_params = priv->channels.params; + new_params.mqprio.mode = TC_MQPRIO_MODE_CHANNEL; + new_params.mqprio.num_tc = mqprio->qopt.num_tc; + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_mqprio_channel_set_tcs_ctx, mqprio, true); + + return err; +} + +static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + /* MQPRIO is another toplevel qdisc that can't be 
attached + * simultaneously with the offloaded HTB. + */ + if (WARN_ON(priv->htb.maj_id)) + return -EINVAL; + + switch (mqprio->mode) { + case TC_MQPRIO_MODE_DCB: + return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt); + case TC_MQPRIO_MODE_CHANNEL: + return mlx5e_setup_tc_mqprio_channel(priv, mqprio); + default: + return -EOPNOTSUPP; + } +} + static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb) { int res; @@ -2960,7 +3048,10 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, priv, priv, true); } case TC_SETUP_QDISC_MQPRIO: - return mlx5e_setup_tc_mqprio(priv, type_data); + mutex_lock(&priv->state_lock); + err = mlx5e_setup_tc_mqprio(priv, type_data); + mutex_unlock(&priv->state_lock); + return err; case TC_SETUP_QDISC_HTB: mutex_lock(&priv->state_lock); err = mlx5e_setup_tc_htb(priv, type_data); @@ -4102,12 +4193,12 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 params->hard_mtu = MLX5E_ETH_HARD_MTU; params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2, priv->max_nch); - params->num_tc = 1; + params->mqprio.num_tc = 1; /* Set an initial non-zero value, so that mlx5e_select_queue won't * divide by zero if called before first activating channels. */ - priv->num_tc_x_num_ch = params->num_channels * params->num_tc; + priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc; /* SQ */ params->log_sq_size = is_kdump_kernel() ? diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1e520640f7e0..eb83f27850c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -49,6 +49,7 @@ #include "en/devlink.h" #include "fs_core.h" #include "lib/mlx5.h" +#include "lib/devcom.h" #define CREATE_TRACE_POINTS #include "diag/en_rep_tracepoint.h" #include "en_accel/ipsec.h" @@ -310,6 +311,8 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, rpriv = mlx5e_rep_to_rep_priv(rep); list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) { mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + if (rep_sq->send_to_vport_rule_peer) + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer); list_del(&rep_sq->list); kfree(rep_sq); } @@ -319,6 +322,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u32 *sqns_array, int sqns_num) { + struct mlx5_eswitch *peer_esw = NULL; struct mlx5_flow_handle *flow_rule; struct mlx5e_rep_priv *rpriv; struct mlx5e_rep_sq *rep_sq; @@ -329,6 +333,10 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, return 0; rpriv = mlx5e_rep_to_rep_priv(rep); + if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS); + for (i = 0; i < sqns_num; i++) { rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL); if (!rep_sq) { @@ -337,7 +345,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, } /* Add re-inject rule to the PF/representor sqs */ - flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep, + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sqns_array[i]); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -345,12 +353,34 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, goto out_err; } rep_sq->send_to_vport_rule = flow_rule; + rep_sq->sqn = sqns_array[i]; + + if (peer_esw) { + flow_rule = 
mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, + rep, sqns_array[i]); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + kfree(rep_sq); + goto out_err; + } + rep_sq->send_to_vport_rule_peer = flow_rule; + } + list_add(&rep_sq->list, &rpriv->vport_sqs_list); } + + if (peer_esw) + mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return 0; out_err: mlx5e_sqs2vport_stop(esw, rep); + + if (peer_esw) + mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return err; } @@ -364,7 +394,8 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) int err = -ENOMEM; u32 *sqs; - sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(*sqs), GFP_KERNEL); + sqs = kcalloc(priv->channels.num * mlx5e_get_dcb_num_tc(&priv->channels.params), + sizeof(*sqs), GFP_KERNEL); if (!sqs) goto out; @@ -581,7 +612,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, cq_period_mode); - params->num_tc = 1; + params->mqprio.num_tc = 1; params->tunneled_offload_en = false; mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); @@ -1247,10 +1278,64 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } +static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_sq *rep_sq; + + rpriv = mlx5e_rep_to_rep_priv(rep); + list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { + if (!rep_sq->send_to_vport_rule_peer) + continue; + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer); + rep_sq->send_to_vport_rule_peer = NULL; + } +} + +static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + struct mlx5_eswitch *peer_esw) +{ + struct mlx5_flow_handle *flow_rule; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_sq *rep_sq; + + rpriv = mlx5e_rep_to_rep_priv(rep); + list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { + if (rep_sq->send_to_vport_rule_peer) + continue; + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn); + if (IS_ERR(flow_rule)) + goto err_out; + rep_sq->send_to_vport_rule_peer = flow_rule; + } + + return 0; +err_out: + mlx5e_vport_rep_event_unpair(rep); + return PTR_ERR(flow_rule); +} + +static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + enum mlx5_switchdev_event event, + void *data) +{ + int err = 0; + + if (event == MLX5_SWITCHDEV_EVENT_PAIR) + err = mlx5e_vport_rep_event_pair(esw, rep, data); + else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR) + mlx5e_vport_rep_event_unpair(rep); + + return err; +} + static const struct mlx5_eswitch_rep_ops rep_ops = { .load = mlx5e_vport_rep_load, .unload = mlx5e_vport_rep_unload, - .get_proto_dev = mlx5e_vport_rep_get_proto_dev + .get_proto_dev = mlx5e_vport_rep_get_proto_dev, + .event = mlx5e_vport_rep_event, }; static int mlx5e_rep_probe(struct auxiliary_device *adev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 47a2dfb7792a..48a203a9e7d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -60,6 +60,7 @@ struct mlx5e_neigh_update_table { struct mlx5_tc_ct_priv; struct mlx5e_rep_bond; struct mlx5e_tc_tun_encap; +struct mlx5e_post_act; 
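Stepping back to the mqprio rework threaded through en_main.c and en_rep.c above: params->num_tc is gone, replaced by params->mqprio (a mode plus a tc count), and DCB-related readers now go through mlx5e_get_dcb_num_tc(). The helper's body is outside this excerpt; given the fields shown in the hunks above it is presumably a one-liner along these lines (assumed, not quoted from the patch):

	/* Assumed shape of the accessor: in TC_MQPRIO_MODE_CHANNEL the
	 * channels themselves are partitioned instead of adding per-TC SQs,
	 * so the DCB tc count must read as 1.
	 */
	static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params)
	{
		return params->mqprio.mode == TC_MQPRIO_MODE_DCB ?
			params->mqprio.num_tc : 1;
	}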
struct mlx5_rep_uplink_priv { /* Filters DB - instantiated by the uplink representor and shared by @@ -88,8 +89,9 @@ struct mlx5_rep_uplink_priv { /* maps tun_enc_opts to a unique id*/ struct mapping_ctx *tunnel_enc_opts_mapping; + struct mlx5e_post_act *post_act; struct mlx5_tc_ct_priv *ct_priv; - struct mlx5_esw_psample *esw_psample; + struct mlx5e_tc_psample *tc_psample; /* support eswitch vports bonding */ struct mlx5e_rep_bond *bond; @@ -146,7 +148,7 @@ struct mlx5e_neigh_hash_entry { */ refcount_t refcnt; - /* Save the last reported time offloaded trafic pass over one of the + /* Save the last reported time offloaded traffic pass over one of the * neigh hash entry flows. Use it to periodically update the neigh * 'used' value and avoid neigh deleting by the kernel. */ @@ -207,6 +209,8 @@ struct mlx5e_encap_entry { struct mlx5e_rep_sq { struct mlx5_flow_handle *send_to_vport_rule; + struct mlx5_flow_handle *send_to_vport_rule_peer; + u32 sqn; struct list_head list; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 349a93e0213d..6603d9c823a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -47,6 +47,7 @@ #include <net/bareudp.h> #include <net/bonding.h> #include "en.h" +#include "en/tc/post_act.h" #include "en_rep.h" #include "en/rep/tc.h" #include "en/rep/neigh.h" @@ -60,7 +61,7 @@ #include "en/mod_hdr.h" #include "en/tc_priv.h" #include "en/tc_tun_encap.h" -#include "esw/sample.h" +#include "en/tc/sample.h" #include "lib/devcom.h" #include "lib/geneve.h" #include "lib/fs_chains.h" @@ -97,7 +98,7 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { [MARK_TO_REG] = mark_to_reg_ct, [LABELS_TO_REG] = labels_to_reg_ct, [FTEID_TO_REG] = fteid_to_reg_ct, - /* For NIC rules we store the retore metadata directly + /* For NIC rules we store the restore metadata directly * into reg_b that is passed to SW since we don't * jump between steering domains. 
*/ @@ -246,7 +247,7 @@ get_ct_priv(struct mlx5e_priv *priv) } #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) -static struct mlx5_esw_psample * +static struct mlx5e_tc_psample * get_sample_priv(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -257,7 +258,7 @@ get_sample_priv(struct mlx5e_priv *priv) uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &uplink_rpriv->uplink_priv; - return uplink_priv->esw_psample; + return uplink_priv->tc_psample; } return NULL; @@ -1147,7 +1148,8 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, mod_hdr_acts); #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) } else if (flow_flag_test(flow, SAMPLE)) { - rule = mlx5_esw_sample_offload(get_sample_priv(flow->priv), spec, attr); + rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr, + mlx5e_tc_get_flow_tun_id(flow)); #endif } else { rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); @@ -1186,7 +1188,7 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) if (flow_flag_test(flow, SAMPLE)) { - mlx5_esw_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); + mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); return; } #endif @@ -1550,6 +1552,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, else mlx5e_detach_mod_hdr(priv, flow); } + kfree(attr->sample_attr); kvfree(attr->parse_attr); kvfree(attr->esw_attr->rx_tun_attr); @@ -1559,7 +1562,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); - kfree(flow->attr->esw_attr->sample); kfree(flow->attr); } @@ -1624,17 +1626,22 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, } } -static int flow_has_tc_fwd_action(struct flow_cls_offload *f) +static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_action *flow_action = &rule->action; const struct flow_action_entry *act; int i; + if (chain) + return false; + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_GOTO: return true; + case FLOW_ACTION_SAMPLE: + return true; default: continue; } @@ -1875,7 +1882,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return -EOPNOTSUPP; needs_mapping = !!flow->attr->chain; - sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f); + sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f); *match_inner = !needs_mapping; if ((needs_mapping || sets_mapping) && @@ -2448,7 +2455,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; } } - /* Currenlty supported only for MPLS over UDP */ + /* Currently supported only for MPLS over UDP */ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && !netif_is_bareudp(filter_dev)) { NL_SET_ERR_MSG_MOD(extack, @@ -2702,7 +2709,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, if (s_mask && a_mask) { NL_SET_ERR_MSG_MOD(extack, "can't set and add to the same HW field"); - printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field); + netdev_warn(priv->netdev, + "mlx5: can't set and add to the same HW field (%x)\n", + f->field); return -EOPNOTSUPP; } @@ -2741,8 +2750,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, if (first < next_z && next_z < last) { NL_SET_ERR_MSG_MOD(extack, "rewrite of few sub-fields isn't supported"); - 
printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", - mask); + netdev_warn(priv->netdev, + "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", + mask); return -EOPNOTSUPP; } @@ -3713,13 +3723,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_sample_attr sample_attr = {}; const struct ip_tunnel_info *info = NULL; struct mlx5_flow_attr *attr = flow->attr; int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; struct mlx5_esw_flow_attr *esw_attr; - struct mlx5_sample_attr sample = {}; bool encap = false, decap = false; u32 action = attr->action; int err, i, if_count = 0; @@ -3990,10 +4000,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported"); return -EOPNOTSUPP; } - sample.rate = act->sample.rate; - sample.group_num = act->sample.psample_group->group_num; + sample_attr.rate = act->sample.rate; + sample_attr.group_num = act->sample.psample_group->group_num; if (act->sample.truncate) - sample.trunc_size = act->sample.trunc_size; + sample_attr.trunc_size = act->sample.trunc_size; flow_flag_set(flow, SAMPLE); break; default: @@ -4078,10 +4088,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, * no errors after parsing. */ if (flow_flag_test(flow, SAMPLE)) { - esw_attr->sample = kzalloc(sizeof(*esw_attr->sample), GFP_KERNEL); - if (!esw_attr->sample) + attr->sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL); + if (!attr->sample_attr) return -ENOMEM; - *esw_attr->sample = sample; + *attr->sample_attr = sample_attr; } return 0; @@ -4679,7 +4689,7 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate, rate_mbps = max_t(u32, rate, 1); } - err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); + err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps); if (err) NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); @@ -4851,6 +4861,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) struct mlx5_core_dev *dev = priv->mdev; struct mapping_ctx *chains_mapping; struct mlx5_chains_attr attr = {}; + u64 mapping_id; int err; mlx5e_mod_hdr_tbl_init(&tc->mod_hdr); @@ -4864,8 +4875,12 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key); - chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj), - MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); + mapping_id = mlx5_query_nic_system_image_guid(dev); + + chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, + sizeof(struct mlx5_mapped_obj), + MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); + if (IS_ERR(chains_mapping)) { err = PTR_ERR(chains_mapping); goto err_mapping; @@ -4887,8 +4902,9 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) goto err_chains; } + tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr, - MLX5_FLOW_NAMESPACE_KERNEL); + MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, @@ -4904,6 +4920,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) err_reg: mlx5_tc_ct_clean(tc->ct); + mlx5e_tc_post_act_destroy(tc->post_act); mlx5_chains_destroy(tc->chains); err_chains: 
mapping_destroy(chains_mapping); @@ -4942,6 +4959,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) mutex_destroy(&tc->t_lock); mlx5_tc_ct_clean(tc->ct); + mlx5e_tc_post_act_destroy(tc->post_act); mapping_destroy(tc->mapping); mlx5_chains_destroy(tc->chains); } @@ -4954,6 +4972,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) struct mapping_ctx *mapping; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; + u64 mapping_id; int err = 0; uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); @@ -4961,17 +4980,24 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) priv = netdev_priv(rpriv->netdev); esw = priv->mdev->priv.eswitch; + uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw), + MLX5_FLOW_NAMESPACE_FDB); uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev), esw_chains(esw), &esw->offloads.mod_hdr, - MLX5_FLOW_NAMESPACE_FDB); + MLX5_FLOW_NAMESPACE_FDB, + uplink_priv->post_act); #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) - uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev)); + uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act); #endif - mapping = mapping_create(sizeof(struct tunnel_match_key), - TUNNEL_INFO_BITS_MASK, true); + mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + + mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL, + sizeof(struct tunnel_match_key), + TUNNEL_INFO_BITS_MASK, true); + if (IS_ERR(mapping)) { err = PTR_ERR(mapping); goto err_tun_mapping; @@ -4979,7 +5005,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) uplink_priv->tunnel_mapping = mapping; /* 0xFFF is reserved for stack devices slow path table mark */ - mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true); + mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS, + sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true); if (IS_ERR(mapping)) { err = PTR_ERR(mapping); goto err_enc_opts_mapping; @@ -5008,11 +5035,12 @@ err_enc_opts_mapping: mapping_destroy(uplink_priv->tunnel_mapping); err_tun_mapping: #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) - mlx5_esw_sample_cleanup(uplink_priv->esw_psample); + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); #endif mlx5_tc_ct_clean(uplink_priv->ct_priv); netdev_warn(priv->netdev, "Failed to initialize tc (eswitch), err: %d", err); + mlx5e_tc_post_act_destroy(uplink_priv->post_act); return err; } @@ -5029,9 +5057,10 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) mapping_destroy(uplink_priv->tunnel_mapping); #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) - mlx5_esw_sample_cleanup(uplink_priv->esw_psample); + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); #endif mlx5_tc_ct_clean(uplink_priv->ct_priv); + mlx5e_tc_post_act_destroy(uplink_priv->post_act); } int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index f7cbeb0b66d2..1a4cd882f0fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -70,6 +70,7 @@ struct mlx5_flow_attr { struct mlx5_fc *counter; struct mlx5_modify_hdr *modify_hdr; struct mlx5_ct_attr ct_attr; + struct mlx5e_sample_attr *sample_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; u32 chain; u16 prio; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 6e074cc457de..605c8ecc3610 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -855,8 +855,8 @@ clean: return err; } -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn) +static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; @@ -865,8 +865,10 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { if (i++ == vector) { - *eqn = eq->core.eqn; - *irqn = eq->core.irqn; + if (irqn) + *irqn = eq->core.irqn; + if (eqn) + *eqn = eq->core.eqn; err = 0; break; } @@ -874,8 +876,18 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, return err; } + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn) +{ + return vector2eqnirqn(dev, vector, eqn, NULL); +} EXPORT_SYMBOL(mlx5_vector2eqn); +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) +{ + return vector2eqnirqn(dev, vector, NULL, irqn); +} + unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { return dev->priv.eq_table->num_comp_eqs; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index 505bf811984a..2e504c7461c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -15,6 +15,15 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport) vport->egress.offloads.fwd_rule = NULL; } +static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport) +{ + if (!vport->egress.offloads.bounce_rule) + return; + + mlx5_del_flow_rules(vport->egress.offloads.bounce_rule); + vport->egress.offloads.bounce_rule = NULL; +} + static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, struct mlx5_flow_destination *fwd_dest) @@ -87,6 +96,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport) { esw_acl_egress_vlan_destroy(vport); esw_acl_egress_ofld_fwd2vport_destroy(vport); + esw_acl_egress_ofld_bounce_rule_destroy(vport); } static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw, @@ -145,6 +155,12 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport) mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp); vport->egress.offloads.fwd_grp = NULL; } + + if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) { + mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp); + vport->egress.offloads.bounce_grp = NULL; + } + esw_acl_egress_vlan_grp_destroy(vport); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index a6e1d4f78268..7e221038df8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -5,6 +5,7 @@ #include <linux/notifier.h> #include <net/netevent.h> #include <net/switchdev.h> +#include "lib/devcom.h" #include "bridge.h" #include "eswitch.h" #include "bridge_priv.h" @@ -56,7 +57,6 @@ struct mlx5_esw_bridge { struct list_head fdb_list; struct rhashtable fdb_ht; - struct xarray vports; struct mlx5_flow_table *egress_ft; struct mlx5_flow_group *egress_vlan_fg; @@ -69,7 +69,7 @@ static void mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid, unsigned long val) { - struct 
switchdev_notifier_fdb_info send_info; + struct switchdev_notifier_fdb_info send_info = {}; send_info.addr = addr; send_info.vid = vid; @@ -77,6 +77,15 @@ mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char * call_switchdev_notifiers(val, dev, &send_info.info, NULL); } +static void +mlx5_esw_bridge_fdb_del_notify(struct mlx5_esw_bridge_fdb_entry *entry) +{ + if (!(entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | MLX5_ESW_BRIDGE_FLAG_PEER))) + mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, + entry->key.vid, + SWITCHDEV_FDB_DEL_TO_BRIDGE); +} + static struct mlx5_flow_table * mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw) { @@ -400,9 +409,10 @@ mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge) } static struct mlx5_flow_handle * -mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, - struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, - struct mlx5_esw_bridge *bridge) +mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge, + struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; struct mlx5_flow_act flow_act = { @@ -430,7 +440,7 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, MLX5_SET(fte_match_param, rule_spec->match_criteria, misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num)); + mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); if (vlan && vlan->pkt_reformat_push) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; @@ -459,6 +469,35 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, } static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge) +{ + return mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + bridge, bridge->br_offloads->esw); +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom; + static struct mlx5_flow_handle *handle; + struct mlx5_eswitch *peer_esw; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return ERR_PTR(-ENODEV); + + handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + bridge, peer_esw); + + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return handle; +} + +static struct mlx5_flow_handle * mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *addr, struct mlx5_esw_bridge *bridge) { @@ -505,7 +544,7 @@ mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *a } static struct mlx5_flow_handle * -mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, +mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const unsigned char *addr, struct mlx5_esw_bridge_vlan *vlan, struct mlx5_esw_bridge *bridge) { @@ -550,6 +589,10 @@ 
mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, vlan->vid); } + if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) { + dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + dest.vport.vhca_id = esw_owner_vhca_id; + } handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1); kvfree(rule_spec); @@ -576,10 +619,9 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, goto err_fdb_ht; INIT_LIST_HEAD(&bridge->fdb_list); - xa_init(&bridge->vports); bridge->ifindex = ifindex; bridge->refcnt = 1; - bridge->ageing_time = BR_DEFAULT_AGEING_TIME; + bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME); list_add(&bridge->list, &br_offloads->bridges); return bridge; @@ -603,7 +645,6 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads, return; mlx5_esw_bridge_egress_table_cleanup(bridge); - WARN_ON(!xa_empty(&bridge->vports)); list_del(&bridge->list); rhashtable_destroy(&bridge->fdb_ht); kvfree(bridge); @@ -639,30 +680,40 @@ mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads return bridge; } +static unsigned long mlx5_esw_bridge_port_key_from_data(u16 vport_num, u16 esw_owner_vhca_id) +{ + return vport_num | (unsigned long)esw_owner_vhca_id << sizeof(vport_num) * BITS_PER_BYTE; +} + +static unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port) +{ + return mlx5_esw_bridge_port_key_from_data(port->vport_num, port->esw_owner_vhca_id); +} + static int mlx5_esw_bridge_port_insert(struct mlx5_esw_bridge_port *port, - struct mlx5_esw_bridge *bridge) + struct mlx5_esw_bridge_offloads *br_offloads) { - return xa_insert(&bridge->vports, port->vport_num, port, GFP_KERNEL); + return xa_insert(&br_offloads->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL); } static struct mlx5_esw_bridge_port * -mlx5_esw_bridge_port_lookup(u16 vport_num, struct mlx5_esw_bridge *bridge) +mlx5_esw_bridge_port_lookup(u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads) { - return xa_load(&bridge->vports, vport_num); + return xa_load(&br_offloads->ports, mlx5_esw_bridge_port_key_from_data(vport_num, + esw_owner_vhca_id)); } static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port, - struct mlx5_esw_bridge *bridge) + struct mlx5_esw_bridge_offloads *br_offloads) { - xa_erase(&bridge->vports, port->vport_num); + xa_erase(&br_offloads->ports, mlx5_esw_bridge_port_key(port)); } -static void mlx5_esw_bridge_fdb_entry_refresh(unsigned long lastuse, - struct mlx5_esw_bridge_fdb_entry *entry) +static void mlx5_esw_bridge_fdb_entry_refresh(struct mlx5_esw_bridge_fdb_entry *entry) { trace_mlx5_esw_bridge_fdb_entry_refresh(entry); - entry->lastuse = lastuse; mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, entry->key.vid, SWITCHDEV_FDB_ADD_TO_BRIDGE); @@ -690,10 +741,7 @@ static void mlx5_esw_bridge_fdb_flush(struct mlx5_esw_bridge *bridge) struct mlx5_esw_bridge_fdb_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) { - if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)) - mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, - entry->key.vid, - SWITCHDEV_FDB_DEL_TO_BRIDGE); + mlx5_esw_bridge_fdb_del_notify(entry); mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); } } @@ -841,10 +889,7 @@ static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_esw_bridge_fdb_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, 
vlan_list) { - if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)) - mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, - entry->key.vid, - SWITCHDEV_FDB_DEL_TO_BRIDGE); + mlx5_esw_bridge_fdb_del_notify(entry); mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); } @@ -875,13 +920,13 @@ static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port, } static struct mlx5_esw_bridge_vlan * -mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, struct mlx5_esw_bridge *bridge, - struct mlx5_eswitch *esw) +mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge_vlan *vlan; - port = mlx5_esw_bridge_port_lookup(vport_num, bridge); + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, bridge->br_offloads); if (!port) { /* FDB is added asynchronously on wq while port might have been deleted * concurrently. Report on 'info' logging level and skip the FDB offload. @@ -904,24 +949,23 @@ mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, struct mlx5_esw_bridge } static struct mlx5_esw_bridge_fdb_entry * -mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsigned char *addr, - u16 vid, bool added_by_user, struct mlx5_eswitch *esw, - struct mlx5_esw_bridge *bridge) +mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, bool added_by_user, bool peer, + struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge) { struct mlx5_esw_bridge_vlan *vlan = NULL; struct mlx5_esw_bridge_fdb_entry *entry; struct mlx5_flow_handle *handle; struct mlx5_fc *counter; - struct mlx5e_priv *priv; int err; if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) { - vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, bridge, esw); + vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, esw_owner_vhca_id, bridge, + esw); if (IS_ERR(vlan)) return ERR_CAST(vlan); } - priv = netdev_priv(dev); entry = kvzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return ERR_PTR(-ENOMEM); @@ -930,19 +974,25 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi entry->key.vid = vid; entry->dev = dev; entry->vport_num = vport_num; + entry->esw_owner_vhca_id = esw_owner_vhca_id; entry->lastuse = jiffies; if (added_by_user) entry->flags |= MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER; + if (peer) + entry->flags |= MLX5_ESW_BRIDGE_FLAG_PEER; - counter = mlx5_fc_create(priv->mdev, true); + counter = mlx5_fc_create(esw->dev, true); if (IS_ERR(counter)) { err = PTR_ERR(counter); goto err_ingress_fc_create; } entry->ingress_counter = counter; - handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, mlx5_fc_id(counter), - bridge); + handle = peer ? 
+ mlx5_esw_bridge_ingress_flow_peer_create(vport_num, addr, vlan, + mlx5_fc_id(counter), bridge) : + mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, + mlx5_fc_id(counter), bridge); if (IS_ERR(handle)) { err = PTR_ERR(handle); esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d)\n", @@ -962,7 +1012,8 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi entry->filter_handle = handle; } - handle = mlx5_esw_bridge_egress_flow_create(vport_num, addr, vlan, bridge); + handle = mlx5_esw_bridge_egress_flow_create(vport_num, esw_owner_vhca_id, addr, vlan, + bridge); if (IS_ERR(handle)) { err = PTR_ERR(handle); esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n", @@ -994,32 +1045,37 @@ err_egress_flow_create: err_ingress_filter_flow_create: mlx5_del_flow_rules(entry->ingress_handle); err_ingress_flow_create: - mlx5_fc_destroy(priv->mdev, entry->ingress_counter); + mlx5_fc_destroy(esw->dev, entry->ingress_counter); err_ingress_fc_create: kvfree(entry); return ERR_PTR(err); } -int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time, + struct mlx5_esw_bridge_offloads *br_offloads) { - if (!vport->bridge) + struct mlx5_esw_bridge_port *port; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) return -EINVAL; - vport->bridge->ageing_time = ageing_time; + port->bridge->ageing_time = clock_t_to_jiffies(ageing_time); return 0; } -int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads) { + struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge *bridge; bool filtering; - if (!vport->bridge) + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) return -EINVAL; - bridge = vport->bridge; + bridge = port->bridge; filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG; if (filtering == enable) return 0; @@ -1033,114 +1089,143 @@ int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw, return 0; } -static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_esw_bridge *bridge, - struct mlx5_vport *vport) +static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct mlx5_esw_bridge *bridge) { struct mlx5_eswitch *esw = br_offloads->esw; struct mlx5_esw_bridge_port *port; int err; port = kvzalloc(sizeof(*port), GFP_KERNEL); - if (!port) { - err = -ENOMEM; - goto err_port_alloc; - } + if (!port) + return -ENOMEM; - port->vport_num = vport->vport; + port->vport_num = vport_num; + port->esw_owner_vhca_id = esw_owner_vhca_id; + port->bridge = bridge; + port->flags |= flags; xa_init(&port->vlans); - err = mlx5_esw_bridge_port_insert(port, bridge); + err = mlx5_esw_bridge_port_insert(port, br_offloads); if (err) { - esw_warn(esw->dev, "Failed to insert port metadata (vport=%u,err=%d)\n", - vport->vport, err); + esw_warn(esw->dev, + "Failed to insert port metadata (vport=%u,esw_owner_vhca_id=%u,err=%d)\n", + port->vport_num, port->esw_owner_vhca_id, err); goto err_port_insert; } trace_mlx5_esw_bridge_vport_init(port); - vport->bridge = bridge; return 0; err_port_insert: 
kvfree(port); -err_port_alloc: - mlx5_esw_bridge_put(br_offloads, bridge); return err; } static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_vport *vport) + struct mlx5_esw_bridge_port *port) { - struct mlx5_esw_bridge *bridge = vport->bridge; + u16 vport_num = port->vport_num, esw_owner_vhca_id = port->esw_owner_vhca_id; + struct mlx5_esw_bridge *bridge = port->bridge; struct mlx5_esw_bridge_fdb_entry *entry, *tmp; - struct mlx5_esw_bridge_port *port; list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) - if (entry->vport_num == vport->vport) + if (entry->vport_num == vport_num && entry->esw_owner_vhca_id == esw_owner_vhca_id) mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); - port = mlx5_esw_bridge_port_lookup(vport->vport, bridge); - if (!port) { - WARN(1, "Vport %u metadata not found on bridge", vport->vport); - return -EINVAL; - } - trace_mlx5_esw_bridge_vport_cleanup(port); mlx5_esw_bridge_port_vlans_flush(port, bridge); - mlx5_esw_bridge_port_erase(port, bridge); + mlx5_esw_bridge_port_erase(port, br_offloads); kvfree(port); mlx5_esw_bridge_put(br_offloads, bridge); - vport->bridge = NULL; return 0; } -int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_vport *vport, struct netlink_ext_ack *extack) +static int mlx5_esw_bridge_vport_link_with_flags(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) { struct mlx5_esw_bridge *bridge; int err; - WARN_ON(vport->bridge); - bridge = mlx5_esw_bridge_lookup(ifindex, br_offloads); if (IS_ERR(bridge)) { NL_SET_ERR_MSG_MOD(extack, "Error checking for existing bridge with same ifindex"); return PTR_ERR(bridge); } - err = mlx5_esw_bridge_vport_init(br_offloads, bridge, vport); - if (err) + err = mlx5_esw_bridge_vport_init(vport_num, esw_owner_vhca_id, flags, br_offloads, bridge); + if (err) { NL_SET_ERR_MSG_MOD(extack, "Error initializing port"); + goto err_vport; + } + return 0; + +err_vport: + mlx5_esw_bridge_put(br_offloads, bridge); return err; } -int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_vport *vport, struct netlink_ext_ack *extack) +int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) { - struct mlx5_esw_bridge *bridge = vport->bridge; + return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, 0, + br_offloads, extack); +} + +int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_bridge_port *port; int err; - if (!bridge) { + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) { NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge"); return -EINVAL; } - if (bridge->ifindex != ifindex) { + if (port->bridge->ifindex != ifindex) { NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge"); return -EINVAL; } - err = mlx5_esw_bridge_vport_cleanup(br_offloads, vport); + err = mlx5_esw_bridge_vport_cleanup(br_offloads, port); if (err) NL_SET_ERR_MSG_MOD(extack, "Port cleanup failed"); return err; } -int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, struct netlink_ext_ack *extack) +int 
mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + if (!MLX5_CAP_ESW(br_offloads->esw->dev, merged_eswitch)) + return 0; + + return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, + MLX5_ESW_BRIDGE_PORT_FLAG_PEER, + br_offloads, extack); +} + +int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + return mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, br_offloads, + extack); +} + +int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) { struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge_vlan *vlan; - port = mlx5_esw_bridge_port_lookup(vport->vport, vport->bridge); + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); if (!port) return -EINVAL; @@ -1148,10 +1233,10 @@ int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw, if (vlan) { if (vlan->flags == flags) return 0; - mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge); + mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge); } - vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, esw); + vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, br_offloads->esw); if (IS_ERR(vlan)) { NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry"); return PTR_ERR(vlan); @@ -1159,62 +1244,93 @@ int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw, return 0; } -void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx5_vport *vport) +void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads) { struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge_vlan *vlan; - port = mlx5_esw_bridge_port_lookup(vport->vport, vport->bridge); + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); if (!port) return; vlan = mlx5_esw_bridge_vlan_lookup(vid, port); if (!vlan) return; - mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge); + mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge); } -void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, - struct switchdev_notifier_fdb_info *fdb_info) +void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) { - struct mlx5_esw_bridge *bridge = vport->bridge; struct mlx5_esw_bridge_fdb_entry *entry; - u16 vport_num = vport->vport; + struct mlx5_esw_bridge_fdb_key key; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; - if (!bridge) { - esw_info(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num); + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port || port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER) + return; + + bridge = port->bridge; + ether_addr_copy(key.addr, fdb_info->addr); + key.vid = fdb_info->vid; + entry = rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params); + if (!entry) { + esw_debug(br_offloads->esw->dev, + "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + key.addr, key.vid, vport_num); return; } - entry = 
mlx5_esw_bridge_fdb_entry_init(dev, vport_num, fdb_info->addr, fdb_info->vid, - fdb_info->added_by_user, esw, bridge); + entry->lastuse = jiffies; +} + +void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return; + + bridge = port->bridge; + entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, esw_owner_vhca_id, fdb_info->addr, + fdb_info->vid, fdb_info->added_by_user, + port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER, + br_offloads->esw, bridge); if (IS_ERR(entry)) return; if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER) mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid, SWITCHDEV_FDB_OFFLOADED); - else + else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER)) /* Take over dynamic entries to prevent kernel bridge from aging them out. */ mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid, SWITCHDEV_FDB_ADD_TO_BRIDGE); } -void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info) { - struct mlx5_esw_bridge *bridge = vport->bridge; + struct mlx5_eswitch *esw = br_offloads->esw; struct mlx5_esw_bridge_fdb_entry *entry; struct mlx5_esw_bridge_fdb_key key; - u16 vport_num = vport->vport; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; - if (!bridge) { - esw_warn(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num); + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) return; - } + bridge = port->bridge; ether_addr_copy(key.addr, fdb_info->addr); key.vid = fdb_info->vid; entry = rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params); @@ -1225,9 +1341,7 @@ void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw return; } - if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)) - mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid, - SWITCHDEV_FDB_DEL_TO_BRIDGE); + mlx5_esw_bridge_fdb_del_notify(entry); mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); } @@ -1245,11 +1359,10 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) continue; if (time_after(lastuse, entry->lastuse)) { - mlx5_esw_bridge_fdb_entry_refresh(lastuse, entry); - } else if (time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) { - mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, - entry->key.vid, - SWITCHDEV_FDB_DEL_TO_BRIDGE); + mlx5_esw_bridge_fdb_entry_refresh(entry); + } else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER) && + time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) { + mlx5_esw_bridge_fdb_del_notify(entry); mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); } } @@ -1258,13 +1371,11 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads) { - struct mlx5_eswitch *esw = br_offloads->esw; - struct mlx5_vport *vport; + struct mlx5_esw_bridge_port *port; unsigned long i; - mlx5_esw_for_each_vport(esw, i, 
vport) - if (vport->bridge) - mlx5_esw_bridge_vport_cleanup(br_offloads, vport); + xa_for_each(&br_offloads->ports, i, port) + mlx5_esw_bridge_vport_cleanup(br_offloads, port); WARN_ONCE(!list_empty(&br_offloads->bridges), "Cleaning up bridge offloads while still having bridges attached\n"); @@ -1279,6 +1390,7 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&br_offloads->bridges); + xa_init(&br_offloads->ports); br_offloads->esw = esw; esw->br_offloads = br_offloads; @@ -1293,6 +1405,7 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw) return; mlx5_esw_bridge_flush(br_offloads); + WARN_ON(!xa_empty(&br_offloads->ports)); esw->br_offloads = NULL; kvfree(br_offloads); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index d826942b27fc..efc39975226e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -7,6 +7,7 @@ #include <linux/notifier.h> #include <linux/list.h> #include <linux/workqueue.h> +#include <linux/xarray.h> #include "eswitch.h" struct mlx5_flow_table; @@ -15,6 +16,8 @@ struct mlx5_flow_group; struct mlx5_esw_bridge_offloads { struct mlx5_eswitch *esw; struct list_head bridges; + struct xarray ports; + struct notifier_block netdev_nb; struct notifier_block nb_blk; struct notifier_block nb; @@ -31,23 +34,36 @@ struct mlx5_esw_bridge_offloads { struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw); void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw); -int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_vport *vport, struct netlink_ext_ack *extack); -int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_vport *vport, struct netlink_ext_ack *extack); -void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); -void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads); -int mlx5_esw_bridge_ageing_time_set(unsigned long 
ageing_time, struct mlx5_eswitch *esw, - struct mlx5_vport *vport); -int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw, - struct mlx5_vport *vport); -int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, struct netlink_ext_ack *extack); -void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx5_vport *vport); +int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time, + struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads); #endif /* __MLX5_ESW_BRIDGE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h index d9ab2e8bc2cb..52964a82d6a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -19,6 +19,11 @@ struct mlx5_esw_bridge_fdb_key { enum { MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0), + MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1), +}; + +enum { + MLX5_ESW_BRIDGE_PORT_FLAG_PEER = BIT(0), }; struct mlx5_esw_bridge_fdb_entry { @@ -28,6 +33,7 @@ struct mlx5_esw_bridge_fdb_entry { struct list_head list; struct list_head vlan_list; u16 vport_num; + u16 esw_owner_vhca_id; u16 flags; struct mlx5_flow_handle *ingress_handle; @@ -47,6 +53,9 @@ struct mlx5_esw_bridge_vlan { struct mlx5_esw_bridge_port { u16 vport_num; + u16 esw_owner_vhca_id; + u16 flags; + struct mlx5_esw_bridge *bridge; struct xarray vlans; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 1703384eca95..20af557ae30c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -91,9 +91,15 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ if (err) goto reg_err; + err = devlink_rate_leaf_create(dl_port, vport); + if (err) + goto rate_err; + vport->dl_port = dl_port; return 0; +rate_err: + devlink_port_unregister(dl_port); reg_err: mlx5_esw_dl_port_free(dl_port); return err; @@ -109,6 +115,12 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo vport = mlx5_eswitch_get_vport(esw, vport_num); if (IS_ERR(vport)) return; + + if (vport->dl_port->devlink_rate) { + mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); + devlink_rate_leaf_destroy(vport->dl_port); + } + devlink_port_unregister(vport->dl_port); mlx5_esw_dl_port_free(vport->dl_port); vport->dl_port = NULL; @@ -148,8 +160,16 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p if (err) return err; + err = devlink_rate_leaf_create(dl_port, vport); + if (err) + goto rate_err; + vport->dl_port = dl_port; return 0; + +rate_err: + devlink_port_unregister(dl_port); + return err; } void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num) @@ -159,6 +179,12 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num vport = 
mlx5_eswitch_get_vport(esw, vport_num); if (IS_ERR(vport)) return; + + if (vport->dl_port->devlink_rate) { + mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); + devlink_rate_leaf_destroy(vport->dl_port); + } + devlink_port_unregister(vport->dl_port); vport->dl_port = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h index 227964b7d3b9..3401188e0a60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h @@ -85,11 +85,18 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_port_template, TP_ARGS(port), TP_STRUCT__entry( __field(u16, vport_num) + __field(u16, esw_owner_vhca_id) + __field(u16, flags) ), TP_fast_assign( __entry->vport_num = port->vport_num; + __entry->esw_owner_vhca_id = port->esw_owner_vhca_id; + __entry->flags = port->flags; ), - TP_printk("vport_num=%hu", __entry->vport_num) + TP_printk("vport_num=%hu esw_owner_vhca_id=%hu flags=%hx", + __entry->vport_num, + __entry->esw_owner_vhca_id, + __entry->flags) ); DEFINE_EVENT(mlx5_esw_bridge_port_template, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h new file mode 100644 index 000000000000..458baf0c6415 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_ESW_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_ESW_TP_ + +#include <linux/tracepoint.h> +#include "eswitch.h" + +TRACE_EVENT(mlx5_esw_vport_qos_destroy, + TP_PROTO(const struct mlx5_vport *vport), + TP_ARGS(vport), + TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + __field(unsigned short, vport_id) + __field(unsigned int, tsar_ix) + ), + TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device)); + __entry->vport_id = vport->vport; + __entry->tsar_ix = vport->qos.esw_tsar_ix; + ), + TP_printk("(%s) vport=%hu tsar_ix=%u\n", + __get_str(devname), __entry->vport_id, __entry->tsar_ix + ) +); + +DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate), + TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + __field(unsigned short, vport_id) + __field(unsigned int, tsar_ix) + __field(unsigned int, bw_share) + __field(unsigned int, max_rate) + __field(void *, group) + ), + TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device)); + __entry->vport_id = vport->vport; + __entry->tsar_ix = vport->qos.esw_tsar_ix; + __entry->bw_share = bw_share; + __entry->max_rate = max_rate; + __entry->group = vport->qos.group; + ), + TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n", + __get_str(devname), __entry->vport_id, __entry->tsar_ix, + __entry->bw_share, __entry->max_rate, __entry->group + ) +); + +DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate) + ); + +DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate) + 
); + +DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const void *, group) + __field(unsigned int, tsar_ix) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->group = group; + __entry->tsar_ix = tsar_ix; + ), + TP_printk("(%s) group=%p tsar_ix=%u\n", + __get_str(devname), __entry->group, __entry->tsar_ix + ) +); + +DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_create, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix) + ); + +DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_destroy, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix) + ); + +TRACE_EVENT(mlx5_esw_group_qos_config, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix, u32 bw_share, u32 max_rate), + TP_ARGS(dev, group, tsar_ix, bw_share, max_rate), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const void *, group) + __field(unsigned int, tsar_ix) + __field(unsigned int, bw_share) + __field(unsigned int, max_rate) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->group = group; + __entry->tsar_ix = tsar_ix; + __entry->bw_share = bw_share; + __entry->max_rate = max_rate; + ), + TP_printk("(%s) group=%p tsar_ix=%u bw_share=%u max_rate=%u\n", + __get_str(devname), __entry->group, __entry->tsar_ix, + __entry->bw_share, __entry->max_rate + ) +); +#endif /* _MLX5_ESW_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH esw/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE qos_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index d9041b16611d..df277a6cddc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -11,6 +11,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "fs_core.h" +#include "esw/qos.h" enum { LEGACY_VEPA_PRIO = 0, @@ -508,3 +509,22 @@ unlock: mutex_unlock(&esw->state_lock); return err; } + +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, + u32 max_rate, u32 min_rate) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int err; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + err = mlx5_esw_qos_set_vport_min_rate(esw, evport, min_rate, NULL); + if (!err) + err = mlx5_esw_qos_set_vport_max_rate(esw, evport, max_rate, NULL); + mutex_unlock(&esw->state_lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c new file mode 100644 index 000000000000..985e305179d1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -0,0 +1,869 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include "eswitch.h" +#include "esw/qos.h" +#include "en/port.h" +#define CREATE_TRACE_POINTS +#include "diag/qos_tracepoint.h" + +/* Minimum supported BW share value by the HW is 1 Mbit/sec */ +#define MLX5_MIN_BW_SHARE 1 + +#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ + min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit) + +struct mlx5_esw_rate_group { + u32 tsar_ix; + u32 max_rate; + u32 min_rate; + u32 bw_share; + struct list_head list; +}; + +static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, + u32 parent_ix, u32 tsar_ix, + u32 max_rate, u32 bw_share) +{ + u32 bitmask = 0; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + + return mlx5_modify_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + tsar_ix, + bitmask); +} + +static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, + u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + int err; + + err = esw_qos_tsar_config(dev, sched_ctx, + esw->qos.root_tsar_ix, group->tsar_ix, + max_rate, bw_share); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); + + trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate); + + return err; +} + +static int esw_qos_vport_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u32 max_rate, u32 bw_share, + struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group = vport->qos.group; + struct mlx5_core_dev *dev = esw->dev; + u32 parent_tsar_ix; + void *vport_elem; + int err; + + if (!vport->qos.enabled) + return -EIO; + + parent_tsar_ix = group ? 
group->tsar_ix : esw->qos.root_tsar_ix; + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, + element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); + + err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix, + max_rate, bw_share); + if (err) { + esw_warn(esw->dev, + "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed"); + return err; + } + + trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate); + + return 0; +} + +static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + bool group_level) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_vport *evport; + u32 max_guarantee = 0; + unsigned long i; + + if (group_level) { + struct mlx5_esw_rate_group *group; + + list_for_each_entry(group, &esw->qos.groups, list) { + if (group->min_rate < max_guarantee) + continue; + max_guarantee = group->min_rate; + } + } else { + mlx5_esw_for_each_vport(esw, i, evport) { + if (!evport->enabled || !evport->qos.enabled || + evport->qos.group != group || evport->qos.min_rate < max_guarantee) + continue; + max_guarantee = evport->qos.min_rate; + } + } + + if (max_guarantee) + return max_t(u32, max_guarantee / fw_max_bw_share, 1); + + /* If vports min rate divider is 0 but their group has bw_share configured, then + * need to set bw_share for vports to minimal value. + */ + if (!group_level && !max_guarantee && group->bw_share) + return 1; + return 0; +} + +static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) +{ + if (divider) + return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max); + + return 0; +} + +static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false); + struct mlx5_vport *evport; + unsigned long i; + u32 bw_share; + int err; + + mlx5_esw_for_each_vport(esw, i, evport) { + if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group) + continue; + bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share); + + if (bw_share == evport->qos.bw_share) + continue; + + err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack); + if (err) + return err; + + evport->qos.bw_share = bw_share; + } + + return 0; +} + +static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_esw_rate_group *group; + u32 bw_share; + int err; + + list_for_each_entry(group, &esw->qos.groups, list) { + bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); + + if (bw_share == group->bw_share) + continue; + + err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack); + if (err) + return err; + + group->bw_share = bw_share; + + /* All the group's vports need to be set with default bw_share + * to enable them with QOS + */ + err = esw_qos_normalize_vports_min_rate(esw, group, extack); + + if (err) + return err; + } + + return 0; +} + +int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, + struct 
mlx5_vport *evport, + u32 min_rate, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share, previous_min_rate; + bool min_rate_supported; + int err; + + lockdep_assert_held(&esw->state_lock); + fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + if (min_rate && !min_rate_supported) + return -EOPNOTSUPP; + if (min_rate == evport->qos.min_rate) + return 0; + + previous_min_rate = evport->qos.min_rate; + evport->qos.min_rate = min_rate; + err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack); + if (err) + evport->qos.min_rate = previous_min_rate; + + return err; +} + +int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, + struct mlx5_vport *evport, + u32 max_rate, + struct netlink_ext_ack *extack) +{ + u32 act_max_rate = max_rate; + bool max_rate_supported; + int err; + + lockdep_assert_held(&esw->state_lock); + max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); + + if (max_rate && !max_rate_supported) + return -EOPNOTSUPP; + if (max_rate == evport->qos.max_rate) + return 0; + + /* If parent group has rate limit need to set to group + * value when new max rate is 0. + */ + if (evport->qos.group && !max_rate) + act_max_rate = evport->qos.group->max_rate; + + err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack); + + if (!err) + evport->qos.max_rate = max_rate; + + return err; +} + +static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, + u32 min_rate, struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_core_dev *dev = esw->dev; + u32 previous_min_rate, divider; + int err; + + if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE)) + return -EOPNOTSUPP; + + if (min_rate == group->min_rate) + return 0; + + previous_min_rate = group->min_rate; + group->min_rate = min_rate; + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) { + group->min_rate = previous_min_rate; + NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed"); + + /* Attempt restoring previous configuration */ + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + if (esw_qos_normalize_groups_min_rate(esw, divider, extack)) + NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed"); + } + + return err; +} + +static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + u32 max_rate, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + if (group->max_rate == max_rate) + return 0; + + err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack); + if (err) + return err; + + group->max_rate = max_rate; + + /* Any unlimited vports in the group should be set + * with the value of the group. 
+ */ + mlx5_esw_for_each_vport(esw, i, vport) { + if (!vport->enabled || !vport->qos.enabled || + vport->qos.group != group || vport->qos.max_rate) + continue; + + err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "E-Switch vport implicit rate limit setting failed"); + } + + return err; +} + +static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u32 max_rate, u32 bw_share) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group = vport->qos.group; + struct mlx5_core_dev *dev = esw->dev; + u32 parent_tsar_ix; + void *vport_elem; + int err; + + parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + &vport->qos.esw_tsar_ix); + if (err) { + esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + return err; + } + + return 0; +} + +static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *curr_group, + struct mlx5_esw_rate_group *new_group, + struct netlink_ext_ack *extack) +{ + u32 max_rate; + int err; + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed"); + return err; + } + + vport->qos.group = new_group; + max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; + + /* If vport is unlimited, we set the group's value. + * Therefore, if the group is limited it will apply to + * the vport as well and if not, vport will remain unlimited. + */ + err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); + goto err_sched; + } + + return 0; + +err_sched: + vport->qos.group = curr_group; + max_rate = vport->qos.max_rate ? 
vport->qos.max_rate : curr_group->max_rate; + if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share)) + esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n", + vport->vport); + + return err; +} + +static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *new_group, *curr_group; + int err; + + if (!vport->enabled) + return -EINVAL; + + curr_group = vport->qos.group; + new_group = group ?: esw->qos.group0; + if (curr_group == new_group) + return 0; + + err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack); + if (err) + return err; + + /* Recalculate bw share weights of old and new groups */ + if (vport->qos.bw_share) { + esw_qos_normalize_vports_min_rate(esw, curr_group, extack); + esw_qos_normalize_vports_min_rate(esw, new_group, extack); + } + + return 0; +} + +static struct mlx5_esw_rate_group * +esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group; + u32 divider; + int err; + + if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) + return ERR_PTR(-EOPNOTSUPP); + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, + esw->qos.root_tsar_ix); + err = mlx5_create_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + &group->tsar_ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed"); + goto err_sched_elem; + } + + list_add_tail(&group->list, &esw->qos.groups); + + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + if (divider) { + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); + goto err_min_rate; + } + } + trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix); + + return group; + +err_min_rate: + list_del(&group->list); + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + group->tsar_ix); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); +err_sched_elem: + kfree(group); + return ERR_PTR(err); +} + +static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + u32 divider; + int err; + + list_del(&group->list); + + divider = esw_qos_calculate_min_rate_divider(esw, NULL, true); + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed"); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + group->tsar_ix); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); + + trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); + kfree(group); + return err; +} + +static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) +{ + switch (type) { + case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_TASR; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: + return MLX5_CAP_QOS(dev, 
esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT_TC; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; + } + return false; +} + +void mlx5_esw_qos_create(struct mlx5_eswitch *esw) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + __be32 *attr; + int err; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return; + + if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) + return; + + mutex_lock(&esw->state_lock); + if (esw->qos.enabled) + goto unlock; + + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + &esw->qos.root_tsar_ix); + if (err) { + esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); + goto unlock; + } + + INIT_LIST_HEAD(&esw->qos.groups); + if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { + esw->qos.group0 = esw_qos_create_rate_group(esw, NULL); + if (IS_ERR(esw->qos.group0)) { + esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", + PTR_ERR(esw->qos.group0)); + goto err_group0; + } + } + esw->qos.enabled = true; +unlock: + mutex_unlock(&esw->state_lock); + return; + +err_group0: + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); + mutex_unlock(&esw->state_lock); +} + +void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw) +{ + struct devlink *devlink = priv_to_devlink(esw->dev); + int err; + + devlink_rate_nodes_destroy(devlink); + mutex_lock(&esw->state_lock); + if (!esw->qos.enabled) + goto unlock; + + if (esw->qos.group0) + esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); + + esw->qos.enabled = false; +unlock: + mutex_unlock(&esw->state_lock); +} + +int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 bw_share) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + if (!esw->qos.enabled) + return 0; + + if (vport->qos.enabled) + return -EEXIST; + + vport->qos.group = esw->qos.group0; + + err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); + if (!err) { + vport->qos.enabled = true; + trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); + } + + return err; +} + +void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + if (!esw->qos.enabled || !vport->qos.enabled) + return; + WARN(vport->qos.group && vport->qos.group != esw->qos.group0, + "Disabling QoS on port before detaching it from group"); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + + vport->qos.enabled = false; + trace_mlx5_esw_vport_qos_destroy(vport); +} + +int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps) +{ + 
u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_vport *vport; + u32 bitmask; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + if (!vport->qos.enabled) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + + return mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + bitmask); +} + +#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ + +/* Converts bytes per second value passed in a pointer into megabits per + * second, rewriting last. If converted rate exceed link speed or is not a + * fraction of Mbps - returns error. + */ +static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name, + u64 *rate, struct netlink_ext_ack *extack) +{ + u32 link_speed_max, reminder; + u64 value; + int err; + + err = mlx5e_port_max_linkspeed(mdev, &link_speed_max); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed"); + return err; + } + + value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder); + if (reminder) { + pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", + name, *rate); + NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps"); + return -EINVAL; + } + + if (value > link_speed_max) { + pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n", + name, value, link_speed_max); + NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed"); + return -EINVAL; + } + + *rate = value; + return 0; +} + +/* Eswitch devlink rate API */ + +int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + int err; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + int err; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_esw_rate_group *group = priv; + int err; + + err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_set_group_min_rate(esw, group, tx_share, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = 
devlink_priv(rate_node->devlink);
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	struct mlx5_esw_rate_group *group = priv;
+	int err;
+
+	err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
+	if (err)
+		return err;
+
+	mutex_lock(&esw->state_lock);
+	err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+				   struct netlink_ext_ack *extack)
+{
+	struct mlx5_esw_rate_group *group;
+	struct mlx5_eswitch *esw;
+	int err = 0;
+
+	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+	if (IS_ERR(esw))
+		return PTR_ERR(esw);
+
+	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Rate node creation supported only in switchdev mode");
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	group = esw_qos_create_rate_group(esw, extack);
+	if (IS_ERR(group)) {
+		err = PTR_ERR(group);
+		goto unlock;
+	}
+
+	*priv = group;
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+				   struct netlink_ext_ack *extack)
+{
+	struct mlx5_esw_rate_group *group = priv;
+	struct mlx5_eswitch *esw;
+	int err;
+
+	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+	if (IS_ERR(esw))
+		return PTR_ERR(esw);
+
+	mutex_lock(&esw->state_lock);
+	err = esw_qos_destroy_rate_group(esw, group, extack);
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+				    struct mlx5_vport *vport,
+				    struct mlx5_esw_rate_group *group,
+				    struct netlink_ext_ack *extack)
+{
+	int err;
+
+	mutex_lock(&esw->state_lock);
+	err = esw_qos_vport_update_group(esw, vport, group, extack);
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+				     struct devlink_rate *parent,
+				     void *priv, void *parent_priv,
+				     struct netlink_ext_ack *extack)
+{
+	struct mlx5_esw_rate_group *group;
+	struct mlx5_vport *vport = priv;
+
+	if (!parent)
+		return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
+						       vport, NULL, extack);
+
+	group = parent_priv;
+	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
new file mode 100644
index 000000000000..28451abe2d2f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ */
+
+#ifndef __MLX5_ESW_QOS_H__
+#define __MLX5_ESW_QOS_H__
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
+				    struct mlx5_vport *evport,
+				    u32 min_rate,
+				    struct netlink_ext_ack *extack);
+int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
+				    struct mlx5_vport *evport,
+				    u32 max_rate,
+				    struct netlink_ext_ack *extack);
+void mlx5_esw_qos_create(struct mlx5_eswitch *esw);
+void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw);
+int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+			      u32 max_rate, u32 bw_share);
+void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+					    u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+					  u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+					    u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+					  u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+				   struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+				   struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+				     struct devlink_rate *parent,
+				     void *priv, void *parent_priv,
+				     struct netlink_ext_ack *extack);
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
deleted file mode 100644
index 2a3f4be10030..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2021 Mellanox Technologies.
*/ - -#ifndef __MLX5_EN_TC_SAMPLE_H__ -#define __MLX5_EN_TC_SAMPLE_H__ - -#include "en.h" -#include "eswitch.h" - -struct mlx5e_priv; -struct mlx5_flow_attr; -struct mlx5_esw_psample; - -struct mlx5_sample_attr { - u32 group_num; - u32 rate; - u32 trunc_size; - u32 restore_obj_id; - u32 sampler_id; - struct mlx5_flow_table *sample_default_tbl; - struct mlx5_sample_flow *sample_flow; -}; - -void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj); - -struct mlx5_flow_handle * -mlx5_esw_sample_offload(struct mlx5_esw_psample *sample_priv, - struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr); - -void -mlx5_esw_sample_unoffload(struct mlx5_esw_psample *sample_priv, - struct mlx5_flow_handle *rule, - struct mlx5_flow_attr *attr); - -struct mlx5_esw_psample * -mlx5_esw_sample_init(struct mlx5e_priv *priv); - -void -mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample); - -#endif /* __MLX5_EN_TC_SAMPLE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 97e6cb6f13c1..ec136b499204 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -38,6 +38,7 @@ #include <linux/mlx5/mpfs.h> #include "esw/acl/lgcy.h" #include "esw/legacy.h" +#include "esw/qos.h" #include "mlx5_core.h" #include "lib/eq.h" #include "eswitch.h" @@ -740,201 +741,6 @@ static void esw_vport_change_handler(struct work_struct *work) mutex_unlock(&esw->state_lock); } -static bool element_type_supported(struct mlx5_eswitch *esw, int type) -{ - const struct mlx5_core_dev *dev = esw->dev; - - switch (type) { - case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: - return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_TASR; - case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: - return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_VPORT; - case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: - return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_VPORT_TC; - case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: - return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; - } - return false; -} - -/* Vport QoS management */ -static void esw_create_tsar(struct mlx5_eswitch *esw) -{ - u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_core_dev *dev = esw->dev; - __be32 *attr; - int err; - - if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) - return; - - if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) - return; - - if (esw->qos.enabled) - return; - - MLX5_SET(scheduling_context, tsar_ctx, element_type, - SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); - - attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); - *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); - - err = mlx5_create_scheduling_element_cmd(dev, - SCHEDULING_HIERARCHY_E_SWITCH, - tsar_ctx, - &esw->qos.root_tsar_id); - if (err) { - esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err); - return; - } - - esw->qos.enabled = true; -} - -static void esw_destroy_tsar(struct mlx5_eswitch *esw) -{ - int err; - - if (!esw->qos.enabled) - return; - - err = mlx5_destroy_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - esw->qos.root_tsar_id); - if (err) - esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err); - - esw->qos.enabled = false; -} - -static int esw_vport_enable_qos(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, - u32 initial_max_rate, u32 
initial_bw_share) -{ - u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_core_dev *dev = esw->dev; - void *vport_elem; - int err = 0; - - if (!esw->qos.enabled) - return 0; - - if (vport->qos.enabled) - return -EEXIST; - - MLX5_SET(scheduling_context, sched_ctx, element_type, - SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); - vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, - element_attributes); - MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); - MLX5_SET(scheduling_context, sched_ctx, parent_element_id, - esw->qos.root_tsar_id); - MLX5_SET(scheduling_context, sched_ctx, max_average_bw, - initial_max_rate); - MLX5_SET(scheduling_context, sched_ctx, bw_share, initial_bw_share); - - err = mlx5_create_scheduling_element_cmd(dev, - SCHEDULING_HIERARCHY_E_SWITCH, - sched_ctx, - &vport->qos.esw_tsar_ix); - if (err) { - esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", - vport->vport, err); - return err; - } - - vport->qos.enabled = true; - return 0; -} - -static void esw_vport_disable_qos(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - int err; - - if (!vport->qos.enabled) - return; - - err = mlx5_destroy_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - vport->qos.esw_tsar_ix); - if (err) - esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", - vport->vport, err); - - vport->qos.enabled = false; -} - -static int esw_vport_qos_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, - u32 max_rate, u32 bw_share) -{ - u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_core_dev *dev = esw->dev; - void *vport_elem; - u32 bitmask = 0; - int err = 0; - - if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) - return -EOPNOTSUPP; - - if (!vport->qos.enabled) - return -EIO; - - MLX5_SET(scheduling_context, sched_ctx, element_type, - SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); - vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, - element_attributes); - MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); - MLX5_SET(scheduling_context, sched_ctx, parent_element_id, - esw->qos.root_tsar_id); - MLX5_SET(scheduling_context, sched_ctx, max_average_bw, - max_rate); - MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); - bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; - bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; - - err = mlx5_modify_scheduling_element_cmd(dev, - SCHEDULING_HIERARCHY_E_SWITCH, - sched_ctx, - vport->qos.esw_tsar_ix, - bitmask); - if (err) { - esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", - vport->vport, err); - return err; - } - - return 0; -} - -int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, - u32 rate_mbps) -{ - u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; - struct mlx5_vport *vport; - - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return PTR_ERR(vport); - - if (!vport->qos.enabled) - return -EOPNOTSUPP; - - MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); - - return mlx5_modify_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - ctx, - vport->qos.esw_tsar_ix, - MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW); -} - static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac) { ((u8 *)node_guid)[7] = mac[0]; @@ -976,7 +782,7 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) return err; /* 
Attach vport to the eswitch rate limiter */ - esw_vport_enable_qos(esw, vport, vport->qos.max_rate, vport->qos.bw_share); + mlx5_esw_qos_vport_enable(esw, vport, vport->qos.max_rate, vport->qos.bw_share); if (mlx5_esw_is_manager_vport(esw, vport_num)) return 0; @@ -1013,7 +819,7 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport vport_num, 1, MLX5_VPORT_ADMIN_STATE_DOWN); - esw_vport_disable_qos(esw, vport); + mlx5_esw_qos_vport_disable(esw, vport); esw_vport_cleanup_acl(esw, vport); } @@ -1454,12 +1260,10 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) mlx5_eswitch_update_num_of_vfs(esw, num_vfs); - esw_create_tsar(esw); + mlx5_esw_qos_create(esw); esw->mode = mode; - mlx5_lag_update(esw->dev); - if (mode == MLX5_ESWITCH_LEGACY) { err = esw_legacy_enable(esw); } else { @@ -1486,7 +1290,7 @@ abort: if (mode == MLX5_ESWITCH_OFFLOADS) mlx5_rescan_drivers(esw->dev); - esw_destroy_tsar(esw); + mlx5_esw_qos_destroy(esw); mlx5_esw_acls_ns_cleanup(esw); return err; } @@ -1494,7 +1298,7 @@ abort: /** * mlx5_eswitch_enable - Enable eswitch * @esw: Pointer to eswitch - * @num_vfs: Enable eswitch swich for given number of VFs. + * @num_vfs: Enable eswitch switch for given number of VFs. * Caller must pass num_vfs > 0 when enabling eswitch for * vf vports. * mlx5_eswitch_enable() returns 0 on success or error code on failure. @@ -1506,6 +1310,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) if (!mlx5_esw_allowed(esw)) return 0; + mlx5_lag_disable_change(esw->dev); down_write(&esw->mode_lock); if (esw->mode == MLX5_ESWITCH_NONE) { ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); @@ -1519,6 +1324,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) esw->esw_funcs.num_vfs = num_vfs; } up_write(&esw->mode_lock); + mlx5_lag_enable_change(esw->dev); return ret; } @@ -1550,12 +1356,10 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) old_mode = esw->mode; esw->mode = MLX5_ESWITCH_NONE; - mlx5_lag_update(esw->dev); - if (old_mode == MLX5_ESWITCH_OFFLOADS) mlx5_rescan_drivers(esw->dev); - esw_destroy_tsar(esw); + mlx5_esw_qos_destroy(esw); mlx5_esw_acls_ns_cleanup(esw); if (clear_vf) @@ -1567,10 +1371,12 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) if (!mlx5_esw_allowed(esw)) return; + mlx5_lag_disable_change(esw->dev); down_write(&esw->mode_lock); mlx5_eswitch_disable_locked(esw, clear_vf); esw->esw_funcs.num_vfs = 0; up_write(&esw->mode_lock); + mlx5_lag_enable_change(esw->dev); } static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out) @@ -1759,7 +1565,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) ida_init(&esw->offloads.vport_metadata_ida); xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC); mutex_init(&esw->state_lock); + lockdep_register_key(&esw->mode_lock_key); init_rwsem(&esw->mode_lock); + lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key); esw->enabled_vports = 0; esw->mode = MLX5_ESWITCH_NONE; @@ -1793,6 +1601,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); + lockdep_unregister_key(&esw->mode_lock_key); mutex_destroy(&esw->state_lock); WARN_ON(!xa_empty(&esw->offloads.vhca_map)); xa_destroy(&esw->offloads.vhca_map); @@ -1889,8 +1698,7 @@ is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_is_sf_vport(esw, vport_num); } -int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, - 
struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack) { @@ -1899,7 +1707,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, int err = -EOPNOTSUPP; u16 vport_num; - esw = mlx5_devlink_eswitch_get(devlink); + esw = mlx5_devlink_eswitch_get(port->devlink); if (IS_ERR(esw)) return PTR_ERR(esw); @@ -1923,8 +1731,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, return err; } -int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack) { @@ -1933,7 +1740,7 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, int err = -EOPNOTSUPP; u16 vport_num; - esw = mlx5_devlink_eswitch_get(devlink); + esw = mlx5_devlink_eswitch_get(port->devlink); if (IS_ERR(esw)) { NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr"); return PTR_ERR(esw); @@ -2049,110 +1856,6 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, return err; } -static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) -{ - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - struct mlx5_vport *evport; - u32 max_guarantee = 0; - unsigned long i; - - mlx5_esw_for_each_vport(esw, i, evport) { - if (!evport->enabled || evport->qos.min_rate < max_guarantee) - continue; - max_guarantee = evport->qos.min_rate; - } - - if (max_guarantee) - return max_t(u32, max_guarantee / fw_max_bw_share, 1); - return 0; -} - -static int normalize_vports_min_rate(struct mlx5_eswitch *esw) -{ - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - u32 divider = calculate_vports_min_rate_divider(esw); - struct mlx5_vport *evport; - u32 vport_max_rate; - u32 vport_min_rate; - unsigned long i; - u32 bw_share; - int err; - - mlx5_esw_for_each_vport(esw, i, evport) { - if (!evport->enabled) - continue; - vport_min_rate = evport->qos.min_rate; - vport_max_rate = evport->qos.max_rate; - bw_share = 0; - - if (divider) - bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate, - divider, - fw_max_bw_share); - - if (bw_share == evport->qos.bw_share) - continue; - - err = esw_vport_qos_config(esw, evport, vport_max_rate, - bw_share); - if (!err) - evport->qos.bw_share = bw_share; - else - return err; - } - - return 0; -} - -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, - u32 max_rate, u32 min_rate) -{ - struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); - u32 fw_max_bw_share; - u32 previous_min_rate; - bool min_rate_supported; - bool max_rate_supported; - int err = 0; - - if (!mlx5_esw_allowed(esw)) - return -EPERM; - if (IS_ERR(evport)) - return PTR_ERR(evport); - - fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && - fw_max_bw_share >= MLX5_MIN_BW_SHARE; - max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); - - if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported)) - return -EOPNOTSUPP; - - mutex_lock(&esw->state_lock); - - if (min_rate == evport->qos.min_rate) - goto set_max_rate; - - previous_min_rate = evport->qos.min_rate; - evport->qos.min_rate = min_rate; - err = normalize_vports_min_rate(esw); - if (err) { - evport->qos.min_rate = previous_min_rate; - goto unlock; - } - -set_max_rate: - if (max_rate == evport->qos.max_rate) - goto unlock; 
- - err = esw_vport_qos_config(esw, evport, max_rate, evport->qos.bw_share); - if (!err) - evport->qos.max_rate = max_rate; - -unlock: - mutex_unlock(&esw->state_lock); - return err; -} - int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, u16 vport_num, struct ifla_vf_stats *vf_stats) @@ -2366,10 +2069,23 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw) */ void mlx5_esw_unlock(struct mlx5_eswitch *esw) { + if (!mlx5_esw_allowed(esw)) + return; up_write(&esw->mode_lock); } /** + * mlx5_esw_lock() - Take write lock on esw mode lock + * @esw: eswitch device. + */ +void mlx5_esw_lock(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_allowed(esw)) + return; + down_write(&esw->mode_lock); +} + +/** * mlx5_eswitch_get_total_vports - Get total vports of the eswitch * * @dev: Pointer to core device @@ -2384,3 +2100,15 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) return mlx5_esw_allowed(esw) ? esw->total_vports : 0; } EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports); + +/** + * mlx5_eswitch_get_core_dev - Get the mdev device + * @esw : eswitch device. + * + * Return the mellanox core device which manages the eswitch. + */ +struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw) +{ + return mlx5_esw_allowed(esw) ? esw->dev : NULL; +} +EXPORT_SYMBOL(mlx5_eswitch_get_core_dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d562edf5b0bc..2c7444101bb9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -46,7 +46,7 @@ #include "lib/fs_chains.h" #include "sf/sf.h" #include "en/tc_ct.h" -#include "esw/sample.h" +#include "en/tc/sample.h" enum mlx5_mapped_obj_type { MLX5_MAPPED_OBJ_CHAIN, @@ -61,6 +61,7 @@ struct mlx5_mapped_obj { u32 group_id; u32 rate; u32 trunc_size; + u32 tunnel_id; } sample; }; }; @@ -75,17 +76,20 @@ struct mlx5_mapped_obj { #define MLX5_MAX_MC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) -#define MLX5_MIN_BW_SHARE 1 - -#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ - min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit) - #define mlx5_esw_has_fwd_fdb(dev) \ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) #define esw_chains(esw) \ ((esw)->fdb_table.offloads.esw_chains_priv) +enum { + MAPPING_TYPE_CHAIN, + MAPPING_TYPE_TUNNEL, + MAPPING_TYPE_TUNNEL_ENC_OPTS, + MAPPING_TYPE_LABELS, + MAPPING_TYPE_ZONE, +}; + struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_handle *allow_rule; @@ -124,6 +128,8 @@ struct vport_egress { struct { struct mlx5_flow_group *fwd_grp; struct mlx5_flow_handle *fwd_rule; + struct mlx5_flow_handle *bounce_rule; + struct mlx5_flow_group *bounce_grp; } offloads; }; }; @@ -150,8 +156,6 @@ enum mlx5_eswitch_vport_event { MLX5_VPORT_PROMISC_CHANGE = BIT(3), }; -struct mlx5_esw_bridge; - struct mlx5_vport { struct mlx5_core_dev *dev; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; @@ -173,6 +177,7 @@ struct mlx5_vport { u32 bw_share; u32 min_rate; u32 max_rate; + struct mlx5_esw_rate_group *group; } qos; u16 vport; @@ -180,7 +185,6 @@ struct mlx5_vport { enum mlx5_eswitch_vport_event enabled_events; int index; struct devlink_port *dl_port; - struct mlx5_esw_bridge *bridge; }; struct mlx5_esw_indir_table; @@ -302,7 +306,9 @@ struct mlx5_eswitch { struct { bool enabled; - u32 root_tsar_id; + u32 root_tsar_ix; + struct mlx5_esw_rate_group *group0; + struct list_head groups; /* Protected by esw->state_lock */ } qos; 
struct mlx5_esw_bridge_offloads *br_offloads; @@ -315,6 +321,7 @@ struct mlx5_eswitch { u32 large_group_num; } params; struct blocking_notifier_head n_head; + struct lock_class_key mode_lock_key; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -327,8 +334,7 @@ int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw); void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); -int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, - u32 rate_mbps); +int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); @@ -351,6 +357,10 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, u16 vport_num, bool setting); int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, u32 max_rate, u32 min_rate); +int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack); int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting); int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, @@ -461,7 +471,6 @@ struct mlx5_esw_flow_attr { } dests[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5_rx_tun_attr *rx_tun_attr; struct mlx5_pkt_reformat *decap_pkt_reformat; - struct mlx5_sample_attr *sample; }; int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, @@ -475,12 +484,10 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, enum devlink_eswitch_encap_mode *encap); -int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack); -int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); @@ -699,11 +706,18 @@ void mlx5_esw_get(struct mlx5_core_dev *dev); void mlx5_esw_put(struct mlx5_core_dev *dev); int mlx5_esw_try_lock(struct mlx5_eswitch *esw); void mlx5_esw_unlock(struct mlx5_eswitch *esw); +void mlx5_esw_lock(struct mlx5_eswitch *esw); void esw_vport_change_handle_locked(struct mlx5_vport *vport); bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller); +int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw); +void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw); +int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -719,6 +733,9 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(-EOPNOTSUPP); } +static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; } +static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; } + static inline struct mlx5_flow_handle * esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) { @@ -731,6 +748,23 @@ mlx5_esw_vport_to_devlink_port_index(const struct 
mlx5_core_dev *dev, { return vport_num; } + +static inline int +mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) +{ + return 0; +} + +static inline void +mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) {} + +static inline int +mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) +{ + return 0; +} #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index feecf44994a9..0d461e38add3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -48,6 +48,7 @@ #include "lib/fs_chains.h" #include "en_tc.h" #include "en/mapping.h" +#include "devlink.h" #define mlx5_esw_for_each_rep(esw, i, rep) \ xa_for_each(&((esw)->offloads.vport_reps), i, rep) @@ -186,12 +187,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw, static int esw_setup_sampler_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, - struct mlx5_esw_flow_attr *esw_attr, + struct mlx5_flow_attr *attr, int i) { flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; - dest[i].sampler_id = esw_attr->sample->sampler_id; + dest[i].sampler_id = attr->sample_attr->sampler_id; return 0; } @@ -434,7 +435,7 @@ esw_setup_dests(struct mlx5_flow_destination *dest, attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE; if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) { - esw_setup_sampler_dest(dest, flow_act, esw_attr, *i); + esw_setup_sampler_dest(dest, flow_act, attr, *i); (*i)++; } else if (attr->dest_ft) { esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); @@ -539,10 +540,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_hdr = attr->modify_hdr; - /* esw_attr->sample is allocated only when there is a sample action */ - if (esw_attr->sample && esw_attr->sample->sample_default_tbl) { - fdb = esw_attr->sample->sample_default_tbl; - } else if (split) { + if (split) { fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; fwd_attr.vport = esw_attr->in_rep->vport; @@ -926,6 +924,7 @@ out: struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, + struct mlx5_eswitch *from_esw, struct mlx5_eswitch_rep *rep, u32 sqn) { @@ -944,10 +943,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); /* source vport is the esw manager */ - MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport); + MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport); if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(rep->esw->dev, vhca_id)); + MLX5_CAP_GEN(from_esw->dev, vhca_id)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); @@ -963,6 +962,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + if (rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + 
flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) @@ -1613,7 +1615,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) goto ns_err; } - table_size = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + + /* To be strictly correct: + * MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) + * should be: + * esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + + * peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + * but as the peer device might not be in switchdev mode it's not + * possible. We use the fact that by default FW sets max vfs and max sfs + * to the same value on both devices. If it needs to be changed in the future note + * the peer miss group should also be created based on the number of + * total vports of the peer (currently is also uses esw->total_vports). + */ + table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) + MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs; /* create the slow path fdb with encap set, so further table instances @@ -1670,7 +1683,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) source_eswitch_owner_vhca_id_valid, 1); } - ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ; + /* See comment above table_size calculation */ + ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1); @@ -2310,14 +2324,293 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); } +static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; + struct mlx5_eswitch *esw; + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + struct mlx5_vport *vport; + int err; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL); + MLX5_SET(set_flow_table_root_in, in, other_vport, 1); + MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK); + + if (master) { + esw = master->priv.eswitch; + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1); + MLX5_SET(set_flow_table_root_in, in, table_vport_number, + MLX5_VPORT_UPLINK); + + ns = mlx5_get_flow_vport_acl_namespace(master, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, + vport->index); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id, + MLX5_CAP_GEN(master, vhca_id)); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } else { + esw = slave->priv.eswitch; + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + ns = mlx5_get_flow_vport_acl_namespace(slave, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, + vport->index); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id); + } + + err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); + mutex_unlock(&root->chain_lock); + + return err; +} + +static int esw_set_slave_root_fdb(struct mlx5_core_dev 
*master, + struct mlx5_core_dev *slave) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + int err; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, + FS_FT_FDB); + + if (master) { + ns = mlx5_get_flow_namespace(master, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id, + MLX5_CAP_GEN(master, vhca_id)); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } else { + ns = mlx5_get_flow_namespace(slave, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } + + err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); + mutex_unlock(&root->chain_lock); + + return err; +} + +static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave, + struct mlx5_vport *vport, + struct mlx5_flow_table *acl) +{ + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 0; + void *misc; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(slave, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = slave->priv.eswitch->manager_vport; + dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id); + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + + flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act, + &dest, 1); + if (IS_ERR(flow_rule)) + err = PTR_ERR(flow_rule); + else + vport->egress.offloads.bounce_rule = flow_rule; + + kvfree(spec); + return err; +} + +static int esw_set_master_egress_rule(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_eswitch *esw = master->priv.eswitch; + struct mlx5_flow_table_attr ft_attr = { + .max_fte = 1, .prio = 0, .level = 0, + }; + struct mlx5_flow_namespace *egress_ns; + struct mlx5_flow_table *acl; + struct mlx5_flow_group *g; + struct mlx5_vport *vport; + void *match_criteria; + u32 *flow_group_in; + int err; + + vport = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + egress_ns = mlx5_get_flow_vport_acl_namespace(master, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + vport->index); + if (!egress_ns) + return -EINVAL; + + if (vport->egress.acl) + return -EINVAL; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); 
+ goto out; + } + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + goto err_group; + } + + err = __esw_set_master_egress_rule(master, slave, vport, acl); + if (err) + goto err_rule; + + vport->egress.acl = acl; + vport->egress.offloads.bounce_grp = g; + + kvfree(flow_group_in); + + return 0; + +err_rule: + mlx5_destroy_flow_group(g); +err_group: + mlx5_destroy_flow_table(acl); +out: + kvfree(flow_group_in); + return err; +} + +static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(dev->priv.eswitch, + dev->priv.eswitch->manager_vport); + + esw_acl_egress_ofld_cleanup(vport); +} + +int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) +{ + int err; + + err = esw_set_uplink_slave_ingress_root(master_esw->dev, + slave_esw->dev); + if (err) + return -EINVAL; + + err = esw_set_slave_root_fdb(master_esw->dev, + slave_esw->dev); + if (err) + goto err_fdb; + + err = esw_set_master_egress_rule(master_esw->dev, + slave_esw->dev); + if (err) + goto err_acl; + + return err; + +err_acl: + esw_set_slave_root_fdb(NULL, slave_esw->dev); + +err_fdb: + esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev); + + return err; +} + +void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) +{ + esw_unset_master_egress_rule(master_esw->dev); + esw_set_slave_root_fdb(NULL, slave_esw->dev); + esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev); +} + #define ESW_OFFLOADS_DEVCOM_PAIR (0) #define ESW_OFFLOADS_DEVCOM_UNPAIR (1) -static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, - struct mlx5_eswitch *peer_esw) +static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw) { + const struct mlx5_eswitch_rep_ops *ops; + struct mlx5_eswitch_rep *rep; + unsigned long i; + u8 rep_type; - return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); + mlx5_esw_for_each_rep(esw, i, rep) { + rep_type = NUM_REP_TYPES; + while (rep_type--) { + ops = esw->offloads.rep_ops[rep_type]; + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + ops->event) + ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL); + } + } } static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) @@ -2325,9 +2618,42 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) mlx5e_tc_clean_fdb_peer_flows(esw); #endif + mlx5_esw_offloads_rep_event_unpair(esw); esw_del_fdb_peer_miss_rules(esw); } +static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) +{ + const struct mlx5_eswitch_rep_ops *ops; + struct mlx5_eswitch_rep *rep; + unsigned long i; + u8 rep_type; + int err; + + err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); + if (err) + return err; + + mlx5_esw_for_each_rep(esw, i, rep) { + for (rep_type = 0; 
rep_type < NUM_REP_TYPES; rep_type++) { + ops = esw->offloads.rep_ops[rep_type]; + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + ops->event) { + err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw); + if (err) + goto err_out; + } + } + } + + return 0; + +err_out: + mlx5_esw_offloads_unpair(esw); + return err; +} + static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, struct mlx5_eswitch *peer_esw, bool pair) @@ -2618,6 +2944,31 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) esw_vport_destroy_offloads_acl_tables(esw, vport); } +int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + int ret; + + if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) + return 0; + + ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK); + if (ret) + return ret; + + mlx5_esw_for_each_rep(esw, i, rep) { + if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED) + mlx5_esw_offloads_rep_load(esw, rep->vport); + } + + return 0; +} + static int esw_offloads_steering_init(struct mlx5_eswitch *esw) { struct mlx5_esw_indir_table *indir; @@ -2787,6 +3138,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) struct mapping_ctx *reg_c0_obj_pool; struct mlx5_vport *vport; unsigned long i; + u64 mapping_id; int err; if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) && @@ -2810,9 +3162,13 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_vport_metadata; - reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj), - ESW_REG_C0_USER_DATA_METADATA_MASK, - true); + mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + + reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, + sizeof(struct mlx5_mapped_obj), + ESW_REG_C0_USER_DATA_METADATA_MASK, + true); + if (IS_ERR(reg_c0_obj_pool)) { err = PTR_ERR(reg_c0_obj_pool); goto err_pool; @@ -2990,10 +3346,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; + mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy"); - return err; + goto enable_lag; } cur_mlx5_mode = err; err = 0; @@ -3001,15 +3358,24 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (cur_mlx5_mode == mlx5_mode) goto unlock; - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { + if (mlx5_devlink_trap_get_num_active(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change mode while devlink traps are active"); + err = -EOPNOTSUPP; + goto unlock; + } err = esw_offloads_start(esw, extack); - else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { err = esw_offloads_stop(esw, extack); - else + } else { err = -EINVAL; + } unlock: mlx5_esw_unlock(esw); +enable_lag: + mlx5_lag_enable_change(esw->dev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index d713ae24d6b6..a1ac3a654962 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -27,7 +27,7 @@ static int pcie_core(struct notifier_block *, unsigned long, void *); static int forward_event(struct notifier_block *, unsigned long, void *); 
static struct mlx5_nb events_nbs_ref[] = { - /* Events to be proccessed by mlx5_core */ + /* Events to be processed by mlx5_core */ {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index bd66ab2af5b5..306279b7f9e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -417,7 +417,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) struct mlx5_wq_param wqp; struct mlx5_cqe64 *cqe; int inlen, err, eqn; - unsigned int irqn; void *cqc, *in; __be64 *pas; u32 i; @@ -446,7 +445,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) goto err_cqwq; } - err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); + err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -454,7 +453,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size)); - MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index); MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -476,7 +475,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) *conn->cq.mcq.arm_db = 0; conn->cq.mcq.vector = 0; conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; - conn->cq.mcq.irqn = irqn; conn->cq.mcq.uar = fdev->conn_res.uar; tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 0bba92cf5dc0..8ec148010d62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -1516,7 +1516,7 @@ static int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, mutex_lock(&fpga_xfrm->lock); if (!fpga_xfrm->sa_ctx) - /* Unbounded xfrm, chane only sw attrs */ + /* Unbounded xfrm, change only sw attrs */ goto change_sw_xfrm_attrs; /* copy original hw sa */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 896a6c3dbdb7..7db8df64a60e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -152,17 +152,56 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns) return 0; } +static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave, + bool ft_id_valid, + u32 ft_id) +{ + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, + FS_FT_FDB); + if (ft_id_valid) { + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id, + MLX5_CAP_GEN(master, vhca_id)); + 
MLX5_SET(set_flow_table_root_in, in, table_id, + ft_id); + } else { + ns = mlx5_get_flow_namespace(slave, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } + + return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); +} + static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect) { u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; struct mlx5_core_dev *dev = ns->dev; + int err; if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && underlay_qpn == 0) return 0; + if (ft->type == FS_FT_FDB && + mlx5_lag_is_shared_fdb(dev) && + !mlx5_lag_is_master(dev)) + return 0; + MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); @@ -177,7 +216,24 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, MLX5_SET(set_flow_table_root_in, in, other_vport, !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); - return mlx5_cmd_exec_in(dev, set_flow_table_root, in); + err = mlx5_cmd_exec_in(dev, set_flow_table_root, in); + if (!err && + ft->type == FS_FT_FDB && + mlx5_lag_is_shared_fdb(dev) && + mlx5_lag_is_master(dev)) { + err = mlx5_cmd_set_slave_root_fdb(dev, + mlx5_lag_get_peer_mdev(dev), + !disconnect, (!disconnect) ? + ft->id : 0); + if (err && !disconnect) { + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); + MLX5_SET(set_flow_table_root_in, in, table_id, + ns->root_ft->id); + mlx5_cmd_exec_in(dev, set_flow_table_root, in); + } + } + + return err; } static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c0697e1b7118..9fe8e3c204d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -413,7 +413,7 @@ static bool check_valid_spec(const struct mlx5_flow_spec *spec) return true; } -static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) +struct mlx5_flow_root_namespace *find_root(struct fs_node *node) { struct fs_node *root; struct mlx5_flow_namespace *ns; @@ -2343,7 +2343,7 @@ static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio, #define FLOW_TABLE_BIT_SZ 1 #define GET_FLOW_TABLE_CAP(dev, offset) \ - ((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) + \ + ((be32_to_cpu(*((__be32 *)(dev->caps.hca[MLX5_CAP_FLOW_TABLE]->cur) + \ offset / 32)) >> \ (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ) static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) @@ -2493,7 +2493,7 @@ static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level) acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); /* If this a prio with chains, and we can jump from one chain - * (namepsace) to another, so we accumulate the levels + * (namespace) to another, so we accumulate the levels */ if (prio->node.type == FS_TYPE_PRIO_CHAINS) acc_level = acc_level_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 7317cdeab661..98240badc342 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -294,6 +294,8 @@ void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int 
total_vports); void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev); +struct mlx5_flow_root_namespace *find_root(struct fs_node *node); + #define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } #define fs_list_for_each_entry(pos, root) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 9abeb80ffa31..037e18dd4be0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -170,7 +170,7 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev) /* The reset only needs to be issued by one PF. The health buffer is * shared between all functions, and will be cleared during a reset. - * Check again to avoid a redundant 2nd reset. If the fatal erros was + * Check again to avoid a redundant 2nd reset. If the fatal errors was * PCI related a reset won't help. */ fatal_error = mlx5_health_check_fatal_sensors(dev); @@ -213,10 +213,6 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mutex_lock(&dev->intf_state_mutex); if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto unlock;/* a previous error is still being handled */ - if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) { - dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; - goto unlock; - } enter_error_state(dev, force); unlock: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 5c043c5cc403..f4dfa55c8c7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -32,7 +32,9 @@ #include <linux/netdevice.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/eswitch.h> #include <linux/mlx5/vport.h> +#include "lib/devcom.h" #include "mlx5_core.h" #include "eswitch.h" #include "lag.h" @@ -45,7 +47,7 @@ static DEFINE_SPINLOCK(lag_lock); static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2) + u8 remap_port2, bool shared_fdb) { u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); @@ -54,6 +56,7 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb); return mlx5_cmd_exec_in(dev, create_lag, in); } @@ -224,35 +227,59 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, } static int mlx5_create_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker) + struct lag_tracker *tracker, + bool shared_fdb) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; int err; mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1], &ldev->v2p_map[MLX5_LAG_P2]); - mlx5_core_info(dev0, "lag map port 1:%d port 2:%d", - ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]); + mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d", + ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2], + shared_fdb); err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2]); - if (err) + ldev->v2p_map[MLX5_LAG_P2], shared_fdb); + if (err) { mlx5_core_err(dev0, "Failed to create LAG (%d)\n", err); + return err; + } + + if (shared_fdb) { + err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch, + dev1->priv.eswitch); + if (err) + 
mlx5_core_err(dev0, "Can't enable single FDB mode\n"); + else + mlx5_core_info(dev0, "Operation mode is single FDB\n"); + } + + if (err) { + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + if (mlx5_cmd_exec_in(dev0, destroy_lag, in)) + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } + return err; } int mlx5_activate_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker, - u8 flags) + u8 flags, + bool shared_fdb) { bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; int err; - err = mlx5_create_lag(ldev, tracker); + err = mlx5_create_lag(ldev, tracker, shared_fdb); if (err) { if (roce_lag) { mlx5_core_err(dev0, @@ -266,6 +293,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, } ldev->flags |= flags; + ldev->shared_fdb = shared_fdb; return 0; } @@ -278,6 +306,12 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) ldev->flags &= ~MLX5_LAG_MODE_FLAGS; + if (ldev->shared_fdb) { + mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch, + ldev->pf[MLX5_LAG_P2].dev->priv.eswitch); + ldev->shared_fdb = false; + } + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); err = mlx5_cmd_exec_in(dev0, destroy_lag, in); if (err) { @@ -333,6 +367,10 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev) if (!ldev->pf[i].dev) continue; + if (ldev->pf[i].dev->priv.flags & + MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + continue; + ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(ldev->pf[i].dev); } @@ -342,12 +380,15 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + bool shared_fdb = ldev->shared_fdb; bool roce_lag; int err; roce_lag = __mlx5_lag_is_roce(ldev); - if (roce_lag) { + if (shared_fdb) { + mlx5_lag_remove_devices(ldev); + } else if (roce_lag) { if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) { dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); @@ -359,8 +400,34 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev) if (err) return; - if (roce_lag) + if (shared_fdb || roce_lag) mlx5_lag_add_devices(ldev); + + if (shared_fdb) { + if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + } +} + +static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + + if (is_mdev_switchdev_mode(dev0) && + is_mdev_switchdev_mode(dev1) && + mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) && + mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) && + mlx5_devcom_is_paired(dev0->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS) && + MLX5_CAP_GEN(dev1, lag_native_fdb_selection) && + MLX5_CAP_ESW(dev1, root_ft_on_other_esw) && + MLX5_CAP_ESW(dev0, esw_shared_ingress_acl)) + return true; + + return false; } static void mlx5_do_bond(struct mlx5_lag *ldev) @@ -371,14 +438,17 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) bool do_bond, roce_lag; int err; - if (!mlx5_lag_is_ready(ldev)) - return; - - tracker = ldev->tracker; + if (!mlx5_lag_is_ready(ldev)) { + do_bond = false; + } else { + tracker = ldev->tracker; - do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); + 
do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); + } if (do_bond && !__mlx5_lag_is_active(ldev)) { + bool shared_fdb = mlx5_shared_fdb_supported(ldev); + roce_lag = !mlx5_sriov_is_enabled(dev0) && !mlx5_sriov_is_enabled(dev1); @@ -388,23 +458,40 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE; #endif - if (roce_lag) + if (shared_fdb || roce_lag) mlx5_lag_remove_devices(ldev); err = mlx5_activate_lag(ldev, &tracker, roce_lag ? MLX5_LAG_FLAG_ROCE : - MLX5_LAG_FLAG_SRIOV); + MLX5_LAG_FLAG_SRIOV, + shared_fdb); if (err) { - if (roce_lag) + if (shared_fdb || roce_lag) mlx5_lag_add_devices(ldev); return; - } - - if (roce_lag) { + } else if (roce_lag) { dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); mlx5_nic_vport_enable_roce(dev1); + } else if (shared_fdb) { + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + + err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!err) + err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); + + if (err) { + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + mlx5_deactivate_lag(ldev); + mlx5_lag_add_devices(ldev); + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + mlx5_core_err(dev0, "Failed to enable lag\n"); + return; + } } } else if (do_bond && __mlx5_lag_is_active(ldev)) { mlx5_modify_lag(ldev, &tracker); @@ -418,21 +505,48 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) queue_delayed_work(ldev->wq, &ldev->bond_work, delay); } +static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + if (dev0) + mlx5_esw_lock(dev0->priv.eswitch); + if (dev1) + mlx5_esw_lock(dev1->priv.eswitch); +} + +static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + if (dev1) + mlx5_esw_unlock(dev1->priv.eswitch); + if (dev0) + mlx5_esw_unlock(dev0->priv.eswitch); +} + static void mlx5_do_bond_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, bond_work); + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; int status; status = mlx5_dev_list_trylock(); if (!status) { - /* 1 sec delay. 
*/ mlx5_queue_bond_work(ldev, HZ); return; } + if (ldev->mode_changes_in_progress) { + mlx5_dev_list_unlock(); + mlx5_queue_bond_work(ldev, HZ); + return; + } + + mlx5_lag_lock_eswitches(dev0, dev1); mlx5_do_bond(ldev); + mlx5_lag_unlock_eswitches(dev0, dev1); mlx5_dev_list_unlock(); } @@ -630,7 +744,7 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, } /* Must be called with intf_mutex held */ -static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) +static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; @@ -638,7 +752,7 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) if (!MLX5_CAP_GEN(dev, vport_group_manager) || !MLX5_CAP_GEN(dev, lag_master) || MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS) - return; + return 0; tmp_dev = mlx5_get_next_phys_dev(dev); if (tmp_dev) @@ -648,15 +762,17 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) ldev = mlx5_lag_dev_alloc(dev); if (!ldev) { mlx5_core_err(dev, "Failed to alloc lag dev\n"); - return; + return 0; } } else { + if (ldev->mode_changes_in_progress) + return -EAGAIN; mlx5_ldev_get(ldev); } mlx5_ldev_add_mdev(ldev, dev); - return; + return 0; } void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) @@ -667,7 +783,13 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) if (!ldev) return; +recheck: mlx5_dev_list_lock(); + if (ldev->mode_changes_in_progress) { + mlx5_dev_list_unlock(); + msleep(100); + goto recheck; + } mlx5_ldev_remove_mdev(ldev, dev); mlx5_dev_list_unlock(); mlx5_ldev_put(ldev); @@ -675,8 +797,16 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) { + int err; + +recheck: mlx5_dev_list_lock(); - __mlx5_lag_dev_add_mdev(dev); + err = __mlx5_lag_dev_add_mdev(dev); + if (err) { + mlx5_dev_list_unlock(); + msleep(100); + goto recheck; + } mlx5_dev_list_unlock(); } @@ -690,11 +820,11 @@ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, if (!ldev) return; - if (__mlx5_lag_is_active(ldev)) - mlx5_disable_lag(ldev); - mlx5_ldev_remove_netdev(ldev, netdev); ldev->flags &= ~MLX5_LAG_FLAG_READY; + + if (__mlx5_lag_is_active(ldev)) + mlx5_queue_bond_work(ldev, 0); } /* Must be called with intf_mutex held */ @@ -716,6 +846,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, if (i >= MLX5_MAX_PORTS) ldev->flags |= MLX5_LAG_FLAG_READY; + mlx5_queue_bond_work(ldev, 0); } bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) @@ -746,6 +877,21 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_active); +bool mlx5_lag_is_master(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_active(ldev) && + dev == ldev->pf[MLX5_LAG_P1].dev; + spin_unlock(&lag_lock); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_master); + bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; @@ -760,19 +906,50 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_sriov); -void mlx5_lag_update(struct mlx5_core_dev *dev) +bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb; + spin_unlock(&lag_lock); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); + +void mlx5_lag_disable_change(struct mlx5_core_dev *dev) { + struct mlx5_core_dev *dev0; + struct 
mlx5_core_dev *dev1; struct mlx5_lag *ldev; mlx5_dev_list_lock(); + ldev = mlx5_lag_dev(dev); - if (!ldev) - goto unlock; + dev0 = ldev->pf[MLX5_LAG_P1].dev; + dev1 = ldev->pf[MLX5_LAG_P2].dev; - mlx5_do_bond(ldev); + ldev->mode_changes_in_progress++; + if (__mlx5_lag_is_active(ldev)) { + mlx5_lag_lock_eswitches(dev0, dev1); + mlx5_disable_lag(ldev); + mlx5_lag_unlock_eswitches(dev0, dev1); + } + mlx5_dev_list_unlock(); +} -unlock: +void mlx5_lag_enable_change(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + mlx5_dev_list_lock(); + ldev = mlx5_lag_dev(dev); + ldev->mode_changes_in_progress--; mlx5_dev_list_unlock(); + mlx5_queue_bond_work(ldev, 0); } struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) @@ -827,6 +1004,26 @@ unlock: } EXPORT_SYMBOL(mlx5_lag_get_slave_port); +struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) +{ + struct mlx5_core_dev *peer_dev = NULL; + struct mlx5_lag *ldev; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + if (!ldev) + goto unlock; + + peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ? + ldev->pf[MLX5_LAG_P2].dev : + ldev->pf[MLX5_LAG_P1].dev; + +unlock: + spin_unlock(&lag_lock); + return peer_dev; +} +EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); + int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, u64 *values, int num_counters, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h index 191392c37558..d4bae528954e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h @@ -39,6 +39,8 @@ struct lag_tracker { */ struct mlx5_lag { u8 flags; + int mode_changes_in_progress; + bool shared_fdb; u8 v2p_map[MLX5_MAX_PORTS]; struct kref ref; struct lag_func pf[MLX5_MAX_PORTS]; @@ -71,7 +73,8 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); int mlx5_activate_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker, - u8 flags); + u8 flags, + bool shared_fdb); int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, struct net_device *ndev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index c4bf8b679541..011b639b29bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -161,7 +161,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, struct lag_tracker tracker; tracker = ldev->tracker; - mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH); + mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false); } mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index ce696d523493..ffac8a0e7a23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -749,7 +749,7 @@ static int mlx5_pps_event(struct notifier_block *nb, } else { ptp_event.type = PTP_CLOCK_EXTTS; } - /* TODOL clock->ptp can be NULL if ptp_clock_register failes */ + /* TODOL clock->ptp can be NULL if ptp_clock_register fails */ ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 624cedebb510..d3d628b862f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -104,4 +104,6 @@ void 
mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c index 38084400ee8f..e3b0a131c3e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c @@ -40,7 +40,7 @@ struct mlx5_vxlan { struct mlx5_core_dev *mdev; - /* max_num_ports is usuallly 4, 16 buckets is more than enough */ + /* max_num_ports is usually 4, 16 buckets is more than enough */ DECLARE_HASHTABLE(htable, 4); struct mutex sync_lock; /* sync add/del port HW operations */ }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index eb1b316560a8..80cabf9b1787 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -389,11 +389,11 @@ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, switch (cap_mode) { case HCA_CAP_OPMOD_GET_MAX: - memcpy(dev->caps.hca_max[cap_type], hca_caps, + memcpy(dev->caps.hca[cap_type]->max, hca_caps, MLX5_UN_SZ_BYTES(hca_cap_union)); break; case HCA_CAP_OPMOD_GET_CUR: - memcpy(dev->caps.hca_cur[cap_type], hca_caps, + memcpy(dev->caps.hca[cap_type]->cur, hca_caps, MLX5_UN_SZ_BYTES(hca_cap_union)); break; default: @@ -469,7 +469,7 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) return err; set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); - memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP], + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur, MLX5_ST_SZ_BYTES(odp_cap)); #define ODP_CAP_SET_MAX(dev, field) \ @@ -514,7 +514,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); - memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_GENERAL], + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL]->cur, MLX5_ST_SZ_BYTES(cmd_hca_cap)); mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", @@ -596,7 +596,7 @@ static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx) return 0; set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); - memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ROCE], + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ROCE]->cur, MLX5_ST_SZ_BYTES(roce_cap)); MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1); @@ -748,14 +748,12 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, const struct pci_device_id *id) { - struct mlx5_priv *priv = &dev->priv; int err = 0; mutex_init(&dev->pci_status_mutex); pci_set_drvdata(dev->pdev, dev); dev->bar_addr = pci_resource_start(pdev, 0); - priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev)); err = mlx5_pci_enable_device(dev); if (err) { @@ -1179,6 +1177,7 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_ec; } + mlx5_lag_add_mdev(dev); err = mlx5_sriov_attach(dev); if (err) { mlx5_core_err(dev, "sriov init failed %d\n", err); @@ -1186,11 +1185,11 @@ static int mlx5_load(struct mlx5_core_dev *dev) } mlx5_sf_dev_table_create(dev); - mlx5_lag_add_mdev(dev); return 0; err_sriov: + mlx5_lag_remove_mdev(dev); mlx5_ec_cleanup(dev); err_ec: mlx5_sf_hw_table_destroy(dev); @@ -1222,9 +1221,9 @@ err_irq_table: static void mlx5_unload(struct 
mlx5_core_dev *dev) { - mlx5_lag_remove_mdev(dev); mlx5_sf_dev_table_destroy(dev); mlx5_sriov_detach(dev); + mlx5_lag_remove_mdev(dev); mlx5_ec_cleanup(dev); mlx5_sf_hw_table_destroy(dev); mlx5_vhca_event_stop(dev); @@ -1248,11 +1247,6 @@ int mlx5_init_one(struct mlx5_core_dev *dev) int err = 0; mutex_lock(&dev->intf_state_mutex); - if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { - mlx5_core_warn(dev, "interface is up, NOP\n"); - goto out; - } - /* remove any previous indication of internal error */ dev->state = MLX5_DEVICE_STATE_UP; err = mlx5_function_setup(dev, true); @@ -1271,7 +1265,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev) set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - err = mlx5_devlink_register(priv_to_devlink(dev), dev->device); + err = mlx5_devlink_register(priv_to_devlink(dev)); if (err) goto err_devlink_reg; @@ -1293,7 +1287,6 @@ function_teardown: mlx5_function_teardown(dev, true); err_function: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; -out: mutex_unlock(&dev->intf_state_mutex); return err; } @@ -1380,6 +1373,60 @@ out: mutex_unlock(&dev->intf_state_mutex); } +static const int types[] = { + MLX5_CAP_GENERAL, + MLX5_CAP_GENERAL_2, + MLX5_CAP_ETHERNET_OFFLOADS, + MLX5_CAP_IPOIB_ENHANCED_OFFLOADS, + MLX5_CAP_ODP, + MLX5_CAP_ATOMIC, + MLX5_CAP_ROCE, + MLX5_CAP_IPOIB_OFFLOADS, + MLX5_CAP_FLOW_TABLE, + MLX5_CAP_ESWITCH_FLOW_TABLE, + MLX5_CAP_ESWITCH, + MLX5_CAP_VECTOR_CALC, + MLX5_CAP_QOS, + MLX5_CAP_DEBUG, + MLX5_CAP_DEV_MEM, + MLX5_CAP_DEV_EVENT, + MLX5_CAP_TLS, + MLX5_CAP_VDPA_EMULATION, + MLX5_CAP_IPSEC, +}; + +static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) +{ + int type; + int i; + + for (i = 0; i < ARRAY_SIZE(types); i++) { + type = types[i]; + kfree(dev->caps.hca[type]); + } +} + +static int mlx5_hca_caps_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_hca_cap *cap; + int type; + int i; + + for (i = 0; i < ARRAY_SIZE(types); i++) { + cap = kzalloc(sizeof(*cap), GFP_KERNEL); + if (!cap) + goto err; + type = types[i]; + dev->caps.hca[type] = cap; + } + + return 0; + +err: + mlx5_hca_caps_free(dev); + return -ENOMEM; +} + int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) { struct mlx5_priv *priv = &dev->priv; @@ -1399,6 +1446,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mutex_init(&priv->pgdir_mutex); INIT_LIST_HEAD(&priv->pgdir_list); + priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev)); priv->dbg_root = debugfs_create_dir(dev_name(dev->device), mlx5_debugfs_root); INIT_LIST_HEAD(&priv->traps); @@ -1415,8 +1463,14 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) if (err) goto err_adev_init; + err = mlx5_hca_caps_alloc(dev); + if (err) + goto err_hca_caps; + return 0; +err_hca_caps: + mlx5_adev_cleanup(dev); err_adev_init: mlx5_pagealloc_cleanup(dev); err_pagealloc_init: @@ -1435,6 +1489,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; + mlx5_hca_caps_free(dev); mlx5_adev_cleanup(dev); mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); @@ -1452,7 +1507,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) struct devlink *devlink; int err; - devlink = mlx5_devlink_alloc(); + devlink = mlx5_devlink_alloc(&pdev->dev); if (!devlink) { dev_err(&pdev->dev, "devlink alloc failed\n"); return -ENOMEM; @@ -1784,16 +1839,14 @@ static int __init init(void) if (err) goto err_sf; -#ifdef CONFIG_MLX5_CORE_EN err = mlx5e_init(); - if (err) { - pci_unregister_driver(&mlx5_core_driver); - goto err_debug; - } -#endif + if 
(err) + goto err_en; return 0; +err_en: + mlx5_sf_driver_unregister(); err_sf: pci_unregister_driver(&mlx5_core_driver); err_debug: @@ -1803,9 +1856,7 @@ err_debug: static void __exit cleanup(void) { -#ifdef CONFIG_MLX5_CORE_EN mlx5e_cleanup(); -#endif mlx5_sf_driver_unregister(); pci_unregister_driver(&mlx5_core_driver); mlx5_unregister_debugfs(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 343807ac2036..230eab7e3bc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -168,6 +168,8 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_add_mdev(struct mlx5_core_dev *dev); void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev); +void mlx5_lag_disable_change(struct mlx5_core_dev *dev); +void mlx5_lag_enable_change(struct mlx5_core_dev *dev); int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); @@ -206,8 +208,13 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw, int mlx5_fw_version_query(struct mlx5_core_dev *dev, u32 *running_ver, u32 *stored_ver); +#ifdef CONFIG_MLX5_CORE_EN int mlx5e_init(void); void mlx5e_cleanup(void); +#else +static inline int mlx5e_init(void){ return 0; } +static inline void mlx5e_cleanup(void){} +#endif static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) { @@ -270,4 +277,9 @@ static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) return MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); } + +bool mlx5_eth_supported(struct mlx5_core_dev *dev); +bool mlx5_rdma_supported(struct mlx5_core_dev *dev); +bool mlx5_vnet_supported(struct mlx5_core_dev *dev); + #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index b25f764daa08..c79a10b3454d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -18,7 +18,7 @@ #define MLX5_SFS_PER_CTRL_IRQ 64 #define MLX5_IRQ_CTRL_SF_MAX 8 -/* min num of vectores for SFs to be enabled */ +/* min num of vectors for SFs to be enabled */ #define MLX5_IRQ_VEC_COMP_BASE_SF 2 #define MLX5_EQ_SHARE_IRQ_MAX_COMP (8) @@ -28,13 +28,13 @@ #define MLX5_EQ_REFS_PER_IRQ (2) struct mlx5_irq { - u32 index; struct atomic_notifier_head nh; cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; - struct kref kref; - int irqn; struct mlx5_irq_pool *pool; + int refcount; + u32 index; + int irqn; }; struct mlx5_irq_pool { @@ -138,9 +138,8 @@ out: return ret; } -static void irq_release(struct kref *kref) +static void irq_release(struct mlx5_irq *irq) { - struct mlx5_irq *irq = container_of(kref, struct mlx5_irq, kref); struct mlx5_irq_pool *pool = irq->pool; xa_erase(&pool->irqs, irq->index); @@ -159,10 +158,31 @@ static void irq_put(struct mlx5_irq *irq) struct mlx5_irq_pool *pool = irq->pool; mutex_lock(&pool->lock); - kref_put(&irq->kref, irq_release); + irq->refcount--; + if (!irq->refcount) + irq_release(irq); mutex_unlock(&pool->lock); } +static int irq_get_locked(struct mlx5_irq *irq) +{ + lockdep_assert_held(&irq->pool->lock); + if (WARN_ON_ONCE(!irq->refcount)) + return 0; + irq->refcount++; + return 1; +} + +static int irq_get(struct mlx5_irq *irq) +{ + int err; + + mutex_lock(&irq->pool->lock); + err = irq_get_locked(irq); 
+ mutex_unlock(&irq->pool->lock); + return err; +} + static irqreturn_t irq_int_handler(int irq, void *nh) { atomic_notifier_call_chain(nh, 0, NULL); @@ -214,7 +234,8 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) err = -ENOMEM; goto err_cpumask; } - kref_init(&irq->kref); + irq->pool = pool; + irq->refcount = 1; irq->index = i; err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL)); if (err) { @@ -222,7 +243,6 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) irq->index, err); goto err_xa; } - irq->pool = pool; return irq; err_xa: free_cpumask_var(irq->mask); @@ -235,24 +255,27 @@ err_req_irq: int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb) { - int err; + int ret; - err = kref_get_unless_zero(&irq->kref); - if (WARN_ON_ONCE(!err)) + ret = irq_get(irq); + if (!ret) /* Something very bad happens here, we are enabling EQ * on non-existing IRQ. */ return -ENOENT; - err = atomic_notifier_chain_register(&irq->nh, nb); - if (err) + ret = atomic_notifier_chain_register(&irq->nh, nb); + if (ret) irq_put(irq); - return err; + return ret; } int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb) { + int err = 0; + + err = atomic_notifier_chain_unregister(&irq->nh, nb); irq_put(irq); - return atomic_notifier_chain_unregister(&irq->nh, nb); + return err; } struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq) @@ -301,10 +324,9 @@ static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, xa_for_each_range(&pool->irqs, index, iter, start, end) { if (!cpumask_equal(iter->mask, affinity)) continue; - if (kref_read(&iter->kref) < pool->min_threshold) + if (iter->refcount < pool->min_threshold) return iter; - if (!irq || kref_read(&iter->kref) < - kref_read(&irq->kref)) + if (!irq || iter->refcount < irq->refcount) irq = iter; } return irq; @@ -319,7 +341,7 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool, mutex_lock(&pool->lock); least_loaded_irq = irq_pool_find_least_loaded(pool, affinity); if (least_loaded_irq && - kref_read(&least_loaded_irq->kref) < pool->min_threshold) + least_loaded_irq->refcount < pool->min_threshold) goto out; new_irq = irq_pool_create_irq(pool, affinity); if (IS_ERR(new_irq)) { @@ -337,11 +359,11 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool, least_loaded_irq = new_irq; goto unlock; out: - kref_get(&least_loaded_irq->kref); - if (kref_read(&least_loaded_irq->kref) > pool->max_threshold) + irq_get_locked(least_loaded_irq); + if (least_loaded_irq->refcount > pool->max_threshold) mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n", least_loaded_irq->irqn, pool->name, - kref_read(&least_loaded_irq->kref) / MLX5_EQ_REFS_PER_IRQ); + least_loaded_irq->refcount / MLX5_EQ_REFS_PER_IRQ); unlock: mutex_unlock(&pool->lock); return least_loaded_irq; @@ -357,7 +379,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, mutex_lock(&pool->lock); irq = xa_load(&pool->irqs, vecidx); if (irq) { - kref_get(&irq->kref); + irq_get_locked(irq); goto unlock; } irq = irq_request(pool, vecidx); @@ -424,7 +446,7 @@ out: return irq; mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n", irq->irqn, cpumask_pr_args(affinity), - kref_read(&irq->kref) / MLX5_EQ_REFS_PER_IRQ); + irq->refcount / MLX5_EQ_REFS_PER_IRQ); return irq; } @@ -437,6 +459,7 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, if (!pool) return ERR_PTR(-ENOMEM); 
pool->dev = dev; + mutex_init(&pool->lock); xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC); pool->xa_num_irqs.min = start; pool->xa_num_irqs.max = start + size - 1; @@ -445,7 +468,6 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, name); pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ; pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ; - mutex_init(&pool->lock); mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d", name, size, start); return pool; @@ -456,9 +478,14 @@ static void irq_pool_free(struct mlx5_irq_pool *pool) struct mlx5_irq *irq; unsigned long index; + /* There are cases in which we are destroying the irq_table before + * freeing all the IRQs, fast teardown for example. Hence, free the irqs + * which might not have been freed. + */ xa_for_each(&pool->irqs, index, irq) - irq_release(&irq->kref); + irq_release(irq); xa_destroy(&pool->irqs); + mutex_destroy(&pool->lock); kvfree(pool); } @@ -479,7 +506,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec) if (!mlx5_sf_max_functions(dev)) return 0; if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) { - mlx5_core_err(dev, "Not enough IRQs for SFs. SF may run at lower performance\n"); + mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n"); return 0; } @@ -597,7 +624,7 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) return; /* There are cases where IRQs still will be in used when we reaching - * to here. Hence, making sure all the irqs are realeased. + * to here. Hence, making sure all the irqs are released. */ irq_pools_destroy(table); pci_free_irq_vectors(dev->pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c index fa0288afc0dd..871c2fbe18d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -39,7 +39,7 @@ static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, cha struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev); struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); - return scnprintf(buf, PAGE_SIZE, "%u\n", sf_dev->sfnum); + return sysfs_emit(buf, "%u\n", sf_dev->sfnum); } static DEVICE_ATTR_RO(sfnum); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 42c8ee03fe3e..052f48068dc1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -14,7 +14,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia struct devlink *devlink; int err; - devlink = mlx5_devlink_alloc(); + devlink = mlx5_devlink_alloc(&adev->dev); if (!devlink) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 1be048769309..13891fdc607e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -164,12 +164,12 @@ static bool mlx5_sf_is_active(const struct mlx5_sf *sf) return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE; } -int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate
*opstate, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); struct mlx5_sf_table *table; struct mlx5_sf *sf; int err = 0; @@ -248,11 +248,11 @@ out: return err; } -int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); struct mlx5_sf_table *table; struct mlx5_sf *sf; int err; @@ -476,7 +476,7 @@ static void mlx5_sf_table_disable(struct mlx5_sf_table *table) return; /* Balances with refcount_set; drop the reference so that new user cmd cannot start - * and new vhca event handler cannnot run. + * and new vhca event handler cannot run. */ mlx5_sf_table_put(table); wait_for_completion(&table->disable_complete); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 81ce13b19ee8..3a480e06ecc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -24,11 +24,11 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink, unsigned int *new_port_index); int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, struct netlink_ext_ack *extack); -int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate *opstate, struct netlink_ext_ack *extack); -int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack); #else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 12cf323a5943..8a1623a4d8bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -749,7 +749,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, struct mlx5_cqe64 *cqe; struct mlx5dr_cq *cq; int inlen, err, eqn; - unsigned int irqn; void *cqc, *in; __be64 *pas; int vector; @@ -782,7 +781,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, goto err_cqwq; vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); - err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn); + err = mlx5_vector2eqn(mdev, vector, &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -790,7 +789,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe)); - MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -818,7 +817,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, *cq->mcq.arm_db = cpu_to_be32(2 << 28); cq->mcq.vector = 0; - cq->mcq.irqn = irqn; cq->mcq.uar = uar; return cq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index 
f1950e4968da..e4dd4eed5aee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -352,6 +352,7 @@ static void dr_ste_v0_set_rx_decap(u8 *hw_ste_p) { MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, DR_STE_TUNL_ACTION_DECAP); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1); } static void dr_ste_v0_set_rx_pop_vlan(u8 *hw_ste_p) @@ -365,6 +366,7 @@ static void dr_ste_v0_set_rx_decap_l3(u8 *hw_ste_p, bool vlan) MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, DR_STE_TUNL_ACTION_L3_DECAP); MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1); } static void dr_ste_v0_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions, diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 12871c8dc7c1..d1ae248e125c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -58,10 +58,10 @@ config MLXSW_SPECTRUM depends on NET_IPGRE || NET_IPGRE=n depends on IPV6_GRE || IPV6_GRE=n depends on VXLAN || VXLAN=n + depends on PTP_1588_CLOCK_OPTIONAL select GENERIC_ALLOCATOR select PARMAN select OBJAGG - imply PTP_1588_CLOCK select NET_PTP_CLASSIFY if PTP_1588_CLOCK default m help diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e775f08fb464..f080fab3de2b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1927,7 +1927,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (!reload) { alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size; - devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size); + devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size, + mlxsw_bus_info->dev); if (!devlink) { err = -ENOMEM; goto err_devlink_alloc; @@ -1974,7 +1975,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_emad_init; if (!reload) { - err = devlink_register(devlink, mlxsw_bus_info->dev); + err = devlink_register(devlink); if (err) goto err_devlink_register; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 7e221ef01437..f69cbb3852d5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -9079,7 +9079,7 @@ mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; struct net_device *dev; dev = br_fdb_find_port(rif->dev, mac, 0); @@ -9127,8 +9127,8 @@ mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) { + struct switchdev_notifier_fdb_info info = {}; u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); - struct switchdev_notifier_fdb_info info; struct net_device *br_dev; struct net_device *dev; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index f5d0d392efbf..22fede5cb32c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2520,7 +2520,7 @@ mlxsw_sp_fdb_call_notifiers(enum 
switchdev_notifier_type type, const char *mac, u16 vid, struct net_device *dev, bool offloaded) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = mac; info.vid = vid; diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig index d54aa164c4e9..735eea1dacf1 100644 --- a/drivers/net/ethernet/microchip/Kconfig +++ b/drivers/net/ethernet/microchip/Kconfig @@ -45,6 +45,7 @@ config ENCX24J600 config LAN743X tristate "LAN743x support" depends on PCI + depends on PTP_1588_CLOCK_OPTIONAL select PHYLIB select CRC16 select CRC32 diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index 0443f66b5550..9a8e4f201eb1 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -277,7 +277,7 @@ static void sparx5_fdb_call_notifiers(enum switchdev_notifier_type type, const char *mac, u16 vid, struct net_device *dev, bool offloaded) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = mac; info.vid = vid; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index 9d485a9d1f1f..cb68eaaac881 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -13,19 +13,26 @@ */ #define VSTAX 73 -static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) +#define ifh_encode_bitfield(ifh, value, pos, _width) \ + ({ \ + u32 width = (_width); \ + \ + /* Max width is 5 bytes - 40 bits. In worst case this will + * spread over 6 bytes - 48 bits + */ \ + compiletime_assert(width <= 40, \ + "Unsupported width, must be <= 40"); \ + __ifh_encode_bitfield((ifh), (value), (pos), width); \ + }) + +static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) { u8 *ifh_hdr = ifh; /* Calculate the Start IFH byte position of this IFH bit position */ u32 byte = (35 - (pos / 8)); /* Calculate the Start bit position in the Start IFH byte */ u32 bit = (pos % 8); - u64 encode = GENMASK(bit + width - 1, bit) & (value << bit); - - /* Max width is 5 bytes - 40 bits. In worst case this will - * spread over 6 bytes - 48 bits - */ - compiletime_assert(width <= 40, "Unsupported width, must be <= 40"); + u64 encode = GENMASK_ULL(bit + width - 1, bit) & (value << bit); /* The b0-b7 goes into the start IFH byte */ if (encode & 0xFF) diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig index 2d3157e4d081..b6a73d151dec 100644 --- a/drivers/net/ethernet/mscc/Kconfig +++ b/drivers/net/ethernet/mscc/Kconfig @@ -16,7 +16,7 @@ config MSCC_OCELOT_SWITCH_LIB select NET_DEVLINK select REGMAP_MMIO select PACKING - select PHYLIB + select PHYLINK tristate help This is a hardware support library for Ocelot network switches. 
It is @@ -24,6 +24,7 @@ config MSCC_OCELOT_SWITCH_LIB config MSCC_OCELOT_SWITCH tristate "Ocelot switch driver" + depends on PTP_1588_CLOCK_OPTIONAL depends on BRIDGE || BRIDGE=n depends on NET_SWITCHDEV depends on HAS_IOMEM diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index adfb9781799e..8ec194178aa2 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -377,7 +377,7 @@ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port) return ocelot_read_rix(ocelot, QSYS_SW_STATUS, port); } -int ocelot_port_flush(struct ocelot *ocelot, int port) +static int ocelot_port_flush(struct ocelot *ocelot, int port) { unsigned int pause_ena; int err, val; @@ -429,63 +429,118 @@ int ocelot_port_flush(struct ocelot *ocelot, int port) return err; } -EXPORT_SYMBOL(ocelot_port_flush); -void ocelot_adjust_link(struct ocelot *ocelot, int port, - struct phy_device *phydev) +void ocelot_phylink_mac_link_down(struct ocelot *ocelot, int port, + unsigned int link_an_mode, + phy_interface_t interface, + unsigned long quirks) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - int speed, mode = 0; + int err; + + ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA, + DEV_MAC_ENA_CFG); + + ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0); + + err = ocelot_port_flush(ocelot, port); + if (err) + dev_err(ocelot->dev, "failed to flush port %d: %d\n", + port, err); + + /* Put the port in reset. */ + if (interface != PHY_INTERFACE_MODE_QSGMII || + !(quirks & OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP)) + ocelot_port_rmwl(ocelot_port, + DEV_CLOCK_CFG_MAC_TX_RST | + DEV_CLOCK_CFG_MAC_TX_RST, + DEV_CLOCK_CFG_MAC_TX_RST | + DEV_CLOCK_CFG_MAC_TX_RST, + DEV_CLOCK_CFG); +} +EXPORT_SYMBOL_GPL(ocelot_phylink_mac_link_down); + +void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port, + struct phy_device *phydev, + unsigned int link_an_mode, + phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause, + unsigned long quirks) +{ + struct ocelot_port *ocelot_port = ocelot->ports[port]; + int mac_speed, mode = 0; + u32 mac_fc_cfg; + + /* The MAC might be integrated in systems where the MAC speed is fixed + * and it's the PCS who is performing the rate adaptation, so we have + * to write "1000Mbps" into the LINK_SPEED field of DEV_CLOCK_CFG + * (which is also its default value). + */ + if ((quirks & OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION) || + speed == SPEED_1000) { + mac_speed = OCELOT_SPEED_1000; + mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA; + } else if (speed == SPEED_2500) { + mac_speed = OCELOT_SPEED_2500; + mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA; + } else if (speed == SPEED_100) { + mac_speed = OCELOT_SPEED_100; + } else { + mac_speed = OCELOT_SPEED_10; + } + + if (duplex == DUPLEX_FULL) + mode |= DEV_MAC_MODE_CFG_FDX_ENA; - switch (phydev->speed) { + ocelot_port_writel(ocelot_port, mode, DEV_MAC_MODE_CFG); + + /* Take port out of reset by clearing the MAC_TX_RST, MAC_RX_RST and + * PORT_RST bits in DEV_CLOCK_CFG. 
+ */ + ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(mac_speed), + DEV_CLOCK_CFG); + + switch (speed) { case SPEED_10: - speed = OCELOT_SPEED_10; + mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_10); break; case SPEED_100: - speed = OCELOT_SPEED_100; + mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_100); break; case SPEED_1000: - speed = OCELOT_SPEED_1000; - mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA; - break; case SPEED_2500: - speed = OCELOT_SPEED_2500; - mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA; + mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_1000); break; default: - dev_err(ocelot->dev, "Unsupported PHY speed on port %d: %d\n", - port, phydev->speed); + dev_err(ocelot->dev, "Unsupported speed on port %d: %d\n", + port, speed); return; } - phy_print_status(phydev); - - if (!phydev->link) - return; - - /* Only full duplex supported for now */ - ocelot_port_writel(ocelot_port, DEV_MAC_MODE_CFG_FDX_ENA | - mode, DEV_MAC_MODE_CFG); - - /* Disable HDX fast control */ - ocelot_port_writel(ocelot_port, DEV_PORT_MISC_HDX_FAST_DIS, - DEV_PORT_MISC); + /* Handle RX pause in all cases, with 2500base-X this is used for rate + * adaptation. + */ + mac_fc_cfg |= SYS_MAC_FC_CFG_RX_FC_ENA; - /* SGMII only for now */ - ocelot_port_writel(ocelot_port, PCS1G_MODE_CFG_SGMII_MODE_ENA, - PCS1G_MODE_CFG); - ocelot_port_writel(ocelot_port, PCS1G_SD_CFG_SD_SEL, PCS1G_SD_CFG); + if (tx_pause) + mac_fc_cfg |= SYS_MAC_FC_CFG_TX_FC_ENA | + SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) | + SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) | + SYS_MAC_FC_CFG_ZERO_PAUSE_ENA; - /* Enable PCS */ - ocelot_port_writel(ocelot_port, PCS1G_CFG_PCS_ENA, PCS1G_CFG); + /* Flow control. Link speed is only used here to evaluate the time + * specification in incoming pause frames. + */ + ocelot_write_rix(ocelot, mac_fc_cfg, SYS_MAC_FC_CFG, port); - /* No aneg on SGMII */ - ocelot_port_writel(ocelot_port, 0, PCS1G_ANEG_CFG); + ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port); - /* No loopback */ - ocelot_port_writel(ocelot_port, 0, PCS1G_LB_CFG); + ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause); - /* Enable MAC module */ + /* Undo the effects of ocelot_phylink_mac_link_down: + * enable MAC module + */ ocelot_port_writel(ocelot_port, DEV_MAC_ENA_CFG_RX_ENA | DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG); @@ -502,39 +557,8 @@ void ocelot_adjust_link(struct ocelot *ocelot, int port, /* Core: Enable port for frame transfer */ ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 1); - - /* Flow control */ - ocelot_write_rix(ocelot, SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) | - SYS_MAC_FC_CFG_RX_FC_ENA | SYS_MAC_FC_CFG_TX_FC_ENA | - SYS_MAC_FC_CFG_ZERO_PAUSE_ENA | - SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) | - SYS_MAC_FC_CFG_FC_LINK_SPEED(speed), - SYS_MAC_FC_CFG, port); - ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port); -} -EXPORT_SYMBOL(ocelot_adjust_link); - -void ocelot_port_enable(struct ocelot *ocelot, int port, - struct phy_device *phy) -{ - /* Enable receiving frames on the port, and activate auto-learning of - * MAC addresses. 
- */ - ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO | - ANA_PORT_PORT_CFG_RECV_ENA | - ANA_PORT_PORT_CFG_PORTID_VAL(port), - ANA_PORT_PORT_CFG, port); -} -EXPORT_SYMBOL(ocelot_port_enable); - -void ocelot_port_disable(struct ocelot *ocelot, int port) -{ - struct ocelot_port *ocelot_port = ocelot->ports[port]; - - ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG); - ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0); } -EXPORT_SYMBOL(ocelot_port_disable); +EXPORT_SYMBOL_GPL(ocelot_phylink_mac_link_up); static void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, struct sk_buff *clone) @@ -1334,6 +1358,7 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot) struct net_device *bond = ocelot_port->bond; mask = ocelot_get_bridge_fwd_mask(ocelot, bridge); + mask |= cpu_fwd_mask; mask &= ~BIT(port); if (bond) { mask &= ~ocelot_get_bond_mask(ocelot, bond, @@ -1956,6 +1981,15 @@ void ocelot_init_port(struct ocelot *ocelot, int port) /* Disable source address learning for standalone mode */ ocelot_port_set_learning(ocelot, port, false); + /* Set the port's initial logical port ID value, enable receiving + * frames on it, and configure the MAC address learning type to + * automatic. + */ + ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO | + ANA_PORT_PORT_CFG_RECV_ENA | + ANA_PORT_PORT_CFG_PORTID_VAL(port), + ANA_PORT_PORT_CFG, port); + /* Enable vcap lookups */ ocelot_vcap_enable(ocelot, port); } diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index db6b1a4c3926..1952d6a1b98a 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -12,8 +12,7 @@ #include <linux/etherdevice.h> #include <linux/if_vlan.h> #include <linux/net_tstamp.h> -#include <linux/phy.h> -#include <linux/phy/phy.h> +#include <linux/phylink.h> #include <linux/platform_device.h> #include <linux/regmap.h> @@ -42,11 +41,9 @@ struct ocelot_port_tc { struct ocelot_port_private { struct ocelot_port port; struct net_device *dev; - struct phy_device *phy; + struct phylink *phylink; + struct phylink_config phylink_config; u8 chip_port; - - struct phy *serdes; - struct ocelot_port_tc tc; }; @@ -107,7 +104,7 @@ u32 ocelot_port_readl(struct ocelot_port *port, u32 reg); void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg); int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, - struct phy_device *phy); + struct device_node *portnp); void ocelot_release_port(struct ocelot_port *ocelot_port); int ocelot_devlink_init(struct ocelot *ocelot); void ocelot_devlink_teardown(struct ocelot *ocelot); diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c index ea4e83410fe4..7390fa3980ec 100644 --- a/drivers/net/ethernet/mscc/ocelot_io.c +++ b/drivers/net/ethernet/mscc/ocelot_io.c @@ -21,7 +21,7 @@ u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset) ocelot->map[target][reg & REG_MASK] + offset, &val); return val; } -EXPORT_SYMBOL(__ocelot_read_ix); +EXPORT_SYMBOL_GPL(__ocelot_read_ix); void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset) { @@ -32,7 +32,7 @@ void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset) regmap_write(ocelot->targets[target], ocelot->map[target][reg & REG_MASK] + offset, val); } -EXPORT_SYMBOL(__ocelot_write_ix); +EXPORT_SYMBOL_GPL(__ocelot_write_ix); void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg, u32 offset) @@ -45,7 +45,7 @@ void __ocelot_rmw_ix(struct 
ocelot *ocelot, u32 val, u32 mask, u32 reg, ocelot->map[target][reg & REG_MASK] + offset, mask, val); } -EXPORT_SYMBOL(__ocelot_rmw_ix); +EXPORT_SYMBOL_GPL(__ocelot_rmw_ix); u32 ocelot_port_readl(struct ocelot_port *port, u32 reg) { @@ -58,7 +58,7 @@ u32 ocelot_port_readl(struct ocelot_port *port, u32 reg) regmap_read(port->target, ocelot->map[target][reg & REG_MASK], &val); return val; } -EXPORT_SYMBOL(ocelot_port_readl); +EXPORT_SYMBOL_GPL(ocelot_port_readl); void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg) { @@ -69,7 +69,7 @@ void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg) regmap_write(port->target, ocelot->map[target][reg & REG_MASK], val); } -EXPORT_SYMBOL(ocelot_port_writel); +EXPORT_SYMBOL_GPL(ocelot_port_writel); void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg) { @@ -77,7 +77,7 @@ void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg) ocelot_port_writel(port, (cur & (~mask)) | val, reg); } -EXPORT_SYMBOL(ocelot_port_rmwl); +EXPORT_SYMBOL_GPL(ocelot_port_rmwl); u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target, u32 reg, u32 offset) @@ -128,7 +128,7 @@ int ocelot_regfields_init(struct ocelot *ocelot, return 0; } -EXPORT_SYMBOL(ocelot_regfields_init); +EXPORT_SYMBOL_GPL(ocelot_regfields_init); static struct regmap_config ocelot_regmap_config = { .reg_bits = 32, @@ -148,4 +148,4 @@ struct regmap *ocelot_regmap_init(struct ocelot *ocelot, struct resource *res) return devm_regmap_init_mmio(ocelot->dev, regs, &ocelot_regmap_config); } -EXPORT_SYMBOL(ocelot_regmap_init); +EXPORT_SYMBOL_GPL(ocelot_regmap_init); diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index de900ea70fd4..5e8965be968a 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -9,10 +9,14 @@ */ #include <linux/if_bridge.h> +#include <linux/of_net.h> +#include <linux/phy/phy.h> #include <net/pkt_cls.h> #include "ocelot.h" #include "ocelot_vcap.h" +#define OCELOT_MAC_QUIRKS OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP + static struct ocelot *devlink_port_to_ocelot(struct devlink_port *dlp) { return devlink_priv(dlp->devlink); @@ -381,15 +385,6 @@ static int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type, return 0; } -static void ocelot_port_adjust_link(struct net_device *dev) -{ - struct ocelot_port_private *priv = netdev_priv(dev); - struct ocelot *ocelot = priv->port.ocelot; - int port = priv->chip_port; - - ocelot_adjust_link(ocelot, port, dev->phydev); -} - static int ocelot_vlan_vid_prepare(struct net_device *dev, u16 vid, bool pvid, bool untagged) { @@ -448,33 +443,8 @@ static int ocelot_vlan_vid_del(struct net_device *dev, u16 vid) static int ocelot_port_open(struct net_device *dev) { struct ocelot_port_private *priv = netdev_priv(dev); - struct ocelot_port *ocelot_port = &priv->port; - struct ocelot *ocelot = ocelot_port->ocelot; - int port = priv->chip_port; - int err; - - if (priv->serdes) { - err = phy_set_mode_ext(priv->serdes, PHY_MODE_ETHERNET, - ocelot_port->phy_mode); - if (err) { - netdev_err(dev, "Could not set mode of SerDes\n"); - return err; - } - } - err = phy_connect_direct(dev, priv->phy, &ocelot_port_adjust_link, - ocelot_port->phy_mode); - if (err) { - netdev_err(dev, "Could not attach to PHY\n"); - return err; - } - - dev->phydev = priv->phy; - - phy_attached_info(priv->phy); - phy_start(priv->phy); - - ocelot_port_enable(ocelot, port, priv->phy); + phylink_start(priv->phylink); return 0; 
} @@ -482,14 +452,8 @@ static int ocelot_port_open(struct net_device *dev) static int ocelot_port_stop(struct net_device *dev) { struct ocelot_port_private *priv = netdev_priv(dev); - struct ocelot *ocelot = priv->port.ocelot; - int port = priv->chip_port; - - phy_disconnect(priv->phy); - dev->phydev = NULL; - - ocelot_port_disable(ocelot, port); + phylink_stop(priv->phylink); return 0; } @@ -1528,8 +1492,188 @@ struct notifier_block ocelot_switchdev_blocking_nb __read_mostly = { .notifier_call = ocelot_switchdev_blocking_event, }; +static void vsc7514_phylink_validate(struct phylink_config *config, + unsigned long *supported, + struct phylink_link_state *state) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct ocelot_port_private *priv = netdev_priv(ndev); + struct ocelot_port *ocelot_port = &priv->port; + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = {}; + + if (state->interface != PHY_INTERFACE_MODE_NA && + state->interface != ocelot_port->phy_mode) { + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + return; + } + + phylink_set_port_modes(mask); + + phylink_set(mask, Pause); + phylink_set(mask, Autoneg); + phylink_set(mask, Asym_Pause); + phylink_set(mask, 10baseT_Half); + phylink_set(mask, 10baseT_Full); + phylink_set(mask, 100baseT_Half); + phylink_set(mask, 100baseT_Full); + phylink_set(mask, 1000baseT_Half); + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + phylink_set(mask, 2500baseT_Full); + phylink_set(mask, 2500baseX_Full); + + bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_and(state->advertising, state->advertising, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static void vsc7514_phylink_mac_config(struct phylink_config *config, + unsigned int link_an_mode, + const struct phylink_link_state *state) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct ocelot_port_private *priv = netdev_priv(ndev); + struct ocelot_port *ocelot_port = &priv->port; + + /* Disable HDX fast control */ + ocelot_port_writel(ocelot_port, DEV_PORT_MISC_HDX_FAST_DIS, + DEV_PORT_MISC); + + /* SGMII only for now */ + ocelot_port_writel(ocelot_port, PCS1G_MODE_CFG_SGMII_MODE_ENA, + PCS1G_MODE_CFG); + ocelot_port_writel(ocelot_port, PCS1G_SD_CFG_SD_SEL, PCS1G_SD_CFG); + + /* Enable PCS */ + ocelot_port_writel(ocelot_port, PCS1G_CFG_PCS_ENA, PCS1G_CFG); + + /* No aneg on SGMII */ + ocelot_port_writel(ocelot_port, 0, PCS1G_ANEG_CFG); + + /* No loopback */ + ocelot_port_writel(ocelot_port, 0, PCS1G_LB_CFG); +} + +static void vsc7514_phylink_mac_link_down(struct phylink_config *config, + unsigned int link_an_mode, + phy_interface_t interface) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct ocelot_port_private *priv = netdev_priv(ndev); + struct ocelot *ocelot = priv->port.ocelot; + int port = priv->chip_port; + + ocelot_phylink_mac_link_down(ocelot, port, link_an_mode, interface, + OCELOT_MAC_QUIRKS); +} + +static void vsc7514_phylink_mac_link_up(struct phylink_config *config, + struct phy_device *phydev, + unsigned int link_an_mode, + phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct ocelot_port_private *priv = netdev_priv(ndev); + struct ocelot *ocelot = priv->port.ocelot; + int port = priv->chip_port; + + ocelot_phylink_mac_link_up(ocelot, port, phydev, link_an_mode, + interface, speed, duplex, + tx_pause, rx_pause, OCELOT_MAC_QUIRKS); +} + +static const struct phylink_mac_ops ocelot_phylink_ops = { + 
.validate = vsc7514_phylink_validate, + .mac_config = vsc7514_phylink_mac_config, + .mac_link_down = vsc7514_phylink_mac_link_down, + .mac_link_up = vsc7514_phylink_mac_link_up, +}; + +static int ocelot_port_phylink_create(struct ocelot *ocelot, int port, + struct device_node *portnp) +{ + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct ocelot_port_private *priv; + struct device *dev = ocelot->dev; + phy_interface_t phy_mode; + struct phylink *phylink; + int err; + + of_get_phy_mode(portnp, &phy_mode); + /* DT bindings of internal PHY ports are broken and don't + * specify a phy-mode + */ + if (phy_mode == PHY_INTERFACE_MODE_NA) + phy_mode = PHY_INTERFACE_MODE_INTERNAL; + + if (phy_mode != PHY_INTERFACE_MODE_SGMII && + phy_mode != PHY_INTERFACE_MODE_QSGMII && + phy_mode != PHY_INTERFACE_MODE_INTERNAL) { + dev_err(dev, "unsupported phy mode %s for port %d\n", + phy_modes(phy_mode), port); + return -EINVAL; + } + + /* Ensure clock signals and speed are set on all QSGMII links */ + if (phy_mode == PHY_INTERFACE_MODE_QSGMII) + ocelot_port_rmwl(ocelot_port, 0, + DEV_CLOCK_CFG_MAC_TX_RST | + DEV_CLOCK_CFG_MAC_TX_RST, + DEV_CLOCK_CFG); + + ocelot_port->phy_mode = phy_mode; + + if (phy_mode != PHY_INTERFACE_MODE_INTERNAL) { + struct phy *serdes = of_phy_get(portnp, NULL); + + if (IS_ERR(serdes)) { + err = PTR_ERR(serdes); + dev_err_probe(dev, err, + "missing SerDes phys for port %d\n", + port); + return err; + } + + err = phy_set_mode_ext(serdes, PHY_MODE_ETHERNET, phy_mode); + of_phy_put(serdes); + if (err) { + dev_err(dev, "Could not SerDes mode on port %d: %pe\n", + port, ERR_PTR(err)); + return err; + } + } + + priv = container_of(ocelot_port, struct ocelot_port_private, port); + + priv->phylink_config.dev = &priv->dev->dev; + priv->phylink_config.type = PHYLINK_NETDEV; + + phylink = phylink_create(&priv->phylink_config, + of_fwnode_handle(portnp), + phy_mode, &ocelot_phylink_ops); + if (IS_ERR(phylink)) { + err = PTR_ERR(phylink); + dev_err(dev, "Could not create phylink (%pe)\n", phylink); + return err; + } + + priv->phylink = phylink; + + err = phylink_of_phy_connect(phylink, portnp, 0); + if (err) { + dev_err(dev, "Could not connect to PHY: %pe\n", ERR_PTR(err)); + phylink_destroy(phylink); + priv->phylink = NULL; + return err; + } + + return 0; +} + int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, - struct phy_device *phy) + struct device_node *portnp) { struct ocelot_port_private *priv; struct ocelot_port *ocelot_port; @@ -1542,7 +1686,6 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, SET_NETDEV_DEV(dev, ocelot->dev); priv = netdev_priv(dev); priv->dev = dev; - priv->phy = phy; priv->chip_port = port; ocelot_port = &priv->port; ocelot_port->ocelot = ocelot; @@ -1563,15 +1706,23 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, ocelot_init_port(ocelot, port); + err = ocelot_port_phylink_create(ocelot, port, portnp); + if (err) + goto out; + err = register_netdev(dev); if (err) { dev_err(ocelot->dev, "register_netdev failed\n"); - free_netdev(dev); - ocelot->ports[port] = NULL; - return err; + goto out; } return 0; + +out: + ocelot->ports[port] = NULL; + free_netdev(dev); + + return err; } void ocelot_release_port(struct ocelot_port *ocelot_port) @@ -1581,5 +1732,14 @@ void ocelot_release_port(struct ocelot_port *ocelot_port) port); unregister_netdev(priv->dev); + + if (priv->phylink) { + rtnl_lock(); + phylink_disconnect_phy(priv->phylink); + rtnl_unlock(); + + 
phylink_destroy(priv->phylink); + } + free_netdev(priv->dev); } diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 4bd7e9d9ec61..18aed504f45d 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/of_net.h> #include <linux/netdevice.h> +#include <linux/phylink.h> #include <linux/of_mdio.h> #include <linux/of_platform.h> #include <linux/mfd/syscon.h> @@ -945,13 +946,9 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, for_each_available_child_of_node(ports, portnp) { struct ocelot_port_private *priv; struct ocelot_port *ocelot_port; - struct device_node *phy_node; struct devlink_port *dlp; - phy_interface_t phy_mode; - struct phy_device *phy; struct regmap *target; struct resource *res; - struct phy *serdes; char res_name[8]; if (of_property_read_u32(portnp, "reg", ®)) @@ -975,15 +972,6 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, goto out_teardown; } - phy_node = of_parse_phandle(portnp, "phy-handle", 0); - if (!phy_node) - continue; - - phy = of_phy_find_device(phy_node); - of_node_put(phy_node); - if (!phy) - continue; - err = ocelot_port_devlink_init(ocelot, port, DEVLINK_PORT_FLAVOUR_PHYSICAL); if (err) { @@ -992,7 +980,7 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, } devlink_ports_registered |= BIT(port); - err = ocelot_probe_port(ocelot, port, target, phy); + err = ocelot_probe_port(ocelot, port, target, portnp); if (err) { of_node_put(portnp); goto out_teardown; @@ -1003,49 +991,6 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, port); dlp = &ocelot->devlink_ports[port]; devlink_port_type_eth_set(dlp, priv->dev); - - of_get_phy_mode(portnp, &phy_mode); - - ocelot_port->phy_mode = phy_mode; - - switch (ocelot_port->phy_mode) { - case PHY_INTERFACE_MODE_NA: - continue; - case PHY_INTERFACE_MODE_SGMII: - break; - case PHY_INTERFACE_MODE_QSGMII: - /* Ensure clock signals and speed is set on all - * QSGMII links - */ - ocelot_port_writel(ocelot_port, - DEV_CLOCK_CFG_LINK_SPEED - (OCELOT_SPEED_1000), - DEV_CLOCK_CFG); - break; - default: - dev_err(ocelot->dev, - "invalid phy mode for port%d, (Q)SGMII only\n", - port); - of_node_put(portnp); - err = -EINVAL; - goto out_teardown; - } - - serdes = devm_of_phy_get(ocelot->dev, portnp, NULL); - if (IS_ERR(serdes)) { - err = PTR_ERR(serdes); - if (err == -EPROBE_DEFER) - dev_dbg(ocelot->dev, "deferring probe\n"); - else - dev_err(ocelot->dev, - "missing SerDes phys for port%d\n", - port); - - of_node_put(portnp); - goto out_teardown; - } - - priv->serdes = serdes; } /* Initialize unused devlink ports at the end */ @@ -1103,7 +1048,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (!np && !pdev->dev.platform_data) return -ENODEV; - devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot)); + devlink = + devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot), &pdev->dev); if (!devlink) return -ENOMEM; @@ -1187,7 +1133,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (err) goto out_put_ports; - err = devlink_register(devlink, ocelot->dev); + err = devlink_register(devlink); if (err) goto out_ocelot_deinit; diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index bd9d026e609d..3f982033944b 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -819,7 +819,7 @@ static int 
natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) printk(version); #endif - i = pci_enable_device(pdev); + i = pcim_enable_device(pdev); if (i) return i; /* natsemi has a non-standard PM control register @@ -852,7 +852,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) ioaddr = ioremap(iostart, iosize); if (!ioaddr) { i = -ENOMEM; - goto err_ioremap; + goto err_pci_request_regions; } /* Work around the dropped serial bit. */ @@ -974,9 +974,6 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) err_register_netdev: iounmap(ioaddr); - err_ioremap: - pci_release_regions(pdev); - err_pci_request_regions: free_netdev(dev); return i; @@ -3241,7 +3238,6 @@ static void natsemi_remove1(struct pci_dev *pdev) NATSEMI_REMOVE_FILE(pdev, dspcfg_workaround); unregister_netdev (dev); - pci_release_regions (pdev); iounmap(ioaddr); free_netdev (dev); } diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 20fb4ad29865..df4a3f3da83a 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3512,13 +3512,13 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev) kfree(vdev->vpaths); - /* we are safe to free it now */ - free_netdev(dev); - vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered", buf); vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d Exiting...", buf, __func__, __LINE__); + + /* we are safe to free it now */ + free_netdev(dev); } /* diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 742a420152b3..bb3b8a7f6c5d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -692,7 +692,7 @@ static int nfp_pci_probe(struct pci_dev *pdev, goto err_pci_disable; } - devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf)); + devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf), &pdev->dev); if (!devlink) { err = -ENOMEM; goto err_rel_regions; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index a213784ffa54..0bf2ff5717bc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -286,6 +286,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev, /* Init to unknowns */ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.port = PORT_OTHER; cmd->base.speed = SPEED_UNKNOWN; cmd->base.duplex = DUPLEX_UNKNOWN; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 921db40047d7..d10a93801344 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -701,7 +701,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) if (err) goto err_unmap; - err = devlink_register(devlink, &pf->pdev->dev); + err = devlink_register(devlink); if (err) goto err_app_clean; diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig index af84f72bf08e..4e18b64dceb9 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig +++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig @@ -6,6 +6,7 @@ config PCH_GBE tristate "OKI SEMICONDUCTOR 
IOH(ML7223/ML7831) GbE" depends on PCI && (X86_32 || COMPILE_TEST) + depends on PTP_1588_CLOCK select MII select PTP_1588_CLOCK_PCH select NET_PTP_CLASSIFY diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index bc35d5703bd2..ec3e558f890e 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1031,13 +1031,7 @@ static void pch_gbe_watchdog(struct timer_list *t) struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; netdev->tx_queue_len = adapter->tx_queue_len; /* mii library handles link maintenance tasks */ - if (mii_ethtool_gset(&adapter->mii, &cmd)) { - netdev_err(netdev, "ethtool get setting Error\n"); - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + - PCH_GBE_WATCHDOG_PERIOD)); - return; - } + mii_ethtool_gset(&adapter->mii, &cmd); hw->mac.link_speed = ethtool_cmd_speed(&cmd); hw->mac.link_duplex = cmd.duplex; /* Set the RGMII control. */ diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c index ed832046216a..3426f6fa2b57 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c @@ -301,9 +301,7 @@ void pch_gbe_phy_init_setting(struct pch_gbe_hw *hw) int ret; u16 mii_reg; - ret = mii_ethtool_gset(&adapter->mii, &cmd); - if (ret) - netdev_err(adapter->netdev, "Error: mii_ethtool_gset\n"); + mii_ethtool_gset(&adapter->mii, &cmd); ethtool_cmd_speed_set(&cmd, hw->mac.link_speed); cmd.duplex = hw->mac.link_duplex; diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig index 202973a82712..3f7519e435b8 100644 --- a/drivers/net/ethernet/pensando/Kconfig +++ b/drivers/net/ethernet/pensando/Kconfig @@ -20,7 +20,7 @@ if NET_VENDOR_PENSANDO config IONIC tristate "Pensando Ethernet IONIC Support" depends on 64BIT && PCI - depends on PTP_1588_CLOCK || !PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL select NET_DEVLINK select DIMLIB help diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index cd520e4c5522..c7d0e195d176 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -64,7 +64,7 @@ struct ionic *ionic_devlink_alloc(struct device *dev) { struct devlink *dl; - dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic)); + dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic), dev); return devlink_priv(dl); } @@ -82,7 +82,7 @@ int ionic_devlink_register(struct ionic *ionic) struct devlink_port_attrs attrs = {}; int err; - err = devlink_register(dl, ionic->dev); + err = devlink_register(dl); if (err) { dev_warn(ionic->dev, "devlink_register failed: %d\n", err); return err; diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 98f430905ffa..1203353238e5 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -99,7 +99,7 @@ config QED_SRIOV config QEDE tristate "QLogic QED 25/40/100Gb Ethernet NIC" depends on QED - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL help This enables the support for Marvell FastLinQ adapters family, ethernet driver. 
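The ionic and nfp hunks above, like the qed, mscc ocelot, am65-cpsw and cpsw hunks elsewhere in this series, all track the same core devlink API change: devlink_alloc() now takes the backing struct device at allocation time, and devlink_register() no longer takes it. A minimal sketch of the conversion pattern, using a hypothetical "foo" driver (the foo_* names are placeholders, not part of these patches):

    #include <linux/pci.h>
    #include <net/devlink.h>

    /* Sketch only: "foo" is a placeholder, not a driver touched by these patches. */
    struct foo_priv { int dummy; };
    static const struct devlink_ops foo_devlink_ops;

    static int foo_probe(struct pci_dev *pdev)
    {
            struct devlink *dl;
            int err;

            /* New style: the device is bound when the instance is allocated... */
            dl = devlink_alloc(&foo_devlink_ops, sizeof(struct foo_priv),
                               &pdev->dev);
            if (!dl)
                    return -ENOMEM;

            /* ...so registration takes only the devlink instance itself,
             * where it previously took the struct device as a second argument.
             */
            err = devlink_register(dl);
            if (err)
                    devlink_free(dl);

            return err;
    }
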
diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c index cf7f4da68e69..4c7501b9c284 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c @@ -207,14 +207,15 @@ struct devlink *qed_devlink_register(struct qed_dev *cdev) struct devlink *dl; int rc; - dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink)); + dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink), + &cdev->pdev->dev); if (!dl) return ERR_PTR(-ENOMEM); qdevlink = devlink_priv(dl); qdevlink->cdev = cdev; - rc = devlink_register(dl, &cdev->pdev->dev); + rc = devlink_register(dl); if (rc) goto err_free; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 02a4610d9330..c46a7f756ed5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -327,6 +327,9 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) unsigned long flags; int rc = -EINVAL; + if (!p_ll2_conn) + return rc; + spin_lock_irqsave(&p_tx->lock, flags); if (p_tx->b_completing_packet) { rc = -EBUSY; @@ -500,7 +503,16 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) unsigned long flags = 0; int rc = 0; + if (!p_ll2_conn) + return rc; + spin_lock_irqsave(&p_rx->lock, flags); + + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) { + spin_unlock_irqrestore(&p_rx->lock, flags); + return 0; + } + cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons); cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); @@ -821,6 +833,9 @@ static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie; int rc; + if (!p_ll2_conn) + return 0; + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) return 0; @@ -844,6 +859,9 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) u16 new_idx = 0, num_bds = 0; int rc; + if (!p_ll2_conn) + return 0; + if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) return 0; @@ -1728,6 +1746,8 @@ int qed_ll2_post_rx_buffer(void *cxt, if (!p_ll2_conn) return -EINVAL; p_rx = &p_ll2_conn->rx_queue; + if (!p_rx->set_prod_addr) + return -EIO; spin_lock_irqsave(&p_rx->lock, flags); if (!list_empty(&p_rx->free_descq)) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index da864d12916b..4f4b79250a2b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -1285,8 +1285,7 @@ qed_rdma_create_qp(void *rdma_cxt, if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info->active) { - DP_ERR(p_hwfn->cdev, - "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", + pr_err("qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", rdma_cxt, in_params, out_params); return NULL; } diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 8693117a6180..66c69f0f9af1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -492,6 +492,7 @@ struct qede_fastpath { #define QEDE_SP_HW_ERR 4 #define QEDE_SP_ARFS_CONFIG 5 #define QEDE_SP_AER 7 +#define QEDE_SP_DISABLE 8 #ifdef CONFIG_RFS_ACCEL int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 033bf2c7f56c..d400e9b235bf 
100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1005,6 +1005,13 @@ static void qede_sp_task(struct work_struct *work) struct qede_dev *edev = container_of(work, struct qede_dev, sp_task.work); + /* Disable execution of this deferred work once + * qede removal is in progress, this stop any future + * scheduling of sp_task. + */ + if (test_bit(QEDE_SP_DISABLE, &edev->sp_flags)) + return; + /* The locking scheme depends on the specific flag: * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to * ensure that ongoing flows are ended and new ones are not started. @@ -1292,6 +1299,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY)); if (mode != QEDE_REMOVE_RECOVERY) { + set_bit(QEDE_SP_DISABLE, &edev->sp_flags); unregister_netdev(ndev); cancel_delayed_work_sync(&edev->sp_task); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index d8882d0b6b49..d51bac7ba5af 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -3156,8 +3156,10 @@ int qlcnic_83xx_flash_read32(struct qlcnic_adapter *adapter, u32 flash_addr, indirect_addr = QLC_83XX_FLASH_DIRECT_DATA(addr); ret = QLCRD32(adapter, indirect_addr, &err); - if (err == -EIO) + if (err == -EIO) { + qlcnic_83xx_unlock_flash(adapter); return err; + } word = ret; *(u32 *)p_data = word; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index fa2dab6980bb..7a69b468584a 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2598,7 +2598,7 @@ static u32 rtl_csi_read(struct rtl8169_private *tp, int addr) RTL_R32(tp, CSIDR) : ~0; } -static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val) +static void rtl_set_aspm_entry_latency(struct rtl8169_private *tp, u8 val) { struct pci_dev *pdev = tp->pci_dev; u32 csi; @@ -2606,6 +2606,8 @@ static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val) /* According to Realtek the value at config space address 0x070f * controls the L0s/L1 entrance latency. We try standard ECAM access * first and if it fails fall back to CSI. + * bit 0..2: L0: 0 = 1us, 1 = 2us .. 6 = 7us, 7 = 7us (no typo) + * bit 3..5: L1: 0 = 1us, 1 = 2us .. 
6 = 64us, 7 = 64us */ if (pdev->cfg_size > 0x070f && pci_write_config_byte(pdev, 0x070f, val) == PCIBIOS_SUCCESSFUL) @@ -2619,7 +2621,8 @@ static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val) static void rtl_set_def_aspm_entry_latency(struct rtl8169_private *tp) { - rtl_csi_access_enable(tp, 0x27); + /* L0 7us, L1 16us */ + rtl_set_aspm_entry_latency(tp, 0x27); } struct ephy_info { @@ -3502,12 +3505,16 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN); + /* L0 7us, L1 32us - needed to avoid issues with link-up detection */ + rtl_set_aspm_entry_latency(tp, 0x2f); + rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000); /* disable EEE */ rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000); rtl_pcie_state_l2l3_disable(tp); + rtl_hw_aspm_clkreq_enable(tp, true); } DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond) diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig index 5a2a4af31812..8008b2f45934 100644 --- a/drivers/net/ethernet/renesas/Kconfig +++ b/drivers/net/ethernet/renesas/Kconfig @@ -32,11 +32,11 @@ config SH_ETH config RAVB tristate "Renesas Ethernet AVB support" depends on ARCH_RENESAS || COMPILE_TEST + depends on PTP_1588_CLOCK_OPTIONAL select CRC32 select MII select MDIO_BITBANG select PHYLIB - imply PTP_1588_CLOCK help Renesas Ethernet AVB device driver. This driver supports the following SoCs: diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 80e62ca2e3d3..37ad0f8aaf3c 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -988,6 +988,21 @@ enum ravb_chip_id { RCAR_GEN3, }; +struct ravb_hw_info { + const char (*gstrings_stats)[ETH_GSTRING_LEN]; + size_t gstrings_size; + netdev_features_t net_hw_features; + netdev_features_t net_features; + enum ravb_chip_id chip_id; + int stats_len; + size_t max_rx_len; + unsigned aligned_tx: 1; + + /* hardware features */ + unsigned internal_delay:1; /* AVB-DMAC has internal delays */ + unsigned tx_counters:1; /* E-MAC has TX counters */ +}; + struct ravb_private { struct net_device *ndev; struct platform_device *pdev; @@ -1039,7 +1054,9 @@ struct ravb_private { unsigned rxcidm:1; /* RX Clock Internal Delay Mode */ unsigned txcidm:1; /* TX Clock Internal Delay Mode */ unsigned rgmii_override:1; /* Deprecated rgmii-*id behavior */ - int num_tx_desc; /* TX descriptors per packet */ + unsigned int num_tx_desc; /* TX descriptors per packet */ + + const struct ravb_hw_info *info; }; static inline u32 ravb_read(struct net_device *ndev, enum ravb_reg reg) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index f4dfe9f71d06..02842b980a7f 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -177,10 +177,10 @@ static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only) { struct ravb_private *priv = netdev_priv(ndev); struct net_device_stats *stats = &priv->stats[q]; - int num_tx_desc = priv->num_tx_desc; + unsigned int num_tx_desc = priv->num_tx_desc; struct ravb_tx_desc *desc; + unsigned int entry; int free_num = 0; - int entry; u32 size; for (; priv->cur_tx[q] - priv->dirty_tx[q] > 0; priv->dirty_tx[q]++) { @@ -220,9 +220,9 @@ static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only) static void ravb_ring_free(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); 
- int num_tx_desc = priv->num_tx_desc; - int ring_size; - int i; + unsigned int num_tx_desc = priv->num_tx_desc; + unsigned int ring_size; + unsigned int i; if (priv->rx_ring[q]) { for (i = 0; i < priv->num_rx_ring[q]; i++) { @@ -275,15 +275,15 @@ static void ravb_ring_free(struct net_device *ndev, int q) static void ravb_ring_format(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); - int num_tx_desc = priv->num_tx_desc; + unsigned int num_tx_desc = priv->num_tx_desc; struct ravb_ex_rx_desc *rx_desc; struct ravb_tx_desc *tx_desc; struct ravb_desc *desc; - int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q]; - int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q] * - num_tx_desc; + unsigned int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q]; + unsigned int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q] * + num_tx_desc; dma_addr_t dma_addr; - int i; + unsigned int i; priv->cur_rx[q] = 0; priv->cur_tx[q] = 0; @@ -339,10 +339,11 @@ static void ravb_ring_format(struct net_device *ndev, int q) static int ravb_ring_init(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); - int num_tx_desc = priv->num_tx_desc; + const struct ravb_hw_info *info = priv->info; + unsigned int num_tx_desc = priv->num_tx_desc; + unsigned int ring_size; struct sk_buff *skb; - int ring_size; - int i; + unsigned int i; /* Allocate RX and TX skb rings */ priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q], @@ -353,7 +354,7 @@ static int ravb_ring_init(struct net_device *ndev, int q) goto error; for (i = 0; i < priv->num_rx_ring[q]; i++) { - skb = netdev_alloc_skb(ndev, RX_BUF_SZ + RAVB_ALIGN - 1); + skb = netdev_alloc_skb(ndev, info->max_rx_len); if (!skb) goto error; ravb_set_buffer_align(skb); @@ -535,6 +536,7 @@ static void ravb_rx_csum(struct sk_buff *skb) static bool ravb_rx(struct net_device *ndev, int *quota, int q) { struct ravb_private *priv = netdev_priv(ndev); + const struct ravb_hw_info *info = priv->info; int entry = priv->cur_rx[q] % priv->num_rx_ring[q]; int boguscnt = (priv->dirty_rx[q] + priv->num_rx_ring[q]) - priv->cur_rx[q]; @@ -619,9 +621,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) desc->ds_cc = cpu_to_le16(RX_BUF_SZ); if (!priv->rx_skb[q][entry]) { - skb = netdev_alloc_skb(ndev, - RX_BUF_SZ + - RAVB_ALIGN - 1); + skb = netdev_alloc_skb(ndev, info->max_rx_len); if (!skb) break; /* Better luck next round. 
*/ ravb_set_buffer_align(skb); @@ -1133,13 +1133,14 @@ static const char ravb_gstrings_stats[][ETH_GSTRING_LEN] = { "rx_queue_1_over_errors", }; -#define RAVB_STATS_LEN ARRAY_SIZE(ravb_gstrings_stats) - static int ravb_get_sset_count(struct net_device *netdev, int sset) { + struct ravb_private *priv = netdev_priv(netdev); + const struct ravb_hw_info *info = priv->info; + switch (sset) { case ETH_SS_STATS: - return RAVB_STATS_LEN; + return info->stats_len; default: return -EOPNOTSUPP; } @@ -1176,9 +1177,12 @@ static void ravb_get_ethtool_stats(struct net_device *ndev, static void ravb_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { + struct ravb_private *priv = netdev_priv(ndev); + const struct ravb_hw_info *info = priv->info; + switch (stringset) { case ETH_SS_STATS: - memcpy(data, ravb_gstrings_stats, sizeof(ravb_gstrings_stats)); + memcpy(data, info->gstrings_stats, info->gstrings_size); break; } } @@ -1488,7 +1492,7 @@ out: static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); - int num_tx_desc = priv->num_tx_desc; + unsigned int num_tx_desc = priv->num_tx_desc; u16 q = skb_get_queue_mapping(skb); struct ravb_tstamp_skb *ts_skb; struct ravb_tx_desc *desc; @@ -1628,13 +1632,14 @@ static u16 ravb_select_queue(struct net_device *ndev, struct sk_buff *skb, static struct net_device_stats *ravb_get_stats(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); + const struct ravb_hw_info *info = priv->info; struct net_device_stats *nstats, *stats0, *stats1; nstats = &ndev->stats; stats0 = &priv->stats[RAVB_BE]; stats1 = &priv->stats[RAVB_NC]; - if (priv->chip_id == RCAR_GEN3) { + if (info->tx_counters) { nstats->tx_dropped += ravb_read(ndev, TROCR); ravb_write(ndev, 0, TROCR); /* (write clear) */ } @@ -1924,12 +1929,35 @@ static int ravb_mdio_release(struct ravb_private *priv) return 0; } +static const struct ravb_hw_info ravb_gen3_hw_info = { + .gstrings_stats = ravb_gstrings_stats, + .gstrings_size = sizeof(ravb_gstrings_stats), + .net_hw_features = NETIF_F_RXCSUM, + .net_features = NETIF_F_RXCSUM, + .chip_id = RCAR_GEN3, + .stats_len = ARRAY_SIZE(ravb_gstrings_stats), + .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1, + .internal_delay = 1, + .tx_counters = 1, +}; + +static const struct ravb_hw_info ravb_gen2_hw_info = { + .gstrings_stats = ravb_gstrings_stats, + .gstrings_size = sizeof(ravb_gstrings_stats), + .net_hw_features = NETIF_F_RXCSUM, + .net_features = NETIF_F_RXCSUM, + .chip_id = RCAR_GEN2, + .stats_len = ARRAY_SIZE(ravb_gstrings_stats), + .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1, + .aligned_tx = 1, +}; + static const struct of_device_id ravb_match_table[] = { - { .compatible = "renesas,etheravb-r8a7790", .data = (void *)RCAR_GEN2 }, - { .compatible = "renesas,etheravb-r8a7794", .data = (void *)RCAR_GEN2 }, - { .compatible = "renesas,etheravb-rcar-gen2", .data = (void *)RCAR_GEN2 }, - { .compatible = "renesas,etheravb-r8a7795", .data = (void *)RCAR_GEN3 }, - { .compatible = "renesas,etheravb-rcar-gen3", .data = (void *)RCAR_GEN3 }, + { .compatible = "renesas,etheravb-r8a7790", .data = &ravb_gen2_hw_info }, + { .compatible = "renesas,etheravb-r8a7794", .data = &ravb_gen2_hw_info }, + { .compatible = "renesas,etheravb-rcar-gen2", .data = &ravb_gen2_hw_info }, + { .compatible = "renesas,etheravb-r8a7795", .data = &ravb_gen3_hw_info }, + { .compatible = "renesas,etheravb-rcar-gen3", .data = &ravb_gen3_hw_info }, { } }; MODULE_DEVICE_TABLE(of, ravb_match_table); @@ -1973,13 +2001,6 
@@ static void ravb_set_config_mode(struct net_device *ndev) } } -static const struct soc_device_attribute ravb_delay_mode_quirk_match[] = { - { .soc_id = "r8a774c0" }, - { .soc_id = "r8a77990" }, - { .soc_id = "r8a77995" }, - { /* sentinel */ } -}; - /* Set tx and rx clock internal delay modes */ static void ravb_parse_delay_mode(struct device_node *np, struct net_device *ndev) { @@ -2010,12 +2031,8 @@ static void ravb_parse_delay_mode(struct device_node *np, struct net_device *nde if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID || priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) { - if (!WARN(soc_device_match(ravb_delay_mode_quirk_match), - "phy-mode %s requires TX clock internal delay mode which is not supported by this hardware revision. Please update device tree", - phy_modes(priv->phy_interface))) { - priv->txcidm = 1; - priv->rgmii_override = 1; - } + priv->txcidm = 1; + priv->rgmii_override = 1; } } @@ -2034,8 +2051,8 @@ static void ravb_set_delay_mode(struct net_device *ndev) static int ravb_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; + const struct ravb_hw_info *info; struct ravb_private *priv; - enum ravb_chip_id chip_id; struct net_device *ndev; int error, irq, q; struct resource *res; @@ -2052,15 +2069,15 @@ static int ravb_probe(struct platform_device *pdev) if (!ndev) return -ENOMEM; - ndev->features = NETIF_F_RXCSUM; - ndev->hw_features = NETIF_F_RXCSUM; + info = of_device_get_match_data(&pdev->dev); + + ndev->features = info->net_features; + ndev->hw_features = info->net_hw_features; pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); - chip_id = (enum ravb_chip_id)of_device_get_match_data(&pdev->dev); - - if (chip_id == RCAR_GEN3) + if (info->chip_id == RCAR_GEN3) irq = platform_get_irq_byname(pdev, "ch22"); else irq = platform_get_irq(pdev, 0); @@ -2073,6 +2090,7 @@ static int ravb_probe(struct platform_device *pdev) SET_NETDEV_DEV(ndev, &pdev->dev); priv = netdev_priv(ndev); + priv->info = info; priv->ndev = ndev; priv->pdev = pdev; priv->num_tx_ring[RAVB_BE] = BE_TX_RING_SIZE; @@ -2099,7 +2117,7 @@ static int ravb_probe(struct platform_device *pdev) priv->avb_link_active_low = of_property_read_bool(np, "renesas,ether-link-active-low"); - if (chip_id == RCAR_GEN3) { + if (info->chip_id == RCAR_GEN3) { irq = platform_get_irq_byname(pdev, "ch24"); if (irq < 0) { error = irq; @@ -2124,7 +2142,7 @@ static int ravb_probe(struct platform_device *pdev) } } - priv->chip_id = chip_id; + priv->chip_id = info->chip_id; priv->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(priv->clk)) { @@ -2142,7 +2160,7 @@ static int ravb_probe(struct platform_device *pdev) ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); ndev->min_mtu = ETH_MIN_MTU; - priv->num_tx_desc = chip_id == RCAR_GEN2 ? + priv->num_tx_desc = info->aligned_tx ? 
NUM_TX_DESC_GEN2 : NUM_TX_DESC_GEN3; /* Set function */ @@ -2160,7 +2178,7 @@ static int ravb_probe(struct platform_device *pdev) /* Request GTI loading */ ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI); - if (priv->chip_id != RCAR_GEN2) { + if (info->internal_delay) { ravb_parse_delay_mode(np, ndev); ravb_set_delay_mode(ndev); } @@ -2184,7 +2202,7 @@ static int ravb_probe(struct platform_device *pdev) INIT_LIST_HEAD(&priv->ts_skb_list); /* Initialise PTP Clock driver */ - if (chip_id != RCAR_GEN2) + if (info->chip_id != RCAR_GEN2) ravb_ptp_init(ndev, pdev); /* Debug message level */ @@ -2232,7 +2250,7 @@ out_dma_free: priv->desc_bat_dma); /* Stop PTP Clock driver */ - if (chip_id != RCAR_GEN2) + if (info->chip_id != RCAR_GEN2) ravb_ptp_stop(ndev); out_disable_refclk: clk_disable_unprepare(priv->refclk); @@ -2333,6 +2351,7 @@ static int __maybe_unused ravb_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct ravb_private *priv = netdev_priv(ndev); + const struct ravb_hw_info *info = priv->info; int ret = 0; /* If WoL is enabled set reset mode to rearm the WoL logic */ @@ -2355,7 +2374,7 @@ static int __maybe_unused ravb_resume(struct device *dev) /* Request GTI loading */ ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI); - if (priv->chip_id != RCAR_GEN2) + if (info->internal_delay) ravb_set_delay_mode(ndev); /* Restore descriptor base address table */ diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 53d407a5dbf7..3364b6a56bd1 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2716,7 +2716,7 @@ static void rocker_fdb_offload_notify(struct rocker_port *rocker_port, struct switchdev_notifier_fdb_info *recv_info) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = recv_info->addr; info.vid = recv_info->vid; diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index b82e169b7836..3e1ca7a8d029 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -1822,7 +1822,7 @@ static void ofdpa_port_fdb_learn_work(struct work_struct *work) container_of(work, struct ofdpa_fdb_learn_work, work); bool removing = (lw->flags & OFDPA_OP_FLAG_REMOVE); bool learned = (lw->flags & OFDPA_OP_FLAG_LEARNED); - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = lw->addr; info.vid = lw->vid; diff --git a/drivers/net/ethernet/samsung/Kconfig b/drivers/net/ethernet/samsung/Kconfig index 0582e110b1c0..2a6c2658d284 100644 --- a/drivers/net/ethernet/samsung/Kconfig +++ b/drivers/net/ethernet/samsung/Kconfig @@ -20,9 +20,9 @@ if NET_VENDOR_SAMSUNG config SXGBE_ETH tristate "Samsung 10G/2.5G/1G SXGBE Ethernet driver" depends on HAS_IOMEM && HAS_DMA + depends on PTP_1588_CLOCK_OPTIONAL select PHYLIB select CRC32 - imply PTP_1588_CLOCK help This is the driver for the SXGBE 10G Ethernet IP block found on Samsung platforms. 
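The ravb changes above move from matching on a bare chip_id enum to per-SoC struct ravb_hw_info match data, so the driver tests feature bits (internal_delay, tx_counters, aligned_tx) and per-SoC sizes (max_rx_len, stats_len) rather than comparing chip generations. A minimal sketch of how such OF match data is consumed at probe time, assuming the ravb_hw_info layout shown above; the example_probe() wrapper itself is illustrative only:

    #include <linux/of_device.h>
    #include <linux/platform_device.h>
    #include "ravb.h"   /* provides struct ravb_hw_info as defined above */

    /* Sketch only: shows the of_device_get_match_data() pattern used by ravb_probe(). */
    static int example_probe(struct platform_device *pdev)
    {
            const struct ravb_hw_info *info;

            /* The match table entries carry &ravb_gen2_hw_info / &ravb_gen3_hw_info,
             * so the pointer returned here describes the SoC selected by the DT
             * compatible string.
             */
            info = of_device_get_match_data(&pdev->dev);
            if (!info)
                    return -EINVAL;

            /* Feature flags replace explicit chip_id comparisons. */
            if (info->internal_delay)
                    dev_info(&pdev->dev, "AVB-DMAC internal delay modes available\n");
            if (info->tx_counters)
                    dev_info(&pdev->dev, "E-MAC TX counters available\n");

            return 0;
    }
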
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 5e37c8313725..97ce64079855 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -19,9 +19,9 @@ if NET_VENDOR_SOLARFLARE config SFC tristate "Solarflare SFC9000/SFC9100/EF100-family support" depends on PCI + depends on PTP_1588_CLOCK_OPTIONAL select MDIO select CRC32 - imply PTP_1588_CLOCK help This driver supports 10/40-gigabit Ethernet cards based on the Solarflare SFC9000-family and SFC9100-family controllers. diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index ac3c248d4f9b..929cfc22cd0c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -2,12 +2,12 @@ config STMMAC_ETH tristate "STMicroelectronics Multi-Gigabit Ethernet driver" depends on HAS_IOMEM && HAS_DMA + depends on PTP_1588_CLOCK_OPTIONAL select MII select PCS_XPCS select PAGE_POOL select PHYLINK select CRC32 - imply PTP_1588_CLOCK select RESET_CONTROLLER help This is the driver for the Ethernet IPs built around a diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 5fecc83f175b..b6d945ea903d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -58,6 +58,16 @@ #undef FRAME_FILTER_DEBUG /* #define FRAME_FILTER_DEBUG */ +struct stmmac_txq_stats { + unsigned long tx_pkt_n; + unsigned long tx_normal_irq_n; +}; + +struct stmmac_rxq_stats { + unsigned long rx_pkt_n; + unsigned long rx_normal_irq_n; +}; + /* Extra statistic and debug information exposed by ethtool */ struct stmmac_extra_stats { /* Transmit errors */ @@ -189,6 +199,9 @@ struct stmmac_extra_stats { unsigned long mtl_est_hlbf; unsigned long mtl_est_btre; unsigned long mtl_est_btrlm; + /* per queue statistics */ + struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES]; + struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES]; }; /* Safety Feature statistics exposed by ethtool */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 28dd0ed85a82..f7dc8458cde8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -289,10 +289,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) val &= ~NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL; break; default: - dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", - phy_modes(gmac->phy_mode)); - err = -EINVAL; - goto err_remove_config_dt; + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_GMAC_CTL(gmac->id), val); @@ -309,10 +306,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id); break; default: - dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", - phy_modes(gmac->phy_mode)); - err = -EINVAL; - goto err_remove_config_dt; + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_CLK_SRC_CTRL, val); @@ -329,8 +323,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) NSS_COMMON_CLK_GATE_GMII_TX_EN(gmac->id); break; default: - /* We don't get here; the switch above will have errored out */ - unreachable(); + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val); @@ -361,6 +354,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) return 0; +err_unsupported_phy: + dev_err(&pdev->dev, "Unsupported PHY mode: 
\"%s\"\n", + phy_modes(gmac->phy_mode)); + err = -EINVAL; + err_remove_config_dt: stmmac_remove_config_dt(pdev, plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index e63270267578..9292a1fab7d3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -170,13 +170,16 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr, x->normal_irq_n++; if (likely(intr_status & DMA_CHAN_STATUS_RI)) { x->rx_normal_irq_n++; + x->rxq_stats[chan].rx_normal_irq_n++; ret |= handle_rx; } - if (likely(intr_status & (DMA_CHAN_STATUS_TI | - DMA_CHAN_STATUS_TBU))) { + if (likely(intr_status & DMA_CHAN_STATUS_TI)) { x->tx_normal_irq_n++; + x->txq_stats[chan].tx_normal_irq_n++; ret |= handle_tx; } + if (unlikely(intr_status & DMA_CHAN_STATUS_TBU)) + ret |= handle_tx; if (unlikely(intr_status & DMA_CHAN_STATUS_ERI)) x->rx_early_irq++; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index d0ce608b81c3..595c3ccdcbb7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -261,6 +261,18 @@ static const struct stmmac_stats stmmac_mmc[] = { }; #define STMMAC_MMC_STATS_LEN ARRAY_SIZE(stmmac_mmc) +static const char stmmac_qstats_tx_string[][ETH_GSTRING_LEN] = { + "tx_pkt_n", + "tx_irq_n", +#define STMMAC_TXQ_STATS ARRAY_SIZE(stmmac_qstats_tx_string) +}; + +static const char stmmac_qstats_rx_string[][ETH_GSTRING_LEN] = { + "rx_pkt_n", + "rx_irq_n", +#define STMMAC_RXQ_STATS ARRAY_SIZE(stmmac_qstats_rx_string) +}; + static void stmmac_ethtool_getdrvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -510,6 +522,31 @@ stmmac_set_pauseparam(struct net_device *netdev, } } +static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data) +{ + u32 tx_cnt = priv->plat->tx_queues_to_use; + u32 rx_cnt = priv->plat->rx_queues_to_use; + int q, stat; + char *p; + + for (q = 0; q < tx_cnt; q++) { + p = (char *)priv + offsetof(struct stmmac_priv, + xstats.txq_stats[q].tx_pkt_n); + for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) { + *data++ = (*(u64 *)p); + p += sizeof(u64 *); + } + } + for (q = 0; q < rx_cnt; q++) { + p = (char *)priv + offsetof(struct stmmac_priv, + xstats.rxq_stats[q].rx_pkt_n); + for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) { + *data++ = (*(u64 *)p); + p += sizeof(u64 *); + } + } +} + static void stmmac_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *dummy, u64 *data) { @@ -560,16 +597,21 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, data[j++] = (stmmac_gstrings_stats[i].sizeof_stat == sizeof(u64)) ? 
(*(u64 *)p) : (*(u32 *)p); } + stmmac_get_per_qstats(priv, &data[j]); } static int stmmac_get_sset_count(struct net_device *netdev, int sset) { struct stmmac_priv *priv = netdev_priv(netdev); + u32 tx_cnt = priv->plat->tx_queues_to_use; + u32 rx_cnt = priv->plat->rx_queues_to_use; int i, len, safety_len = 0; switch (sset) { case ETH_SS_STATS: - len = STMMAC_STATS_LEN; + len = STMMAC_STATS_LEN + + STMMAC_TXQ_STATS * tx_cnt + + STMMAC_RXQ_STATS * rx_cnt; if (priv->dma_cap.rmon) len += STMMAC_MMC_STATS_LEN; @@ -592,6 +634,28 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset) } } +static void stmmac_get_qstats_string(struct stmmac_priv *priv, u8 *data) +{ + u32 tx_cnt = priv->plat->tx_queues_to_use; + u32 rx_cnt = priv->plat->rx_queues_to_use; + int q, stat; + + for (q = 0; q < tx_cnt; q++) { + for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) { + snprintf(data, ETH_GSTRING_LEN, "q%d_%s", q, + stmmac_qstats_tx_string[stat]); + data += ETH_GSTRING_LEN; + } + } + for (q = 0; q < rx_cnt; q++) { + for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) { + snprintf(data, ETH_GSTRING_LEN, "q%d_%s", q, + stmmac_qstats_rx_string[stat]); + data += ETH_GSTRING_LEN; + } + } +} + static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data) { int i; @@ -622,6 +686,7 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data) ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } + stmmac_get_qstats_string(priv, p); break; case ETH_SS_TEST: stmmac_selftest_get_strings(priv, p); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index a2aa75cb184e..7b3fcf558603 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2500,6 +2500,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) } else { priv->dev->stats.tx_packets++; priv->xstats.tx_pkt_n++; + priv->xstats.txq_stats[queue].tx_pkt_n++; } if (skb) stmmac_get_tx_hwtstamp(priv, p, skb); @@ -5000,6 +5001,9 @@ read_again: stmmac_finalize_xdp_rx(priv, xdp_status); + priv->xstats.rx_pkt_n += count; + priv->xstats.rxq_stats[queue].rx_pkt_n += count; + if (xsk_uses_need_wakeup(rx_q->xsk_pool)) { if (failure || stmmac_rx_dirty(priv, queue) > 0) xsk_set_rx_need_wakeup(rx_q->xsk_pool); @@ -5287,6 +5291,7 @@ drain_data: stmmac_rx_refill(priv, queue); priv->xstats.rx_pkt_n += count; + priv->xstats.rxq_stats[queue].rx_pkt_n += count; return count; } diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index fb5d2ac3f0d2..130346f74ee8 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -519,6 +519,10 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common, } napi_enable(&common->napi_rx); + if (common->rx_irq_disabled) { + common->rx_irq_disabled = false; + enable_irq(common->rx_chns.irq); + } dev_dbg(common->dev, "cpsw_nuss started\n"); return 0; @@ -872,8 +876,12 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget); - if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) - enable_irq(common->rx_chns.irq); + if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) { + if (common->rx_irq_disabled) { + common->rx_irq_disabled = false; + enable_irq(common->rx_chns.irq); + } + } return num_rx; } @@ -1078,19 +1086,20 @@ static int am65_cpsw_nuss_tx_poll(struct 
napi_struct *napi_tx, int budget) else num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, budget); - num_tx = min(num_tx, budget); - if (num_tx < budget) { - napi_complete(napi_tx); + if (num_tx >= budget) + return budget; + + if (napi_complete_done(napi_tx, num_tx)) enable_irq(tx_chn->irq); - } - return num_tx; + return 0; } static irqreturn_t am65_cpsw_nuss_rx_irq(int irq, void *dev_id) { struct am65_cpsw_common *common = dev_id; + common->rx_irq_disabled = true; disable_irq_nosync(irq); napi_schedule(&common->napi_rx); @@ -2061,8 +2070,12 @@ static void am65_cpsw_port_offload_fwd_mark_update(struct am65_cpsw_common *comm for (i = 1; i <= common->port_num; i++) { struct am65_cpsw_port *port = am65_common_get_port(common, i); - struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(port->ndev); + struct am65_cpsw_ndev_priv *priv; + + if (!port->ndev) + continue; + priv = am65_ndev_to_priv(port->ndev); priv->offload_fwd_mark = set_val; } } @@ -2409,14 +2422,14 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) int i; common->devlink = - devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv)); + devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv), dev); if (!common->devlink) return -ENOMEM; dl_priv = devlink_priv(common->devlink); dl_priv->common = common; - ret = devlink_register(common->devlink, dev); + ret = devlink_register(common->devlink); if (ret) { dev_err(dev, "devlink reg fail ret:%d\n", ret); goto dl_free; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 5d93e346f05e..048ed10143c1 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -126,6 +126,8 @@ struct am65_cpsw_common { struct am65_cpsw_rx_chn rx_chns; struct napi_struct napi_rx; + bool rx_irq_disabled; + u32 nuss_ver; u32 cpsw_ver; unsigned long bus_freq; diff --git a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c index 9c29b363e9ae..599708a3e81d 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c +++ b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c @@ -358,7 +358,7 @@ static int am65_cpsw_port_obj_del(struct net_device *ndev, const void *ctx, static void am65_cpsw_fdb_offload_notify(struct net_device *ndev, struct switchdev_notifier_fdb_info *rcv) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = rcv->addr; info.vid = rcv->vid; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index abf9a2a6f7eb..9f70e40779f6 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -431,7 +431,7 @@ static void cpsw_rx_handler(void *token, int len, int status) skb->protocol = eth_type_trans(skb, ndev); /* mark skb for recycling */ - skb_mark_for_recycle(skb, page, pool); + skb_mark_for_recycle(skb); netif_receive_skb(skb); ndev->stats.rx_bytes += len; @@ -905,7 +905,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, struct cpdma_chan *txch; int ret, q_idx; - if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) { + if (skb_put_padto(skb, CPSW_MIN_PACKET_SIZE)) { cpsw_err(priv, tx_err, "packet pad failed\n"); ndev->stats.tx_dropped++; return NET_XMIT_DROP; diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index ae167223e87f..534d39f729e2 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -375,7 +375,7 @@ static void cpsw_rx_handler(void *token, int len, int 
status) skb->protocol = eth_type_trans(skb, ndev); /* mark skb for recycling */ - skb_mark_for_recycle(skb, page, pool); + skb_mark_for_recycle(skb); netif_receive_skb(skb); ndev->stats.rx_bytes += len; @@ -502,7 +502,7 @@ static void cpsw_restore(struct cpsw_priv *priv) static void cpsw_init_stp_ale_entry(struct cpsw_common *cpsw) { - char stpa[] = {0x01, 0x80, 0xc2, 0x0, 0x0, 0x0}; + static const char stpa[] = {0x01, 0x80, 0xc2, 0x0, 0x0, 0x0}; cpsw_ale_add_mcast(cpsw->ale, stpa, ALE_PORT_HOST, ALE_SUPER, 0, @@ -921,7 +921,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, struct cpdma_chan *txch; int ret, q_idx; - if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) { + if (skb_put_padto(skb, READ_ONCE(priv->tx_packet_min))) { cpsw_err(priv, tx_err, "packet pad failed\n"); ndev->stats.tx_dropped++; return NET_XMIT_DROP; @@ -1101,7 +1101,7 @@ static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n, for (i = 0; i < n; i++) { xdpf = frames[i]; - if (xdpf->len < CPSW_MIN_PACKET_SIZE) + if (xdpf->len < READ_ONCE(priv->tx_packet_min)) break; if (cpsw_xdp_tx_frame(priv, xdpf, NULL, priv->emac_port)) @@ -1390,6 +1390,7 @@ static int cpsw_create_ports(struct cpsw_common *cpsw) priv->dev = dev; priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); priv->emac_port = i + 1; + priv->tx_packet_min = CPSW_MIN_PACKET_SIZE; if (is_valid_ether_addr(slave_data->mac_addr)) { ether_addr_copy(priv->mac_addr, slave_data->mac_addr); @@ -1698,6 +1699,7 @@ static int cpsw_dl_switch_mode_set(struct devlink *dl, u32 id, priv = netdev_priv(sl_ndev); slave->port_vlan = vlan; + WRITE_ONCE(priv->tx_packet_min, CPSW_MIN_PACKET_SIZE_VLAN); if (netif_running(sl_ndev)) cpsw_port_add_switch_def_ale_entries(priv, slave); @@ -1726,6 +1728,7 @@ static int cpsw_dl_switch_mode_set(struct devlink *dl, u32 id, priv = netdev_priv(slave->ndev); slave->port_vlan = slave->data->dual_emac_res_vlan; + WRITE_ONCE(priv->tx_packet_min, CPSW_MIN_PACKET_SIZE); cpsw_port_add_dual_emac_def_ale_entries(priv, slave); } @@ -1800,14 +1803,14 @@ static int cpsw_register_devlink(struct cpsw_common *cpsw) struct cpsw_devlink *dl_priv; int ret = 0; - cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv)); + cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv), dev); if (!cpsw->devlink) return -ENOMEM; dl_priv = devlink_priv(cpsw->devlink); dl_priv->cpsw = cpsw; - ret = devlink_register(cpsw->devlink, dev); + ret = devlink_register(cpsw->devlink); if (ret) { dev_err(dev, "DL reg fail ret:%d\n", ret); goto dl_free; diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index a323bea54faa..2951fb7b9dae 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -89,7 +89,8 @@ do { \ #define CPSW_POLL_WEIGHT 64 #define CPSW_RX_VLAN_ENCAP_HDR_SIZE 4 -#define CPSW_MIN_PACKET_SIZE (VLAN_ETH_ZLEN) +#define CPSW_MIN_PACKET_SIZE_VLAN (VLAN_ETH_ZLEN) +#define CPSW_MIN_PACKET_SIZE (ETH_ZLEN) #define CPSW_MAX_PACKET_SIZE (VLAN_ETH_FRAME_LEN +\ ETH_FCS_LEN +\ CPSW_RX_VLAN_ENCAP_HDR_SIZE) @@ -380,6 +381,7 @@ struct cpsw_priv { u32 emac_port; struct cpsw_common *cpsw; int offload_fwd_mark; + u32 tx_packet_min; }; #define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw) diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c index f7fb6e17dadd..a7d97d429e06 100644 --- a/drivers/net/ethernet/ti/cpsw_switchdev.c +++ b/drivers/net/ethernet/ti/cpsw_switchdev.c @@ -368,7 +368,7 @@ static int 
cpsw_port_obj_del(struct net_device *ndev, const void *ctx, static void cpsw_fdb_offload_notify(struct net_device *ndev, struct switchdev_notifier_fdb_info *rcv) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = rcv->addr; info.vid = rcv->vid; diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 637796670746..b1c5cbe7478b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -943,7 +943,7 @@ static int emac_dev_xmit(struct sk_buff *skb, struct net_device *ndev) goto fail_tx; } - ret_code = skb_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE); + ret_code = skb_put_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE); if (unlikely(ret_code < 0)) { if (netif_msg_tx_err(priv) && net_ratelimit()) dev_err(emac_dev, "DaVinci EMAC: packet pad failed"); diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index fcf3af76b6d7..8fe8887d506a 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -827,6 +827,12 @@ static void decode_data(struct sixpack *sp, unsigned char inbyte) return; } + if (sp->rx_count_cooked + 2 >= sizeof(sp->cooked_buf)) { + pr_err("6pack: cooked buffer overrun, data loss\n"); + sp->rx_count = 0; + return; + } + buf = sp->raw_buf; sp->cooked_buf[sp->rx_count_cooked++] = buf[0] | ((buf[1] << 2) & 0xc0); diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index ebc976b7fcc2..8caa61ec718f 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -418,7 +418,7 @@ static int hwsim_new_edge_nl(struct sk_buff *msg, struct genl_info *info) struct hwsim_edge *e; u32 v0, v1; - if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] && + if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; @@ -528,14 +528,14 @@ static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info) u32 v0, v1; u8 lqi; - if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] && + if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; - if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] && + if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] || !edge_attrs[MAC802154_HWSIM_EDGE_ATTR_LQI]) return -EINVAL; diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index a67b6136e3c0..8f25107c1f1e 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -4,12 +4,11 @@ * Copyright (C) 2018-2021 Linaro Ltd. 
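Editor's note: the cpsw, cpsw_new and davinci_emac hunks above all move from skb_padto() to skb_put_padto(). Both helpers zero-fill up to the minimum frame size, but skb_padto() leaves skb->len untouched, whereas skb_put_padto() also extends skb->len so the zeroed padding is actually part of what the MAC is asked to transmit. The following user-space sketch only illustrates that idea; the function name, buffer handling and the 60-byte minimum (analogous to ETH_ZLEN) are assumptions for the example, not the kernel implementation.

#include <stdio.h>
#include <string.h>

#define MIN_FRAME_LEN 60   /* analogous to ETH_ZLEN: minimum Ethernet frame length without FCS */

/* Pad a frame to the hardware minimum, zero-filling the tail, and return
 * the length that should be handed to the MAC (or -1 if it can't fit). */
static int pad_frame(unsigned char *buf, size_t len, size_t bufsize)
{
    if (len >= MIN_FRAME_LEN)
        return (int)len;
    if (bufsize < MIN_FRAME_LEN)
        return -1;
    /* Zero the padding so stale buffer contents are never transmitted. */
    memset(buf + len, 0, MIN_FRAME_LEN - len);
    return MIN_FRAME_LEN;
}

int main(void)
{
    unsigned char frame[128] = { 0xde, 0xad, 0xbe, 0xef };
    int txlen = pad_frame(frame, 4, sizeof(frame));

    printf("transmit length: %d\n", txlen);   /* prints 60 */
    return 0;
}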
*/ -#include <linux/refcount.h> -#include <linux/mutex.h> #include <linux/clk.h> #include <linux/device.h> #include <linux/interconnect.h> #include <linux/pm.h> +#include <linux/pm_runtime.h> #include <linux/bitops.h> #include "ipa.h" @@ -48,25 +47,25 @@ struct ipa_interconnect { /** * enum ipa_power_flag - IPA power flags * @IPA_POWER_FLAG_RESUMED: Whether resume from suspend has been signaled + * @IPA_POWER_FLAG_SYSTEM: Hardware is system (not runtime) suspended * @IPA_POWER_FLAG_COUNT: Number of defined power flags */ enum ipa_power_flag { IPA_POWER_FLAG_RESUMED, + IPA_POWER_FLAG_SYSTEM, IPA_POWER_FLAG_COUNT, /* Last; not a flag */ }; /** * struct ipa_clock - IPA clocking information - * @count: Clocking reference count - * @mutex: Protects clock enable/disable + * @dev: IPA device pointer * @core: IPA core clock * @flags: Boolean state flags * @interconnect_count: Number of elements in interconnect[] * @interconnect: Interconnect array */ struct ipa_clock { - refcount_t count; - struct mutex mutex; /* protects clock enable/disable */ + struct device *dev; struct clk *core; DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT); u32 interconnect_count; @@ -223,66 +222,86 @@ static int ipa_clock_enable(struct ipa *ipa) } /* Inverse of ipa_clock_enable() */ -static void ipa_clock_disable(struct ipa *ipa) +static int ipa_clock_disable(struct ipa *ipa) { clk_disable_unprepare(ipa->clock->core); - (void)ipa_interconnect_disable(ipa); + + return ipa_interconnect_disable(ipa); } -/* Get an IPA clock reference, but only if the reference count is - * already non-zero. Returns true if the additional reference was - * added successfully, or false otherwise. - */ -bool ipa_clock_get_additional(struct ipa *ipa) +static int ipa_runtime_suspend(struct device *dev) { - return refcount_inc_not_zero(&ipa->clock->count); + struct ipa *ipa = dev_get_drvdata(dev); + + /* Endpoints aren't usable until setup is complete */ + if (ipa->setup_complete) { + __clear_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags); + ipa_endpoint_suspend(ipa); + gsi_suspend(&ipa->gsi); + } + + return ipa_clock_disable(ipa); } -/* Get an IPA clock reference. If the reference count is non-zero, it is - * incremented and return is immediate. Otherwise it is checked again - * under protection of the mutex, and if appropriate the IPA clock - * is enabled. - * - * Incrementing the reference count is intentionally deferred until - * after the clock is running and endpoints are resumed. - */ -void ipa_clock_get(struct ipa *ipa) +static int ipa_runtime_resume(struct device *dev) { - struct ipa_clock *clock = ipa->clock; + struct ipa *ipa = dev_get_drvdata(dev); int ret; - /* If the clock is running, just bump the reference count */ - if (ipa_clock_get_additional(ipa)) - return; + ret = ipa_clock_enable(ipa); + if (WARN_ON(ret < 0)) + return ret; + + /* Endpoints aren't usable until setup is complete */ + if (ipa->setup_complete) { + gsi_resume(&ipa->gsi); + ipa_endpoint_resume(ipa); + } - /* Otherwise get the mutex and check again */ - mutex_lock(&clock->mutex); + return 0; +} - /* A reference might have been added before we got the mutex. */ - if (ipa_clock_get_additional(ipa)) - goto out_mutex_unlock; +static int ipa_runtime_idle(struct device *dev) +{ + return -EAGAIN; +} - ret = ipa_clock_enable(ipa); - if (!ret) - refcount_set(&clock->count, 1); -out_mutex_unlock: - mutex_unlock(&clock->mutex); +/* Get an IPA clock reference. If the reference count is non-zero, it is + * incremented and return is immediate. Otherwise the IPA clock is + * enabled. 
+ */ +int ipa_clock_get(struct ipa *ipa) +{ + return pm_runtime_get_sync(&ipa->pdev->dev); } /* Attempt to remove an IPA clock reference. If this represents the - * last reference, disable the IPA clock under protection of the mutex. + * last reference, disable the IPA clock. */ -void ipa_clock_put(struct ipa *ipa) +int ipa_clock_put(struct ipa *ipa) { - struct ipa_clock *clock = ipa->clock; + return pm_runtime_put(&ipa->pdev->dev); +} - /* If this is not the last reference there's nothing more to do */ - if (!refcount_dec_and_mutex_lock(&clock->count, &clock->mutex)) - return; +static int ipa_suspend(struct device *dev) +{ + struct ipa *ipa = dev_get_drvdata(dev); - ipa_clock_disable(ipa); + __set_bit(IPA_POWER_FLAG_SYSTEM, ipa->clock->flags); - mutex_unlock(&clock->mutex); + return pm_runtime_force_suspend(dev); +} + +static int ipa_resume(struct device *dev) +{ + struct ipa *ipa = dev_get_drvdata(dev); + int ret; + + ret = pm_runtime_force_resume(dev); + + __clear_bit(IPA_POWER_FLAG_SYSTEM, ipa->clock->flags); + + return ret; } /* Return the current IPA core clock rate */ @@ -303,25 +322,35 @@ u32 ipa_clock_rate(struct ipa *ipa) */ static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id) { - /* Just report the event, and let system resume handle the rest. - * More than one endpoint could signal this; if so, ignore - * all but the first. + /* To handle an IPA interrupt we will have resumed the hardware + * just to handle the interrupt, so we're done. If we are in a + * system suspend, trigger a system resume. */ - if (!test_and_set_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags)) - pm_wakeup_dev_event(&ipa->pdev->dev, 0, true); + if (!__test_and_set_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags)) + if (test_bit(IPA_POWER_FLAG_SYSTEM, ipa->clock->flags)) + pm_wakeup_dev_event(&ipa->pdev->dev, 0, true); /* Acknowledge/clear the suspend interrupt on all endpoints */ ipa_interrupt_suspend_clear_all(ipa->interrupt); } -void ipa_power_setup(struct ipa *ipa) +int ipa_power_setup(struct ipa *ipa) { + int ret; + ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND, ipa_suspend_handler); + + ret = device_init_wakeup(&ipa->pdev->dev, true); + if (ret) + ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); + + return ret; } void ipa_power_teardown(struct ipa *ipa) { + (void)device_init_wakeup(&ipa->pdev->dev, false); ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); } @@ -352,6 +381,7 @@ ipa_clock_init(struct device *dev, const struct ipa_clock_data *data) ret = -ENOMEM; goto err_clk_put; } + clock->dev = dev; clock->core = clk; clock->interconnect_count = data->interconnect_count; @@ -359,8 +389,8 @@ ipa_clock_init(struct device *dev, const struct ipa_clock_data *data) if (ret) goto err_kfree; - mutex_init(&clock->mutex); - refcount_set(&clock->count, 0); + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_enable(dev); return clock; @@ -377,68 +407,16 @@ void ipa_clock_exit(struct ipa_clock *clock) { struct clk *clk = clock->core; - WARN_ON(refcount_read(&clock->count) != 0); - mutex_destroy(&clock->mutex); + pm_runtime_disable(clock->dev); ipa_interconnect_exit(clock); kfree(clock); clk_put(clk); } -/** - * ipa_suspend() - Power management system suspend callback - * @dev: IPA device structure - * - * Return: Always returns zero - * - * Called by the PM framework when a system suspend operation is invoked. - * Suspends endpoints and releases the clock reference held to keep - * the IPA clock running until this point. 
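Editor's note: the removed ipa_clock_get_additional() above was built on refcount_inc_not_zero(): take a reference only when the count is already non-zero, i.e. only when the clock is known to be running, without ever blocking to start it. The same "get only if already active" semantics are what pm_runtime_get_if_active() provides in the ipa_smp2p.c hunk further down. A hedged user-space sketch of that compare-and-swap loop follows; it is illustrative only and not the kernel refcount_t code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Increment *count only if it is currently non-zero.
 * Returns true if a reference was taken. */
static bool get_if_active(atomic_uint *count)
{
    unsigned int old = atomic_load(count);

    do {
        if (old == 0)
            return false;           /* not running: refuse to take a reference */
    } while (!atomic_compare_exchange_weak(count, &old, old + 1));

    return true;
}

int main(void)
{
    atomic_uint refs = 0;

    printf("%d\n", get_if_active(&refs));   /* 0: clock off, no reference taken */
    atomic_store(&refs, 1);                 /* pretend someone enabled the clock */
    printf("%d\n", get_if_active(&refs));   /* 1: reference taken, refs is now 2 */
    return 0;
}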
- */ -static int ipa_suspend(struct device *dev) -{ - struct ipa *ipa = dev_get_drvdata(dev); - - /* Endpoints aren't usable until setup is complete */ - if (ipa->setup_complete) { - __clear_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags); - ipa_endpoint_suspend(ipa); - gsi_suspend(&ipa->gsi); - } - - ipa_clock_put(ipa); - - return 0; -} - -/** - * ipa_resume() - Power management system resume callback - * @dev: IPA device structure - * - * Return: Always returns 0 - * - * Called by the PM framework when a system resume operation is invoked. - * Takes an IPA clock reference to keep the clock running until suspend, - * and resumes endpoints. - */ -static int ipa_resume(struct device *dev) -{ - struct ipa *ipa = dev_get_drvdata(dev); - - /* This clock reference will keep the IPA out of suspend - * until we get a power management suspend request. - */ - ipa_clock_get(ipa); - - /* Endpoints aren't usable until setup is complete */ - if (ipa->setup_complete) { - gsi_resume(&ipa->gsi); - ipa_endpoint_resume(ipa); - } - - return 0; -} - const struct dev_pm_ops ipa_pm_ops = { - .suspend = ipa_suspend, - .resume = ipa_resume, + .suspend = ipa_suspend, + .resume = ipa_resume, + .runtime_suspend = ipa_runtime_suspend, + .runtime_resume = ipa_runtime_resume, + .runtime_idle = ipa_runtime_idle, }; diff --git a/drivers/net/ipa/ipa_clock.h b/drivers/net/ipa/ipa_clock.h index 2a0f7ff3c9e6..5c53241336a1 100644 --- a/drivers/net/ipa/ipa_clock.h +++ b/drivers/net/ipa/ipa_clock.h @@ -25,8 +25,10 @@ u32 ipa_clock_rate(struct ipa *ipa); /** * ipa_power_setup() - Set up IPA power management * @ipa: IPA pointer + * + * Return: 0 if successful, or a negative error code */ -void ipa_power_setup(struct ipa *ipa); +int ipa_power_setup(struct ipa *ipa); /** * ipa_power_teardown() - Inverse of ipa_power_setup() @@ -54,26 +56,24 @@ void ipa_clock_exit(struct ipa_clock *clock); * ipa_clock_get() - Get an IPA clock reference * @ipa: IPA pointer * - * This call blocks if this is the first reference. - */ -void ipa_clock_get(struct ipa *ipa); - -/** - * ipa_clock_get_additional() - Get an IPA clock reference if not first - * @ipa: IPA pointer + * Return: 0 if clock started, 1 if clock already running, or a negative + * error code * - * This returns immediately, and only takes a reference if not the first + * This call blocks if this is the first reference. A reference is + * taken even if an error occurs starting the IPA clock. */ -bool ipa_clock_get_additional(struct ipa *ipa); +int ipa_clock_get(struct ipa *ipa); /** * ipa_clock_put() - Drop an IPA clock reference * @ipa: IPA pointer * + * Return: 0 if successful, or a negative error code + * * This drops a clock reference. If the last reference is being dropped, * the clock is stopped and RX endpoints are suspended. This call will * not block unless the last reference is dropped. */ -void ipa_clock_put(struct ipa *ipa); +int ipa_clock_put(struct ipa *ipa); #endif /* _IPA_CLOCK_H_ */ diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index aa37f03f4557..934c14e066a0 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -83,8 +83,11 @@ static irqreturn_t ipa_isr_thread(int irq, void *dev_id) u32 pending; u32 offset; u32 mask; + int ret; - ipa_clock_get(ipa); + ret = ipa_clock_get(ipa); + if (WARN_ON(ret < 0)) + goto out_clock_put; /* The status register indicates which conditions are present, * including conditions whose interrupt is not enabled. 
Handle @@ -112,8 +115,8 @@ static irqreturn_t ipa_isr_thread(int irq, void *dev_id) offset = ipa_reg_irq_clr_offset(ipa->version); iowrite32(pending, ipa->reg_virt + offset); } - - ipa_clock_put(ipa); +out_clock_put: + (void)ipa_clock_put(ipa); return IRQ_HANDLED; } diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 25bbb456e007..69fa4b3120fd 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -101,9 +101,7 @@ int ipa_setup(struct ipa *ipa) if (ret) return ret; - ipa_power_setup(ipa); - - ret = device_init_wakeup(dev, true); + ret = ipa_power_setup(ipa); if (ret) goto err_gsi_teardown; @@ -154,7 +152,6 @@ err_command_disable: err_endpoint_teardown: ipa_endpoint_teardown(ipa); ipa_power_teardown(ipa); - (void)device_init_wakeup(dev, false); err_gsi_teardown: gsi_teardown(&ipa->gsi); @@ -181,7 +178,6 @@ static void ipa_teardown(struct ipa *ipa) ipa_endpoint_disable_one(command_endpoint); ipa_endpoint_teardown(ipa); ipa_power_teardown(ipa); - (void)device_init_wakeup(&ipa->pdev->dev, false); gsi_teardown(&ipa->gsi); } @@ -253,12 +249,11 @@ ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data) /* Compute the value to use in the COUNTER_CFG register AGGR_GRANULARITY * field to represent the given number of microseconds. The value is one * less than the number of timer ticks in the requested period. 0 is not - * a valid granularity value. + * a valid granularity value (so for example @usec must be at least 16 for + * a TIMER_FREQUENCY of 32000). */ -static u32 ipa_aggr_granularity_val(u32 usec) +static __always_inline u32 ipa_aggr_granularity_val(u32 usec) { - WARN_ON(!usec); - return DIV_ROUND_CLOSEST(usec * TIMER_FREQUENCY, USEC_PER_SEC) - 1; } @@ -427,12 +422,6 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data) { int ret; - /* Get a clock reference to allow initialization. This reference - * is held after initialization completes, and won't get dropped - * unless/until a system suspend request arrives. - */ - ipa_clock_get(ipa); - ipa_hardware_config(ipa, data); ret = ipa_mem_config(ipa); @@ -475,7 +464,6 @@ err_mem_deconfig: ipa_mem_deconfig(ipa); err_hardware_deconfig: ipa_hardware_deconfig(ipa); - ipa_clock_put(ipa); return ret; } @@ -493,7 +481,6 @@ static void ipa_deconfig(struct ipa *ipa) ipa->interrupt = NULL; ipa_mem_deconfig(ipa); ipa_hardware_deconfig(ipa); - ipa_clock_put(ipa); } static int ipa_firmware_load(struct device *dev) @@ -750,20 +737,22 @@ static int ipa_probe(struct platform_device *pdev) goto err_table_exit; /* The clock needs to be active for config and setup */ - ipa_clock_get(ipa); + ret = ipa_clock_get(ipa); + if (WARN_ON(ret < 0)) + goto err_clock_put; ret = ipa_config(ipa, data); if (ret) - goto err_clock_put; /* Error */ + goto err_clock_put; dev_info(dev, "IPA driver initialized"); /* If the modem is doing early initialization, it will trigger a - * call to ipa_setup() call when it has finished. In that case - * we're done here. + * call to ipa_setup() when it has finished. In that case we're + * done here. */ if (modem_init) - goto out_clock_put; /* Done; no error */ + goto done; /* Otherwise we need to load the firmware and have Trust Zone validate * and install it. If that succeeds we can proceed with setup. 
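Editor's note: in the ISR-thread and probe paths above, a failed ipa_clock_get() still branches to the put label (out_clock_put / err_clock_put). That follows the pm_runtime_get_sync() convention: the usage counter is incremented even when the resume fails, so the caller must drop the reference regardless of the return value. Below is a small user-space analog of that contract; the struct and function names are invented for illustration.

#include <stdio.h>

struct power_ctx {
    int usage;      /* reference count, bumped even on failure */
    int broken;     /* simulate a resume error */
};

/* Mirrors the get_sync contract: always take the reference first,
 * then try to power up; a negative return still leaves the ref held. */
static int power_get(struct power_ctx *p)
{
    p->usage++;
    if (p->broken)
        return -5;  /* e.g. -EIO */
    return 0;
}

static void power_put(struct power_ctx *p)
{
    if (--p->usage == 0)
        printf("powered down\n");
}

int main(void)
{
    struct power_ctx ctx = { .usage = 0, .broken = 1 };

    if (power_get(&ctx) < 0) {
        /* Error path must still drop the reference taken above. */
        power_put(&ctx);
    }
    printf("usage after error path: %d\n", ctx.usage);  /* 0, balanced */
    return 0;
}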
@@ -775,16 +764,15 @@ static int ipa_probe(struct platform_device *pdev) ret = ipa_setup(ipa); if (ret) goto err_deconfig; - -out_clock_put: - ipa_clock_put(ipa); +done: + (void)ipa_clock_put(ipa); return 0; err_deconfig: ipa_deconfig(ipa); err_clock_put: - ipa_clock_put(ipa); + (void)ipa_clock_put(ipa); ipa_modem_exit(ipa); err_table_exit: ipa_table_exit(ipa); @@ -810,7 +798,9 @@ static int ipa_remove(struct platform_device *pdev) struct ipa_clock *clock = ipa->clock; int ret; - ipa_clock_get(ipa); + ret = ipa_clock_get(ipa); + if (WARN_ON(ret < 0)) + goto out_clock_put; if (ipa->setup_complete) { ret = ipa_modem_stop(ipa); @@ -826,8 +816,8 @@ static int ipa_remove(struct platform_device *pdev) } ipa_deconfig(ipa); - - ipa_clock_put(ipa); +out_clock_put: + (void)ipa_clock_put(ipa); ipa_modem_exit(ipa); ipa_table_exit(ipa); diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index ad4019e8016e..c8724af935b8 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -9,6 +9,7 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/if_rmnet.h> +#include <linux/pm_runtime.h> #include <linux/remoteproc/qcom_rproc.h> #include "ipa.h" @@ -33,9 +34,14 @@ enum ipa_modem_state { IPA_MODEM_STATE_STOPPING, }; -/** struct ipa_priv - IPA network device private data */ +/** + * struct ipa_priv - IPA network device private data + * @ipa: IPA pointer + * @work: Work structure used to wake the modem netdev TX queue + */ struct ipa_priv { struct ipa *ipa; + struct work_struct work; }; /** ipa_open() - Opens the modem network interface */ @@ -45,7 +51,9 @@ static int ipa_open(struct net_device *netdev) struct ipa *ipa = priv->ipa; int ret; - ipa_clock_get(ipa); + ret = ipa_clock_get(ipa); + if (WARN_ON(ret < 0)) + goto err_clock_put; ret = ipa_endpoint_enable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); if (ret) @@ -57,12 +65,14 @@ static int ipa_open(struct net_device *netdev) netif_start_queue(netdev); + (void)ipa_clock_put(ipa); + return 0; err_disable_tx: ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); err_clock_put: - ipa_clock_put(ipa); + (void)ipa_clock_put(ipa); return ret; } @@ -72,13 +82,18 @@ static int ipa_stop(struct net_device *netdev) { struct ipa_priv *priv = netdev_priv(netdev); struct ipa *ipa = priv->ipa; + int ret; + + ret = ipa_clock_get(ipa); + if (WARN_ON(ret < 0)) + goto out_clock_put; netif_stop_queue(netdev); ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); - - ipa_clock_put(ipa); +out_clock_put: + (void)ipa_clock_put(ipa); return 0; } @@ -91,13 +106,15 @@ static int ipa_stop(struct net_device *netdev) * NETDEV_TX_OK: Success * NETDEV_TX_BUSY: Error while transmitting the skb. 
Try again later */ -static int ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t +ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct net_device_stats *stats = &netdev->stats; struct ipa_priv *priv = netdev_priv(netdev); struct ipa_endpoint *endpoint; struct ipa *ipa = priv->ipa; u32 skb_len = skb->len; + struct device *dev; int ret; if (!skb_len) @@ -107,7 +124,31 @@ static int ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev) if (endpoint->data->qmap && skb->protocol != htons(ETH_P_MAP)) goto err_drop_skb; + /* The hardware must be powered for us to transmit */ + dev = &ipa->pdev->dev; + ret = pm_runtime_get(dev); + if (ret < 1) { + /* If a resume won't happen, just drop the packet */ + if (ret < 0 && ret != -EINPROGRESS) { + pm_runtime_put_noidle(dev); + goto err_drop_skb; + } + + /* No power (yet). Stop the network stack from transmitting + * until we're resumed; ipa_modem_resume() arranges for the + * TX queue to be started again. + */ + netif_stop_queue(netdev); + + (void)pm_runtime_put(dev); + + return NETDEV_TX_BUSY; + } + ret = ipa_endpoint_skb_tx(endpoint, skb); + + (void)pm_runtime_put(dev); + if (ret) { if (ret != -E2BIG) return NETDEV_TX_BUSY; @@ -181,12 +222,28 @@ void ipa_modem_suspend(struct net_device *netdev) if (!(netdev->flags & IFF_UP)) return; - netif_stop_queue(netdev); - ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); } +/** + * ipa_modem_wake_queue_work() - enable modem netdev queue + * @work: Work structure + * + * Re-enable transmit on the modem network device. This is called + * in (power management) work queue context, scheduled when resuming + * the modem. We can't enable the queue directly in ipa_modem_resume() + * because transmits restart the instant the queue is awakened; but the + * device power state won't be ACTIVE until *after* ipa_modem_resume() + * returns. 
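Editor's note: the comment above explains why the modem TX queue is woken from a work item rather than directly in the resume callback: netif_wake_queue() lets transmits start immediately, but the runtime-PM state only becomes ACTIVE after ipa_modem_resume() returns, so the wake must be deferred until the resume has fully completed. The single-threaded sketch below only illustrates that ordering; the flags and function names are assumptions made for the example.

#include <stdio.h>
#include <stdbool.h>

static bool device_active;
static bool wake_pending;           /* stands in for the queued work item */

static void wake_tx_queue(void)
{
    /* Safe: only runs once device_active has already been set. */
    printf("TX queue woken, device_active=%d\n", device_active);
}

static void modem_resume(void)
{
    /* Waking the queue right here would let transmits race ahead of
     * device_active being set by the caller, so just schedule it. */
    wake_pending = true;
}

int main(void)
{
    modem_resume();                 /* the resume callback itself */
    device_active = true;           /* framework marks the device active */

    if (wake_pending) {             /* deferred work runs afterwards */
        wake_pending = false;
        wake_tx_queue();
    }
    return 0;
}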
+ */ +static void ipa_modem_wake_queue_work(struct work_struct *work) +{ + struct ipa_priv *priv = container_of(work, struct ipa_priv, work); + + netif_wake_queue(priv->ipa->modem_netdev); +} + /** ipa_modem_resume() - resume callback for runtime_pm * @dev: pointer to device * @@ -203,7 +260,8 @@ void ipa_modem_resume(struct net_device *netdev) ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); - netif_wake_queue(netdev); + /* Arrange for the TX queue to be restarted */ + (void)queue_pm_work(&priv->work); } int ipa_modem_start(struct ipa *ipa) @@ -231,6 +289,7 @@ int ipa_modem_start(struct ipa *ipa) SET_NETDEV_DEV(netdev, &ipa->pdev->dev); priv = netdev_priv(netdev); priv->ipa = ipa; + INIT_WORK(&priv->work, ipa_modem_wake_queue_work); ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev; ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; ipa->modem_netdev = netdev; @@ -275,6 +334,9 @@ int ipa_modem_stop(struct ipa *ipa) /* Clean up the netdev and endpoints if it was started */ if (netdev) { + struct ipa_priv *priv = netdev_priv(netdev); + + cancel_work_sync(&priv->work); /* If it was opened, stop it first */ if (netdev->flags & IFF_UP) (void)ipa_stop(netdev); @@ -297,7 +359,9 @@ static void ipa_modem_crashed(struct ipa *ipa) struct device *dev = &ipa->pdev->dev; int ret; - ipa_clock_get(ipa); + ret = ipa_clock_get(ipa); + if (WARN_ON(ret < 0)) + goto out_clock_put; ipa_endpoint_modem_pause_all(ipa, true); @@ -324,7 +388,8 @@ static void ipa_modem_crashed(struct ipa *ipa) if (ret) dev_err(dev, "error %d zeroing modem memory regions\n", ret); - ipa_clock_put(ipa); +out_clock_put: + (void)ipa_clock_put(ipa); } static int ipa_modem_notify(struct notifier_block *nb, unsigned long action, diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c index 0d15438a79e2..04b977cf9159 100644 --- a/drivers/net/ipa/ipa_smp2p.c +++ b/drivers/net/ipa/ipa_smp2p.c @@ -9,6 +9,7 @@ #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/panic_notifier.h> +#include <linux/pm_runtime.h> #include <linux/soc/qcom/smem.h> #include <linux/soc/qcom/smem_state.h> @@ -84,13 +85,15 @@ struct ipa_smp2p { */ static void ipa_smp2p_notify(struct ipa_smp2p *smp2p) { + struct device *dev; u32 value; u32 mask; if (smp2p->notified) return; - smp2p->clock_on = ipa_clock_get_additional(smp2p->ipa); + dev = &smp2p->ipa->pdev->dev; + smp2p->clock_on = pm_runtime_get_if_active(dev, true) > 0; /* Signal whether the clock is enabled */ mask = BIT(smp2p->enabled_bit); @@ -150,24 +153,26 @@ static void ipa_smp2p_panic_notifier_unregister(struct ipa_smp2p *smp2p) static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id) { struct ipa_smp2p *smp2p = dev_id; + int ret; mutex_lock(&smp2p->mutex); - if (!smp2p->disabled) { - int ret; - - /* The clock needs to be active for setup */ - ipa_clock_get(smp2p->ipa); + if (smp2p->disabled) + goto out_mutex_unlock; + smp2p->disabled = true; /* If any others arrive, ignore them */ - ret = ipa_setup(smp2p->ipa); - if (ret) - dev_err(&smp2p->ipa->pdev->dev, - "error %d from ipa_setup()\n", ret); - smp2p->disabled = true; + /* The clock needs to be active for setup */ + ret = ipa_clock_get(smp2p->ipa); + if (WARN_ON(ret < 0)) + goto out_clock_put; - ipa_clock_put(smp2p->ipa); - } + /* An error here won't cause driver shutdown, so warn if one occurs */ + ret = ipa_setup(smp2p->ipa); + WARN(ret != 0, "error %d from ipa_setup()\n", ret); +out_clock_put: + 
(void)ipa_clock_put(smp2p->ipa); +out_mutex_unlock: mutex_unlock(&smp2p->mutex); return IRQ_HANDLED; @@ -206,7 +211,7 @@ static void ipa_smp2p_clock_release(struct ipa *ipa) if (!ipa->smp2p->clock_on) return; - ipa_clock_put(ipa); + (void)ipa_clock_put(ipa); ipa->smp2p->clock_on = false; } diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index f88ee02457d4..9c8818c39073 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -154,7 +154,7 @@ static void ipa_uc_response_hdlr(struct ipa *ipa, enum ipa_irq_id irq_id) case IPA_UC_RESPONSE_INIT_COMPLETED: if (ipa->uc_clocked) { ipa->uc_loaded = true; - ipa_clock_put(ipa); + (void)ipa_clock_put(ipa); ipa->uc_clocked = false; } else { dev_warn(dev, "unexpected init_completed response\n"); @@ -182,21 +182,25 @@ void ipa_uc_deconfig(struct ipa *ipa) ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1); ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0); if (ipa->uc_clocked) - ipa_clock_put(ipa); + (void)ipa_clock_put(ipa); } /* Take a proxy clock reference for the microcontroller */ void ipa_uc_clock(struct ipa *ipa) { static bool already; + int ret; if (already) return; already = true; /* Only do this on first boot */ /* This clock reference dropped in ipa_uc_response_hdlr() above */ - ipa_clock_get(ipa); - ipa->uc_clocked = true; + ret = ipa_clock_get(ipa); + if (WARN(ret < 0, "error %d getting proxy clock\n", ret)) + (void)ipa_clock_put(ipa); + + ipa->uc_clocked = ret >= 0; } /* Send a command to the microcontroller */ diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig index 99a6c13a11af..6da1fcb25847 100644 --- a/drivers/net/mdio/Kconfig +++ b/drivers/net/mdio/Kconfig @@ -169,9 +169,10 @@ config MDIO_OCTEON config MDIO_IPQ4019 tristate "Qualcomm IPQ4019 MDIO interface support" depends on HAS_IOMEM && OF_MDIO + depends on COMMON_CLK help This driver supports the MDIO interface found in Qualcomm - IPQ40xx series Soc-s. + IPQ40xx, IPQ60xx, IPQ807x and IPQ50xx series Soc-s. config MDIO_IPQ8064 tristate "Qualcomm IPQ8064 MDIO interface support" diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c index 9cd71d896963..14e08b786334 100644 --- a/drivers/net/mdio/mdio-ipq4019.c +++ b/drivers/net/mdio/mdio-ipq4019.c @@ -11,6 +11,7 @@ #include <linux/of_mdio.h> #include <linux/phy.h> #include <linux/platform_device.h> +#include <linux/clk.h> #define MDIO_MODE_REG 0x40 #define MDIO_ADDR_REG 0x44 @@ -31,8 +32,15 @@ #define IPQ4019_MDIO_TIMEOUT 10000 #define IPQ4019_MDIO_SLEEP 10 +/* MDIO clock source frequency is fixed to 100M */ +#define IPQ_MDIO_CLK_RATE 100000000 + +#define IPQ_PHY_SET_DELAY_US 100000 + struct ipq4019_mdio_data { void __iomem *membase; + void __iomem *eth_ldo_rdy; + struct clk *mdio_clk; }; static int ipq4019_mdio_wait_busy(struct mii_bus *bus) @@ -171,10 +179,35 @@ static int ipq4019_mdio_write(struct mii_bus *bus, int mii_id, int regnum, return 0; } +static int ipq_mdio_reset(struct mii_bus *bus) +{ + struct ipq4019_mdio_data *priv = bus->priv; + u32 val; + int ret; + + /* To indicate CMN_PLL that ethernet_ldo has been ready if platform resource 1 + * is specified in the device tree. 
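Editor's note: the mdio-ipq4019 probe and reset hunks that continue below lean on the kernel's optional-resource convention: devm_clk_get_optional() returns NULL when no clock is described in the device tree, and the clk API treats a NULL handle as a no-op, so ipq_mdio_reset() can call clk_set_rate() and clk_prepare_enable() unconditionally. The user-space sketch below only illustrates that "NULL means absent, operations on absent succeed" pattern; the struct and the _opt suffix are invented and are not the real clk API.

#include <stdio.h>
#include <stddef.h>

struct clk { unsigned long rate; };

/* Treat a NULL handle as "no clock described": succeed without doing anything. */
static int clk_set_rate_opt(struct clk *clk, unsigned long rate)
{
    if (!clk)
        return 0;
    clk->rate = rate;
    return 0;
}

int main(void)
{
    struct clk mdio = { 0 };

    printf("%d\n", clk_set_rate_opt(NULL, 100000000));   /* 0: absent clock, no-op */
    printf("%d\n", clk_set_rate_opt(&mdio, 100000000));  /* 0: rate programmed */
    printf("rate=%lu\n", mdio.rate);
    return 0;
}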
+ */ + if (priv->eth_ldo_rdy) { + val = readl(priv->eth_ldo_rdy); + val |= BIT(0); + writel(val, priv->eth_ldo_rdy); + fsleep(IPQ_PHY_SET_DELAY_US); + } + + /* Configure MDIO clock source frequency if clock is specified in the device tree */ + ret = clk_set_rate(priv->mdio_clk, IPQ_MDIO_CLK_RATE); + if (ret) + return ret; + + return clk_prepare_enable(priv->mdio_clk); +} + static int ipq4019_mdio_probe(struct platform_device *pdev) { struct ipq4019_mdio_data *priv; struct mii_bus *bus; + struct resource *res; int ret; bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(*priv)); @@ -187,9 +220,19 @@ static int ipq4019_mdio_probe(struct platform_device *pdev) if (IS_ERR(priv->membase)) return PTR_ERR(priv->membase); + priv->mdio_clk = devm_clk_get_optional(&pdev->dev, "gcc_mdio_ahb_clk"); + if (IS_ERR(priv->mdio_clk)) + return PTR_ERR(priv->mdio_clk); + + /* The platform resource is provided on the chipset IPQ5018 */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (res) + priv->eth_ldo_rdy = devm_ioremap_resource(&pdev->dev, res); + bus->name = "ipq4019_mdio"; bus->read = ipq4019_mdio_read; bus->write = ipq4019_mdio_write; + bus->reset = ipq_mdio_reset; bus->parent = &pdev->dev; snprintf(bus->id, MII_BUS_ID_SIZE, "%s%d", pdev->name, pdev->id); @@ -215,6 +258,7 @@ static int ipq4019_mdio_remove(struct platform_device *pdev) static const struct of_device_id ipq4019_mdio_dt_ids[] = { { .compatible = "qcom,ipq4019-mdio" }, + { .compatible = "qcom,ipq5018-mdio" }, { } }; MODULE_DEVICE_TABLE(of, ipq4019_mdio_dt_ids); diff --git a/drivers/net/mdio/mdio-mux.c b/drivers/net/mdio/mdio-mux.c index 110e4ee85785..ebd001f0eece 100644 --- a/drivers/net/mdio/mdio-mux.c +++ b/drivers/net/mdio/mdio-mux.c @@ -82,6 +82,17 @@ out: static int parent_count; +static void mdio_mux_uninit_children(struct mdio_mux_parent_bus *pb) +{ + struct mdio_mux_child_bus *cb = pb->children; + + while (cb) { + mdiobus_unregister(cb->mii_bus); + mdiobus_free(cb->mii_bus); + cb = cb->next; + } +} + int mdio_mux_init(struct device *dev, struct device_node *mux_node, int (*switch_fn)(int cur, int desired, void *data), @@ -144,7 +155,7 @@ int mdio_mux_init(struct device *dev, cb = devm_kzalloc(dev, sizeof(*cb), GFP_KERNEL); if (!cb) { ret_val = -ENOMEM; - continue; + goto err_loop; } cb->bus_number = v; cb->parent = pb; @@ -152,8 +163,7 @@ int mdio_mux_init(struct device *dev, cb->mii_bus = mdiobus_alloc(); if (!cb->mii_bus) { ret_val = -ENOMEM; - devm_kfree(dev, cb); - continue; + goto err_loop; } cb->mii_bus->priv = cb; @@ -165,11 +175,15 @@ int mdio_mux_init(struct device *dev, cb->mii_bus->write = mdio_mux_write; r = of_mdiobus_register(cb->mii_bus, child_bus_node); if (r) { + mdiobus_free(cb->mii_bus); + if (r == -EPROBE_DEFER) { + ret_val = r; + goto err_loop; + } + devm_kfree(dev, cb); dev_err(dev, "Error: Failed to register MDIO bus for child %pOF\n", child_bus_node); - mdiobus_free(cb->mii_bus); - devm_kfree(dev, cb); } else { cb->next = pb->children; pb->children = cb; @@ -181,7 +195,10 @@ int mdio_mux_init(struct device *dev, } dev_err(dev, "Error: No acceptable child buses found\n"); - devm_kfree(dev, pb); + +err_loop: + mdio_mux_uninit_children(pb); + of_node_put(child_bus_node); err_pb_kz: put_device(&parent_bus->dev); err_parent_bus: @@ -193,14 +210,8 @@ EXPORT_SYMBOL_GPL(mdio_mux_init); void mdio_mux_uninit(void *mux_handle) { struct mdio_mux_parent_bus *pb = mux_handle; - struct mdio_mux_child_bus *cb = pb->children; - - while (cb) { - mdiobus_unregister(cb->mii_bus); - mdiobus_free(cb->mii_bus); - cb = 
cb->next; - } + mdio_mux_uninit_children(pb); put_device(&pb->mii_bus->dev); } EXPORT_SYMBOL_GPL(mdio_mux_uninit); diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c index d127eb6e9257..975f7f9bdf4c 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi_net.c @@ -319,7 +319,7 @@ static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev) u64_stats_init(&mhi_netdev->stats.tx_syncp); /* Start MHI channels */ - err = mhi_prepare_for_transfer(mhi_dev); + err = mhi_prepare_for_transfer(mhi_dev, 0); if (err) goto out_err; diff --git a/drivers/net/mii.c b/drivers/net/mii.c index 779c3a96dba7..22680f47385d 100644 --- a/drivers/net/mii.c +++ b/drivers/net/mii.c @@ -49,10 +49,8 @@ static u32 mii_get_an(struct mii_if_info *mii, u16 addr) * * The @ecmd parameter is expected to have been cleared before calling * mii_ethtool_gset(). - * - * Returns 0 for success, negative on error. */ -int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) +void mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) { struct net_device *dev = mii->dev; u16 bmcr, bmsr, ctrl1000 = 0, stat1000 = 0; @@ -131,8 +129,6 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) mii->full_duplex = ecmd->duplex; /* ignore maxtxpkt, maxrxpkt for now */ - - return 0; } /** diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index ff01e5bdc72e..62d033a1a557 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -183,8 +183,6 @@ new_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); - struct nsim_dev *nsim_dev = dev_get_drvdata(dev); - struct devlink *devlink; unsigned int port_index; int ret; @@ -195,12 +193,15 @@ new_port_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - devlink = priv_to_devlink(nsim_dev); + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EBUSY; + + if (nsim_bus_dev->in_reload) { + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + return -EBUSY; + } - mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); - devlink_reload_disable(devlink); ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); - devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } @@ -212,8 +213,6 @@ del_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); - struct nsim_dev *nsim_dev = dev_get_drvdata(dev); - struct devlink *devlink; unsigned int port_index; int ret; @@ -224,12 +223,15 @@ del_port_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - devlink = priv_to_devlink(nsim_dev); + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EBUSY; + + if (nsim_bus_dev->in_reload) { + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + return -EBUSY; + } - mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); - devlink_reload_disable(devlink); ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); - devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? 
ret : count; } diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index d538a39d4225..54313bd57797 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -864,16 +864,24 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_bus_dev *nsim_bus_dev; + + nsim_bus_dev = nsim_dev->nsim_bus_dev; + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EOPNOTSUPP; if (nsim_dev->dont_allow_reload) { /* For testing purposes, user set debugfs dont_allow_reload * value to true. So forbid it. */ NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes"); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return -EOPNOTSUPP; } + nsim_bus_dev->in_reload = true; nsim_dev_reload_destroy(nsim_dev); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return 0; } @@ -882,17 +890,26 @@ static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_actio struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_bus_dev *nsim_bus_dev; + int ret; + + nsim_bus_dev = nsim_dev->nsim_bus_dev; + mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); + nsim_bus_dev->in_reload = false; if (nsim_dev->fail_reload) { /* For testing purposes, user set debugfs fail_reload * value to true. Fail right away. */ NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes"); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return -EINVAL; } *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); - return nsim_dev_reload_create(nsim_dev, extack); + ret = nsim_dev_reload_create(nsim_dev, extack); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + return ret; } static int nsim_dev_info_get(struct devlink *devlink, @@ -1432,7 +1449,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) int err; devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev), - nsim_bus_dev->initial_net); + nsim_bus_dev->initial_net, &nsim_bus_dev->dev); if (!devlink) return -ENOMEM; nsim_dev = devlink_priv(devlink); @@ -1453,7 +1470,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) if (err) goto err_devlink_free; - err = devlink_register(devlink, &nsim_bus_dev->dev); + err = devlink_register(devlink); if (err) goto err_resources_unregister; diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 1c20bcbd9d91..793c86dc5a9c 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -362,6 +362,7 @@ struct nsim_bus_dev { struct nsim_vf_config *vfconfigs; /* Lock for devlink->reload_enabled in netdevsim module */ struct mutex nsim_bus_reload_lock; + bool in_reload; bool init; }; diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 63fda3fc40aa..fb0a83dc09ac 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -65,6 +65,9 @@ static const int xpcs_xlgmii_features[] = { }; static const int xpcs_sgmii_features[] = { + ETHTOOL_LINK_MODE_Pause_BIT, + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + ETHTOOL_LINK_MODE_Autoneg_BIT, ETHTOOL_LINK_MODE_10baseT_Half_BIT, ETHTOOL_LINK_MODE_10baseT_Full_BIT, ETHTOOL_LINK_MODE_100baseT_Half_BIT, @@ -75,6 +78,7 @@ static const int xpcs_sgmii_features[] = { }; static const int xpcs_2500basex_features[] = { + ETHTOOL_LINK_MODE_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT, ETHTOOL_LINK_MODE_2500baseX_Full_BIT, 
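Editor's note: the netdevsim hunks above drop the devlink_reload_disable()/enable() calls in favour of a trylock plus an in_reload flag on the bus lock: a sysfs port add/del that races with a devlink reload simply returns -EBUSY instead of blocking, and the flag covers the window between reload_down and reload_up. A hedged pthread analog of the same idea follows; the names and the exact locking in reload_down/reload_up are simplified for illustration.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <errno.h>

static pthread_mutex_t bus_lock = PTHREAD_MUTEX_INITIALIZER;
static bool in_reload;

/* sysfs-style handler: never blocks on the bus lock, just reports busy. */
static int port_add(void)
{
    if (pthread_mutex_trylock(&bus_lock))
        return -EBUSY;              /* reload currently holds the lock */
    if (in_reload) {
        pthread_mutex_unlock(&bus_lock);
        return -EBUSY;              /* between reload_down and reload_up */
    }
    /* ... create the port ... */
    pthread_mutex_unlock(&bus_lock);
    return 0;
}

static void reload_down(void)
{
    pthread_mutex_lock(&bus_lock);
    in_reload = true;
    /* ... tear everything down ... */
    pthread_mutex_unlock(&bus_lock);
}

static void reload_up(void)
{
    pthread_mutex_lock(&bus_lock);
    in_reload = false;
    /* ... recreate the device ... */
    pthread_mutex_unlock(&bus_lock);
}

int main(void)
{
    reload_down();
    printf("port_add during reload: %d\n", port_add());   /* -EBUSY (-16) */
    reload_up();
    printf("port_add after reload:  %d\n", port_add());   /* 0 */
    return 0;
}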
@@ -1089,7 +1093,7 @@ struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev, xpcs = kzalloc(sizeof(*xpcs), GFP_KERNEL); if (!xpcs) - return NULL; + return ERR_PTR(-ENOMEM); xpcs->mdiodev = mdiodev; diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 7564ae0c1997..902495afcb38 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -236,6 +236,7 @@ config MICROCHIP_T1_PHY config MICROSEMI_PHY tristate "Microsemi PHYs" depends on MACSEC || MACSEC=n + depends on PTP_1588_CLOCK_OPTIONAL || !NETWORK_PHY_TIMESTAMPING select CRYPTO_LIB_AES if MACSEC help Currently supports VSC8514, VSC8530, VSC8531, VSC8540 and VSC8541 PHYs @@ -253,6 +254,7 @@ config NATIONAL_PHY config NXP_C45_TJA11XX_PHY tristate "NXP C45 TJA11XX PHYs" + depends on PTP_1588_CLOCK_OPTIONAL help Enable support for NXP C45 TJA11XX PHYs. Currently supports only the TJA1103 PHY. diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 3de93c9f2744..4fcfca4e1702 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -32,6 +32,7 @@ #include <linux/marvell_phy.h> #include <linux/bitfield.h> #include <linux/of.h> +#include <linux/sfp.h> #include <linux/io.h> #include <asm/irq.h> @@ -46,6 +47,7 @@ #define MII_MARVELL_MISC_TEST_PAGE 0x06 #define MII_MARVELL_VCT7_PAGE 0x07 #define MII_MARVELL_WOL_PAGE 0x11 +#define MII_MARVELL_MODE_PAGE 0x12 #define MII_M1011_IEVENT 0x13 #define MII_M1011_IEVENT_CLEAR 0x0000 @@ -155,6 +157,7 @@ #define MII_88E1318S_PHY_WOL_CTRL 0x10 #define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS BIT(12) +#define MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE BIT(13) #define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE BIT(14) #define MII_PHY_LED_CTRL 16 @@ -176,7 +179,14 @@ #define MII_88E1510_GEN_CTRL_REG_1 0x14 #define MII_88E1510_GEN_CTRL_REG_1_MODE_MASK 0x7 +#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII 0x0 /* RGMII to copper */ #define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII 0x1 /* SGMII to copper */ +/* RGMII to 1000BASE-X */ +#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_1000X 0x2 +/* RGMII to 100BASE-FX */ +#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_100FX 0x3 +/* RGMII to SGMII */ +#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII 0x4 #define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */ #define MII_VCT5_TX_RX_MDI0_COUPLING 0x10 @@ -1746,13 +1756,19 @@ static void m88e1318_get_wol(struct phy_device *phydev, { int ret; - wol->supported = WAKE_MAGIC; + wol->supported = WAKE_MAGIC | WAKE_PHY; wol->wolopts = 0; ret = phy_read_paged(phydev, MII_MARVELL_WOL_PAGE, MII_88E1318S_PHY_WOL_CTRL); - if (ret >= 0 && ret & MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE) + if (ret < 0) + return; + + if (ret & MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE) wol->wolopts |= WAKE_MAGIC; + + if (ret & MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE) + wol->wolopts |= WAKE_PHY; } static int m88e1318_set_wol(struct phy_device *phydev, @@ -1764,7 +1780,7 @@ static int m88e1318_set_wol(struct phy_device *phydev, if (oldpage < 0) goto error; - if (wol->wolopts & WAKE_MAGIC) { + if (wol->wolopts & (WAKE_MAGIC | WAKE_PHY)) { /* Explicitly switch to page 0x00, just to be sure */ err = marvell_write_page(phydev, MII_MARVELL_COPPER_PAGE); if (err < 0) @@ -1796,7 +1812,9 @@ static int m88e1318_set_wol(struct phy_device *phydev, MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW); if (err < 0) goto error; + } + if (wol->wolopts & WAKE_MAGIC) { err = marvell_write_page(phydev, MII_MARVELL_WOL_PAGE); if (err < 0) goto error; @@ -1837,6 +1855,30 @@ static int 
m88e1318_set_wol(struct phy_device *phydev, goto error; } + if (wol->wolopts & WAKE_PHY) { + err = marvell_write_page(phydev, MII_MARVELL_WOL_PAGE); + if (err < 0) + goto error; + + /* Clear WOL status and enable link up event */ + err = __phy_modify(phydev, MII_88E1318S_PHY_WOL_CTRL, 0, + MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS | + MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE); + if (err < 0) + goto error; + } else { + err = marvell_write_page(phydev, MII_MARVELL_WOL_PAGE); + if (err < 0) + goto error; + + /* Clear WOL status and disable link up event */ + err = __phy_modify(phydev, MII_88E1318S_PHY_WOL_CTRL, + MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE, + MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS); + if (err < 0) + goto error; + } + error: return phy_restore_page(phydev, oldpage, err); } @@ -2701,6 +2743,100 @@ static int marvell_probe(struct phy_device *phydev) return marvell_hwmon_probe(phydev); } +static int m88e1510_sfp_insert(void *upstream, const struct sfp_eeprom_id *id) +{ + struct phy_device *phydev = upstream; + phy_interface_t interface; + struct device *dev; + int oldpage; + int ret = 0; + u16 mode; + + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; + + dev = &phydev->mdio.dev; + + sfp_parse_support(phydev->sfp_bus, id, supported); + interface = sfp_select_interface(phydev->sfp_bus, supported); + + dev_info(dev, "%s SFP module inserted\n", phy_modes(interface)); + + switch (interface) { + case PHY_INTERFACE_MODE_1000BASEX: + mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_1000X; + + break; + case PHY_INTERFACE_MODE_100BASEX: + mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_100FX; + + break; + case PHY_INTERFACE_MODE_SGMII: + mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII; + + break; + default: + dev_err(dev, "Incompatible SFP module inserted\n"); + + return -EINVAL; + } + + oldpage = phy_select_page(phydev, MII_MARVELL_MODE_PAGE); + if (oldpage < 0) + goto error; + + ret = __phy_modify(phydev, MII_88E1510_GEN_CTRL_REG_1, + MII_88E1510_GEN_CTRL_REG_1_MODE_MASK, mode); + if (ret < 0) + goto error; + + ret = __phy_set_bits(phydev, MII_88E1510_GEN_CTRL_REG_1, + MII_88E1510_GEN_CTRL_REG_1_RESET); + +error: + return phy_restore_page(phydev, oldpage, ret); +} + +static void m88e1510_sfp_remove(void *upstream) +{ + struct phy_device *phydev = upstream; + int oldpage; + int ret = 0; + + oldpage = phy_select_page(phydev, MII_MARVELL_MODE_PAGE); + if (oldpage < 0) + goto error; + + ret = __phy_modify(phydev, MII_88E1510_GEN_CTRL_REG_1, + MII_88E1510_GEN_CTRL_REG_1_MODE_MASK, + MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII); + if (ret < 0) + goto error; + + ret = __phy_set_bits(phydev, MII_88E1510_GEN_CTRL_REG_1, + MII_88E1510_GEN_CTRL_REG_1_RESET); + +error: + phy_restore_page(phydev, oldpage, ret); +} + +static const struct sfp_upstream_ops m88e1510_sfp_ops = { + .module_insert = m88e1510_sfp_insert, + .module_remove = m88e1510_sfp_remove, + .attach = phy_sfp_attach, + .detach = phy_sfp_detach, +}; + +static int m88e1510_probe(struct phy_device *phydev) +{ + int err; + + err = marvell_probe(phydev); + if (err) + return err; + + return phy_sfp_probe(phydev, &m88e1510_sfp_ops); +} + static struct phy_driver marvell_drivers[] = { { .phy_id = MARVELL_PHY_ID_88E1101, @@ -2927,7 +3063,7 @@ static struct phy_driver marvell_drivers[] = { .driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops), .features = PHY_GBIT_FIBRE_FEATURES, .flags = PHY_POLL_CABLE_TEST, - .probe = marvell_probe, + .probe = m88e1510_probe, .config_init = m88e1510_config_init, .config_aneg = m88e1510_config_aneg, .read_status 
= marvell_read_status, diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 4d53886f7d51..5c928f827173 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -401,11 +401,11 @@ static int ksz8041_config_aneg(struct phy_device *phydev) } static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, - const u32 ksz_phy_id) + const bool ksz_8051) { int ret; - if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id) + if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051) return 0; ret = phy_read(phydev, MII_BMSR); @@ -418,7 +418,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, * the switch does not. */ ret &= BMSR_ERCAP; - if (ksz_phy_id == PHY_ID_KSZ8051) + if (ksz_8051) return ret; else return !ret; @@ -426,7 +426,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, static int ksz8051_match_phy_device(struct phy_device *phydev) { - return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051); + return ksz8051_ksz8795_match_phy_device(phydev, true); } static int ksz8081_config_init(struct phy_device *phydev) @@ -535,7 +535,7 @@ static int ksz8061_config_init(struct phy_device *phydev) static int ksz8795_match_phy_device(struct phy_device *phydev) { - return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX); + return ksz8051_ksz8795_match_phy_device(phydev, false); } static int ksz9021_load_values_from_of(struct phy_device *phydev, @@ -1760,8 +1760,6 @@ static struct phy_driver ksphy_driver[] = { .name = "Micrel KSZ87XX Switch", /* PHY_BASIC_FEATURES */ .config_init = kszphy_config_init, - .config_aneg = ksz8873mll_config_aneg, - .read_status = ksz8873mll_read_status, .match_phy_device = ksz8795_match_phy_device, .suspend = genphy_suspend, .resume = genphy_resume, diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c index afd7afa1f498..9944cc501806 100644 --- a/drivers/net/phy/nxp-tja11xx.c +++ b/drivers/net/phy/nxp-tja11xx.c @@ -47,12 +47,14 @@ #define MII_INTSRC_LINK_FAIL BIT(10) #define MII_INTSRC_LINK_UP BIT(9) #define MII_INTSRC_MASK (MII_INTSRC_LINK_FAIL | MII_INTSRC_LINK_UP) -#define MII_INTSRC_TEMP_ERR BIT(1) #define MII_INTSRC_UV_ERR BIT(3) +#define MII_INTSRC_TEMP_ERR BIT(1) #define MII_INTEN 22 #define MII_INTEN_LINK_FAIL BIT(10) #define MII_INTEN_LINK_UP BIT(9) +#define MII_INTEN_UV_ERR BIT(3) +#define MII_INTEN_TEMP_ERR BIT(1) #define MII_COMMSTAT 23 #define MII_COMMSTAT_LINK_UP BIT(15) @@ -607,7 +609,8 @@ static int tja11xx_config_intr(struct phy_device *phydev) if (err) return err; - value = MII_INTEN_LINK_FAIL | MII_INTEN_LINK_UP; + value = MII_INTEN_LINK_FAIL | MII_INTEN_LINK_UP | + MII_INTEN_UV_ERR | MII_INTEN_TEMP_ERR; err = phy_write(phydev, MII_INTEN, value); } else { err = phy_write(phydev, MII_INTEN, value); @@ -622,6 +625,7 @@ static int tja11xx_config_intr(struct phy_device *phydev) static irqreturn_t tja11xx_handle_interrupt(struct phy_device *phydev) { + struct device *dev = &phydev->mdio.dev; int irq_status; irq_status = phy_read(phydev, MII_INTSRC); @@ -630,6 +634,11 @@ static irqreturn_t tja11xx_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } + if (irq_status & MII_INTSRC_TEMP_ERR) + dev_warn(dev, "Overtemperature error detected (temp > 155C°).\n"); + if (irq_status & MII_INTSRC_UV_ERR) + dev_warn(dev, "Undervoltage error detected.\n"); + if (!(irq_status & MII_INTSRC_MASK)) return IRQ_NONE; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 216a9f4e9750..e9e81573f21e 100644 --- 
a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -284,7 +284,7 @@ static struct channel *ppp_find_channel(struct ppp_net *pn, int unit); static int ppp_connect_channel(struct channel *pch, int unit); static int ppp_disconnect_channel(struct channel *pch); static void ppp_destroy_channel(struct channel *pch); -static int unit_get(struct idr *p, void *ptr); +static int unit_get(struct idr *p, void *ptr, int min); static int unit_set(struct idr *p, void *ptr, int n); static void unit_put(struct idr *p, int n); static void *unit_find(struct idr *p, int n); @@ -1155,9 +1155,20 @@ static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set) mutex_lock(&pn->all_ppp_mutex); if (unit < 0) { - ret = unit_get(&pn->units_idr, ppp); + ret = unit_get(&pn->units_idr, ppp, 0); if (ret < 0) goto err; + if (!ifname_is_set) { + while (1) { + snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ret); + if (!__dev_get_by_name(ppp->ppp_net, ppp->dev->name)) + break; + unit_put(&pn->units_idr, ret); + ret = unit_get(&pn->units_idr, ppp, ret + 1); + if (ret < 0) + goto err; + } + } } else { /* Caller asked for a specific unit number. Fail with -EEXIST * if unavailable. For backward compatibility, return -EEXIST @@ -1306,7 +1317,7 @@ static int ppp_nl_newlink(struct net *src_net, struct net_device *dev, * the PPP unit identifer as suffix (i.e. ppp<unit_id>). This allows * userspace to infer the device name using to the PPPIOCGUNIT ioctl. */ - if (!tb[IFLA_IFNAME]) + if (!tb[IFLA_IFNAME] || !nla_len(tb[IFLA_IFNAME]) || !*(char *)nla_data(tb[IFLA_IFNAME])) conf.ifname_is_set = false; err = ppp_dev_configure(src_net, dev, &conf); @@ -3552,9 +3563,9 @@ static int unit_set(struct idr *p, void *ptr, int n) } /* get new free unit number and associate pointer with it */ -static int unit_get(struct idr *p, void *ptr) +static int unit_get(struct idr *p, void *ptr, int min) { - return idr_alloc(p, ptr, 0, 0, GFP_KERNEL); + return idr_alloc(p, ptr, min, 0, GFP_KERNEL); } /* put unit number back to a pool */ diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index ac92bc52a85e..38cda590895c 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -63,6 +63,29 @@ void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index, value, index, data, size); } +static int asix_check_host_enable(struct usbnet *dev, int in_pm) +{ + int i, ret; + u8 smsr; + + for (i = 0; i < 30; ++i) { + ret = asix_set_sw_mii(dev, in_pm); + if (ret == -ENODEV || ret == -ETIMEDOUT) + break; + usleep_range(1000, 1100); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, + 0, 0, 1, &smsr, in_pm); + if (ret == -ENODEV) + break; + else if (ret < 0) + continue; + else if (smsr & AX_HOST_EN) + break; + } + + return ret; +} + static void reset_asix_rx_fixup_info(struct asix_rx_fixup_info *rx) { /* Reset the variables that have a lifetime outside of @@ -467,19 +490,11 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc) { struct usbnet *dev = netdev_priv(netdev); __le16 res; - u8 smsr; - int i = 0; int ret; mutex_lock(&dev->phy_mutex); - do { - ret = asix_set_sw_mii(dev, 0); - if (ret == -ENODEV || ret == -ETIMEDOUT) - break; - usleep_range(1000, 1100); - ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, - 0, 0, 1, &smsr, 0); - } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + + ret = asix_check_host_enable(dev, 0); if (ret == -ENODEV || ret == -ETIMEDOUT) { mutex_unlock(&dev->phy_mutex); return ret; @@ -505,23 +520,14 @@ static int 
__asix_mdio_write(struct net_device *netdev, int phy_id, int loc, { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); - u8 smsr; - int i = 0; int ret; netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", phy_id, loc, val); mutex_lock(&dev->phy_mutex); - do { - ret = asix_set_sw_mii(dev, 0); - if (ret == -ENODEV) - break; - usleep_range(1000, 1100); - ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, - 0, 0, 1, &smsr, 0); - } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + ret = asix_check_host_enable(dev, 0); if (ret == -ENODEV) goto out; @@ -561,19 +567,11 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) { struct usbnet *dev = netdev_priv(netdev); __le16 res; - u8 smsr; - int i = 0; int ret; mutex_lock(&dev->phy_mutex); - do { - ret = asix_set_sw_mii(dev, 1); - if (ret == -ENODEV || ret == -ETIMEDOUT) - break; - usleep_range(1000, 1100); - ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, - 0, 0, 1, &smsr, 1); - } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + + ret = asix_check_host_enable(dev, 1); if (ret == -ENODEV || ret == -ETIMEDOUT) { mutex_unlock(&dev->phy_mutex); return ret; @@ -595,22 +593,14 @@ asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, int val) { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); - u8 smsr; - int i = 0; int ret; netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", phy_id, loc, val); mutex_lock(&dev->phy_mutex); - do { - ret = asix_set_sw_mii(dev, 1); - if (ret == -ENODEV) - break; - usleep_range(1000, 1100); - ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, - 0, 0, 1, &smsr, 1); - } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + + ret = asix_check_host_enable(dev, 1); if (ret == -ENODEV) { mutex_unlock(&dev->phy_mutex); return; diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 827d574f764a..24bc1e678b7b 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -1079,8 +1079,7 @@ static void hso_init_termios(struct ktermios *termios) tty_termios_encode_baud_rate(termios, 115200, 115200); } -static void _hso_serial_set_termios(struct tty_struct *tty, - struct ktermios *old) +static void _hso_serial_set_termios(struct tty_struct *tty) { struct hso_serial *serial = tty->driver_data; @@ -1262,7 +1261,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) if (serial->port.count == 1) { serial->rx_state = RX_IDLE; /* Force default termio settings */ - _hso_serial_set_termios(tty, NULL); + _hso_serial_set_termios(tty); tasklet_setup(&serial->unthrottle_tasklet, hso_unthrottle_tasklet); result = hso_start_serial_device(serial->parent, GFP_KERNEL); @@ -1394,7 +1393,7 @@ static void hso_serial_set_termios(struct tty_struct *tty, struct ktermios *old) /* the actual setup */ spin_lock_irqsave(&serial->serial_lock, flags); if (serial->port.count) - _hso_serial_set_termios(tty, old); + _hso_serial_set_termios(tty); else tty->termios = *old; spin_unlock_irqrestore(&serial->serial_lock, flags); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 13f86368b78a..4e8d3c28f73e 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1154,7 +1154,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) { struct phy_device *phydev = dev->net->phydev; struct ethtool_link_ksettings ecmd; - int ladv, radv, ret; + int ladv, radv, ret, link; u32 buf; /* clear LAN78xx interrupt status */ @@ -1162,9 
+1162,12 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) if (unlikely(ret < 0)) return -EIO; + mutex_lock(&phydev->lock); phy_read_status(phydev); + link = phydev->link; + mutex_unlock(&phydev->lock); - if (!phydev->link && dev->link_on) { + if (!link && dev->link_on) { dev->link_on = false; /* reset MAC */ @@ -1177,7 +1180,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) return -EIO; del_timer(&dev->stat_monitor); - } else if (phydev->link && !dev->link_on) { + } else if (link && !dev->link_on) { dev->link_on = true; phy_ethtool_ksettings_get(phydev, &ecmd); @@ -1466,9 +1469,14 @@ static int lan78xx_set_eee(struct net_device *net, struct ethtool_eee *edata) static u32 lan78xx_get_link(struct net_device *net) { + u32 link; + + mutex_lock(&net->phydev->lock); phy_read_status(net->phydev); + link = net->phydev->link; + mutex_unlock(&net->phydev->lock); - return net->phydev->link; + return link; } static void lan78xx_get_drvinfo(struct net_device *net, diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 0475ef0efdca..36dafcb3d04a 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1,31 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 1999-2013 Petko Manolov (petkan@nucleusys.com) + * Copyright (c) 1999-2021 Petko Manolov (petkan@nucleusys.com) * - * ChangeLog: - * .... Most of the time spent on reading sources & docs. - * v0.2.x First official release for the Linux kernel. - * v0.3.0 Beutified and structured, some bugs fixed. - * v0.3.x URBifying bulk requests and bugfixing. First relatively - * stable release. Still can touch device's registers only - * from top-halves. - * v0.4.0 Control messages remained unurbified are now URBs. - * Now we can touch the HW at any time. - * v0.4.9 Control urbs again use process context to wait. Argh... - * Some long standing bugs (enable_net_traffic) fixed. - * Also nasty trick about resubmiting control urb from - * interrupt context used. Please let me know how it - * behaves. Pegasus II support added since this version. - * TODO: suppressing HCD warnings spewage on disconnect. - * v0.4.13 Ethernet address is now set at probe(), not at open() - * time as this seems to break dhcpd. - * v0.5.0 branch to 2.5.x kernels - * v0.5.1 ethtool support added - * v0.5.5 rx socket buffers are in a pool and the their allocation - * is out of the interrupt routine. - * ... - * v0.9.3 simplified [get|set]_register(s), async update registers - * logic revisited, receive skb_pool removed. 
*/ #include <linux/sched.h> @@ -45,7 +21,6 @@ /* * Version Information */ -#define DRIVER_VERSION "v0.9.3 (2013/04/25)" #define DRIVER_AUTHOR "Petko Manolov <petkan@nucleusys.com>" #define DRIVER_DESC "Pegasus/Pegasus II USB Ethernet driver" @@ -132,9 +107,15 @@ static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, const void *data) { - return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS, + int ret; + + ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS, PEGASUS_REQT_WRITE, 0, indx, data, size, 1000, GFP_NOIO); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret); + + return ret; } /* @@ -145,10 +126,15 @@ static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data) { void *buf = &data; + int ret; - return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG, + ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG, PEGASUS_REQT_WRITE, data, indx, buf, 1, 1000, GFP_NOIO); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret); + + return ret; } static int update_eth_regs_async(pegasus_t *pegasus) @@ -188,10 +174,9 @@ static int update_eth_regs_async(pegasus_t *pegasus) static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd) { - int i; - __u8 data[4] = { phy, 0, 0, indx }; + int i, ret; __le16 regdi; - int ret = -ETIMEDOUT; + __u8 data[4] = { phy, 0, 0, indx }; if (cmd & PHY_WRITE) { __le16 *t = (__le16 *) & data[1]; @@ -207,12 +192,15 @@ static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd) if (data[0] & PHY_DONE) break; } - if (i >= REG_TIMEOUT) + if (i >= REG_TIMEOUT) { + ret = -ETIMEDOUT; goto fail; + } if (cmd & PHY_READ) { ret = get_registers(p, PhyData, 2, ®di); + if (ret < 0) + goto fail; *regd = le16_to_cpu(regdi); - return ret; } return 0; fail: @@ -235,9 +223,13 @@ static int write_mii_word(pegasus_t *pegasus, __u8 phy, __u8 indx, __u16 *regd) static int mdio_read(struct net_device *dev, int phy_id, int loc) { pegasus_t *pegasus = netdev_priv(dev); + int ret; u16 res; - read_mii_word(pegasus, phy_id, loc, &res); + ret = read_mii_word(pegasus, phy_id, loc, &res); + if (ret < 0) + return ret; + return (int)res; } @@ -251,10 +243,9 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int val) static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) { - int i; - __u8 tmp = 0; + int ret, i; __le16 retdatai; - int ret; + __u8 tmp = 0; set_register(pegasus, EpromCtrl, 0); set_register(pegasus, EpromOffset, index); @@ -262,21 +253,25 @@ static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) for (i = 0; i < REG_TIMEOUT; i++) { ret = get_registers(pegasus, EpromCtrl, 1, &tmp); + if (ret < 0) + goto fail; if (tmp & EPROM_DONE) break; - if (ret == -ESHUTDOWN) - goto fail; } - if (i >= REG_TIMEOUT) + if (i >= REG_TIMEOUT) { + ret = -ETIMEDOUT; goto fail; + } ret = get_registers(pegasus, EpromData, 2, &retdatai); + if (ret < 0) + goto fail; *retdata = le16_to_cpu(retdatai); return ret; fail: - netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__); - return -ETIMEDOUT; + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); + return ret; } #ifdef PEGASUS_WRITE_EEPROM @@ -324,10 +319,10 @@ static int write_eprom_word(pegasus_t *pegasus, __u8 index, __u16 data) return ret; fail: - 
netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__); + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); return -ETIMEDOUT; } -#endif /* PEGASUS_WRITE_EEPROM */ +#endif /* PEGASUS_WRITE_EEPROM */ static inline int get_node_id(pegasus_t *pegasus, u8 *id) { @@ -367,19 +362,21 @@ static void set_ethernet_addr(pegasus_t *pegasus) return; err: eth_hw_addr_random(pegasus->net); - dev_info(&pegasus->intf->dev, "software assigned MAC address.\n"); + netif_dbg(pegasus, drv, pegasus->net, "software assigned MAC address.\n"); return; } static inline int reset_mac(pegasus_t *pegasus) { + int ret, i; __u8 data = 0x8; - int i; set_register(pegasus, EthCtrl1, data); for (i = 0; i < REG_TIMEOUT; i++) { - get_registers(pegasus, EthCtrl1, 1, &data); + ret = get_registers(pegasus, EthCtrl1, 1, &data); + if (ret < 0) + goto fail; if (~data & 0x08) { if (loopback) break; @@ -402,22 +399,29 @@ static inline int reset_mac(pegasus_t *pegasus) } if (usb_dev_id[pegasus->dev_index].vendor == VENDOR_ELCON) { __u16 auxmode; - read_mii_word(pegasus, 3, 0x1b, &auxmode); + ret = read_mii_word(pegasus, 3, 0x1b, &auxmode); + if (ret < 0) + goto fail; auxmode |= 4; write_mii_word(pegasus, 3, 0x1b, &auxmode); } return 0; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); + return ret; } static int enable_net_traffic(struct net_device *dev, struct usb_device *usb) { - __u16 linkpart; - __u8 data[4]; pegasus_t *pegasus = netdev_priv(dev); int ret; + __u16 linkpart; + __u8 data[4]; - read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart); + ret = read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart); + if (ret < 0) + goto fail; data[0] = 0xc8; /* TX & RX enable, append status, no CRC */ data[1] = 0; if (linkpart & (ADVERTISE_100FULL | ADVERTISE_10FULL)) @@ -435,11 +439,16 @@ static int enable_net_traffic(struct net_device *dev, struct usb_device *usb) usb_dev_id[pegasus->dev_index].vendor == VENDOR_LINKSYS2 || usb_dev_id[pegasus->dev_index].vendor == VENDOR_DLINK) { u16 auxmode; - read_mii_word(pegasus, 0, 0x1b, &auxmode); + ret = read_mii_word(pegasus, 0, 0x1b, &auxmode); + if (ret < 0) + goto fail; auxmode |= 4; write_mii_word(pegasus, 0, 0x1b, &auxmode); } + return 0; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); return ret; } @@ -447,9 +456,9 @@ static void read_bulk_callback(struct urb *urb) { pegasus_t *pegasus = urb->context; struct net_device *net; + u8 *buf = urb->transfer_buffer; int rx_status, count = urb->actual_length; int status = urb->status; - u8 *buf = urb->transfer_buffer; __u16 pkt_len; if (!pegasus) @@ -735,12 +744,16 @@ static inline void disable_net_traffic(pegasus_t *pegasus) set_registers(pegasus, EthCtrl0, sizeof(tmp), &tmp); } -static inline void get_interrupt_interval(pegasus_t *pegasus) +static inline int get_interrupt_interval(pegasus_t *pegasus) { u16 data; u8 interval; + int ret; + + ret = read_eprom_word(pegasus, 4, &data); + if (ret < 0) + return ret; - read_eprom_word(pegasus, 4, &data); interval = data >> 8; if (pegasus->usb->speed != USB_SPEED_HIGH) { if (interval < 0x80) { @@ -755,6 +768,8 @@ static inline void get_interrupt_interval(pegasus_t *pegasus) } } pegasus->intr_interval = interval; + + return 0; } static void set_carrier(struct net_device *net) @@ -880,7 +895,6 @@ static void pegasus_get_drvinfo(struct net_device *dev, pegasus_t *pegasus = netdev_priv(dev); strlcpy(info->driver, driver_name, sizeof(info->driver)); - strlcpy(info->version, DRIVER_VERSION, sizeof(info->version)); 
usb_make_path(pegasus->usb, info->bus_info, sizeof(info->bus_info)); } @@ -999,8 +1013,7 @@ static int pegasus_siocdevprivate(struct net_device *net, struct ifreq *rq, data[0] = pegasus->phy; fallthrough; case SIOCDEVPRIVATE + 1: - read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); - res = 0; + res = read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); break; case SIOCDEVPRIVATE + 2: if (!capable(CAP_NET_ADMIN)) @@ -1034,22 +1047,25 @@ static void pegasus_set_multicast(struct net_device *net) static __u8 mii_phy_probe(pegasus_t *pegasus) { - int i; + int i, ret; __u16 tmp; for (i = 0; i < 32; i++) { - read_mii_word(pegasus, i, MII_BMSR, &tmp); + ret = read_mii_word(pegasus, i, MII_BMSR, &tmp); + if (ret < 0) + goto fail; if (tmp == 0 || tmp == 0xffff || (tmp & BMSR_MEDIA) == 0) continue; else return i; } - +fail: return 0xff; } static inline void setup_pegasus_II(pegasus_t *pegasus) { + int ret; __u8 data = 0xa5; set_register(pegasus, Reg1d, 0); @@ -1061,7 +1077,9 @@ static inline void setup_pegasus_II(pegasus_t *pegasus) set_register(pegasus, Reg7b, 2); set_register(pegasus, 0x83, data); - get_registers(pegasus, 0x83, 1, &data); + ret = get_registers(pegasus, 0x83, 1, &data); + if (ret < 0) + goto fail; if (data == 0xa5) pegasus->chip = 0x8513; @@ -1076,6 +1094,10 @@ static inline void setup_pegasus_II(pegasus_t *pegasus) set_register(pegasus, Reg81, 6); else set_register(pegasus, Reg81, 2); + + return; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); } static void check_carrier(struct work_struct *work) @@ -1150,7 +1172,9 @@ static int pegasus_probe(struct usb_interface *intf, | NETIF_MSG_PROBE | NETIF_MSG_LINK); pegasus->features = usb_dev_id[dev_index].private; - get_interrupt_interval(pegasus); + res = get_interrupt_interval(pegasus); + if (res) + goto out2; if (reset_mac(pegasus)) { dev_err(&intf->dev, "can't reset MAC\n"); res = -EIO; @@ -1297,7 +1321,7 @@ static void __init parse_id(char *id) static int __init pegasus_init(void) { - pr_info("%s: %s, " DRIVER_DESC "\n", driver_name, DRIVER_VERSION); + pr_info("%s: " DRIVER_DESC "\n", driver_name); if (devid) parse_id(devid); return usb_register(&pegasus_driver); diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d7fbc81b518a..aa66671c484d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3955,17 +3955,28 @@ static void rtl_clear_bp(struct r8152 *tp, u16 type) case RTL_VER_06: ocp_write_byte(tp, type, PLA_BP_EN, 0); break; + case RTL_VER_14: + ocp_write_word(tp, type, USB_BP2_EN, 0); + + ocp_write_word(tp, type, USB_BP_8, 0); + ocp_write_word(tp, type, USB_BP_9, 0); + ocp_write_word(tp, type, USB_BP_10, 0); + ocp_write_word(tp, type, USB_BP_11, 0); + ocp_write_word(tp, type, USB_BP_12, 0); + ocp_write_word(tp, type, USB_BP_13, 0); + ocp_write_word(tp, type, USB_BP_14, 0); + ocp_write_word(tp, type, USB_BP_15, 0); + break; case RTL_VER_08: case RTL_VER_09: case RTL_VER_10: case RTL_VER_11: case RTL_VER_12: case RTL_VER_13: - case RTL_VER_14: case RTL_VER_15: default: if (type == MCU_TYPE_USB) { - ocp_write_byte(tp, MCU_TYPE_USB, USB_BP2_EN, 0); + ocp_write_word(tp, MCU_TYPE_USB, USB_BP2_EN, 0); ocp_write_word(tp, MCU_TYPE_USB, USB_BP_8, 0); ocp_write_word(tp, MCU_TYPE_USB, USB_BP_9, 0); @@ -4331,7 +4342,6 @@ static bool rtl8152_is_fw_mac_ok(struct r8152 *tp, struct fw_mac *mac) case RTL_VER_11: case RTL_VER_12: case RTL_VER_13: - case RTL_VER_14: case RTL_VER_15: fw_reg = 0xf800; bp_ba_addr = PLA_BP_BA; @@ -4339,6 +4349,13 @@ static bool 
rtl8152_is_fw_mac_ok(struct r8152 *tp, struct fw_mac *mac) bp_start = PLA_BP_0; max_bp = 8; break; + case RTL_VER_14: + fw_reg = 0xf800; + bp_ba_addr = PLA_BP_BA; + bp_en_addr = USB_BP2_EN; + bp_start = PLA_BP_0; + max_bp = 16; + break; default: goto out; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 2e42210a6503..c8c9ad7ca2b5 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -63,7 +63,7 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_CSUM }; -#define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ +#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ (1ULL << VIRTIO_NET_F_GUEST_UFO)) @@ -2504,7 +2504,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) { - NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first"); + NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); return -EOPNOTSUPP; } @@ -2635,15 +2635,15 @@ static int virtnet_set_features(struct net_device *dev, u64 offloads; int err; - if ((dev->features ^ features) & NETIF_F_LRO) { + if ((dev->features ^ features) & NETIF_F_GRO_HW) { if (vi->xdp_enabled) return -EBUSY; - if (features & NETIF_F_LRO) + if (features & NETIF_F_GRO_HW) offloads = vi->guest_offloads_capable; else offloads = vi->guest_offloads_capable & - ~GUEST_OFFLOAD_LRO_MASK; + ~GUEST_OFFLOAD_GRO_HW_MASK; err = virtnet_set_guest_offloads(vi, offloads); if (err) @@ -3123,9 +3123,9 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= NETIF_F_RXCSUM; if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) - dev->features |= NETIF_F_LRO; + dev->features |= NETIF_F_GRO_HW; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) - dev->hw_features |= NETIF_F_LRO; + dev->hw_features |= NETIF_F_GRO_HW; dev->vlan_features = dev->features; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 726adf07ef31..bf2fac913942 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -864,7 +864,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); if (!skb) { - skb->dev->stats.tx_errors++; + dev->stats.tx_errors++; return -ENOMEM; } } @@ -1360,6 +1360,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); + nf_reset_ct(skb); + /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. Reset pkt_type for upper layers to process skb. 
* For strict packets with a source LLA, determine the dst using the @@ -1422,6 +1424,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; + nf_reset_ct(skb); + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) goto out; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c index 2403490cbc26..b4b1f75b9c2a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c @@ -37,6 +37,7 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, u32 sha1 = 0; u16 mac_type = 0, rf_id = 0; u8 *pnvm_data = NULL, *tmp; + bool hw_match = false; u32 size = 0; int ret; @@ -83,6 +84,9 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, break; } + if (hw_match) + break; + mac_type = le16_to_cpup((__le16 *)data); rf_id = le16_to_cpup((__le16 *)(data + sizeof(__le16))); @@ -90,15 +94,9 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, "Got IWL_UCODE_TLV_HW_TYPE mac_type 0x%0x rf_id 0x%0x\n", mac_type, rf_id); - if (mac_type != CSR_HW_REV_TYPE(trans->hw_rev) || - rf_id != CSR_HW_RFID_TYPE(trans->hw_rf_id)) { - IWL_DEBUG_FW(trans, - "HW mismatch, skipping PNVM section, mac_type 0x%0x, rf_id 0x%0x.\n", - CSR_HW_REV_TYPE(trans->hw_rev), trans->hw_rf_id); - ret = -ENOENT; - goto out; - } - + if (mac_type == CSR_HW_REV_TYPE(trans->hw_rev) && + rf_id == CSR_HW_RFID_TYPE(trans->hw_rf_id)) + hw_match = true; break; case IWL_UCODE_TLV_SEC_RT: { struct iwl_pnvm_section *section = (void *)data; @@ -149,6 +147,15 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, } done: + if (!hw_match) { + IWL_DEBUG_FW(trans, + "HW mismatch, skipping PNVM section (need mac_type 0x%x rf_id 0x%x)\n", + CSR_HW_REV_TYPE(trans->hw_rev), + CSR_HW_RFID_TYPE(trans->hw_rf_id)); + ret = -ENOENT; + goto out; + } + if (!size) { IWL_DEBUG_FW(trans, "Empty PNVM, skipping.\n"); ret = -ENOENT; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 16baee3d52ae..0b8a0cd3b652 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1110,12 +1110,80 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_bz_a0_mr_a0, iwl_ax211_name), +/* SoF with JF2 */ + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9560_name), + +/* SoF with JF */ + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + 
iwlax210_2ax_cfg_so_jf_b0, iwl9461_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9462_name), + /* So with GF */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, - iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name) + iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name), + +/* So with JF2 */ + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9560_name), + +/* So with JF */ + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9461_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9462_name) #endif /* CONFIG_IWLMVM */ }; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 863aa18b3024..43960770a9af 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -111,7 +111,7 @@ mt7915_mcu_get_cipher(int cipher) case WLAN_CIPHER_SUITE_SMS4: return MCU_CIPHER_WAPI; default: - return MT_CIPHER_NONE; + return MCU_CIPHER_NONE; } } diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h index edd3ba3a0c2d..e68a562cc5b4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h @@ -1073,7 +1073,8 @@ enum { }; enum mcu_cipher_type { - MCU_CIPHER_WEP40 = 1, + MCU_CIPHER_NONE = 0, + MCU_CIPHER_WEP40, MCU_CIPHER_WEP104, MCU_CIPHER_WEP128, MCU_CIPHER_TKIP, diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index cd690c64f65b..9fbaacc67cfa 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -111,7 +111,7 @@ mt7921_mcu_get_cipher(int cipher) case WLAN_CIPHER_SUITE_SMS4: return MCU_CIPHER_WAPI; default: - return MT_CIPHER_NONE; + return MCU_CIPHER_NONE; } } diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h index d76cf8f8dfdf..de3c091f6736 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h @@ -199,7 +199,8 @@ struct sta_rec_sec { } __packed; enum mcu_cipher_type { - MCU_CIPHER_WEP40 = 1, + 
MCU_CIPHER_NONE = 0, + MCU_CIPHER_WEP40, MCU_CIPHER_WEP104, MCU_CIPHER_WEP128, MCU_CIPHER_TKIP, diff --git a/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c b/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c index 804e6c4f2c78..519361ec40df 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c +++ b/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c @@ -64,10 +64,9 @@ static struct ipc_chnl_cfg modem_cfg[] = { int ipc_chnl_cfg_get(struct ipc_chnl_cfg *chnl_cfg, int index) { - int array_size = ARRAY_SIZE(modem_cfg); - - if (index >= array_size) { - pr_err("index: %d and array_size %d", index, array_size); + if (index >= ARRAY_SIZE(modem_cfg)) { + pr_err("index: %d and array size %zu", index, + ARRAY_SIZE(modem_cfg)); return -ECHRNG; } diff --git a/drivers/net/wwan/iosm/iosm_ipc_mmio.h b/drivers/net/wwan/iosm/iosm_ipc_mmio.h index 45e6923da78f..f861994a6d90 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mmio.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mmio.h @@ -10,10 +10,10 @@ #define IOSM_CP_VERSION 0x0100UL /* DL dir Aggregation support mask */ -#define DL_AGGR BIT(23) +#define DL_AGGR BIT(9) /* UL dir Aggregation support mask */ -#define UL_AGGR BIT(22) +#define UL_AGGR BIT(8) /* UL flow credit support mask */ #define UL_FLOW_CREDIT BIT(21) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index 562de275797a..bdb2d32cdb6d 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -320,7 +320,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux, return; } - ul_credits = fct->vfl.nr_of_bytes; + ul_credits = le32_to_cpu(fct->vfl.nr_of_bytes); dev_dbg(ipc_mux->dev, "Flow_Credit:: if_id[%d] Old: %d Grants: %d", if_id, ipc_mux->session[if_id].ul_flow_credits, ul_credits); @@ -586,7 +586,7 @@ static bool ipc_mux_lite_send_qlt(struct iosm_mux *ipc_mux) qlt->reserved[0] = 0; qlt->reserved[1] = 0; - qlt->vfl.nr_of_bytes = session->ul_list.qlen; + qlt->vfl.nr_of_bytes = cpu_to_le32(session->ul_list.qlen); /* Add QLT to the transfer list. */ skb_queue_tail(&ipc_mux->channel->ul_list, diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h index 4a74e3c9457f..aae83db5cbb8 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h @@ -106,7 +106,7 @@ struct mux_lite_cmdh { * @nr_of_bytes: Number of bytes available to transmit in the queue. 
*/ struct mux_lite_vfl { - u32 nr_of_bytes; + __le32 nr_of_bytes; }; /** diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c index 91109e27efd3..35d590743d3a 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c @@ -412,8 +412,8 @@ struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol, } if (p_td->buffer.address != IPC_CB(skb)->mapping) { - dev_err(ipc_protocol->dev, "invalid buf=%p or skb=%p", - (void *)p_td->buffer.address, skb->data); + dev_err(ipc_protocol->dev, "invalid buf=%llx or skb=%p", + (unsigned long long)p_td->buffer.address, skb->data); ipc_pcie_kfree_skb(ipc_protocol->pcie, skb); skb = NULL; goto ret; diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c index b2357ad5d517..b571d9cedba4 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c @@ -228,7 +228,7 @@ static void ipc_wwan_dellink(void *ctxt, struct net_device *dev, RCU_INIT_POINTER(ipc_wwan->sub_netlist[if_id], NULL); /* unregistering includes synchronize_net() */ - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); unlock: mutex_unlock(&ipc_wwan->if_mutex); diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c index 1bc6b69aa530..d0a98f34c54d 100644 --- a/drivers/net/wwan/mhi_wwan_ctrl.c +++ b/drivers/net/wwan/mhi_wwan_ctrl.c @@ -41,14 +41,14 @@ struct mhi_wwan_dev { /* Increment RX budget and schedule RX refill if necessary */ static void mhi_wwan_rx_budget_inc(struct mhi_wwan_dev *mhiwwan) { - spin_lock(&mhiwwan->rx_lock); + spin_lock_bh(&mhiwwan->rx_lock); mhiwwan->rx_budget++; if (test_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags)) schedule_work(&mhiwwan->rx_refill); - spin_unlock(&mhiwwan->rx_lock); + spin_unlock_bh(&mhiwwan->rx_lock); } /* Decrement RX budget if non-zero and return true on success */ @@ -56,7 +56,7 @@ static bool mhi_wwan_rx_budget_dec(struct mhi_wwan_dev *mhiwwan) { bool ret = false; - spin_lock(&mhiwwan->rx_lock); + spin_lock_bh(&mhiwwan->rx_lock); if (mhiwwan->rx_budget) { mhiwwan->rx_budget--; @@ -64,7 +64,7 @@ static bool mhi_wwan_rx_budget_dec(struct mhi_wwan_dev *mhiwwan) ret = true; } - spin_unlock(&mhiwwan->rx_lock); + spin_unlock_bh(&mhiwwan->rx_lock); return ret; } @@ -110,7 +110,7 @@ static int mhi_wwan_ctrl_start(struct wwan_port *port) int ret; /* Start mhi device's channel(s) */ - ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev); + ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev, 0); if (ret) return ret; @@ -130,9 +130,9 @@ static void mhi_wwan_ctrl_stop(struct wwan_port *port) { struct mhi_wwan_dev *mhiwwan = wwan_port_get_drvdata(port); - spin_lock(&mhiwwan->rx_lock); + spin_lock_bh(&mhiwwan->rx_lock); clear_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags); - spin_unlock(&mhiwwan->rx_lock); + spin_unlock_bh(&mhiwwan->rx_lock); cancel_work_sync(&mhiwwan->rx_refill); diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c index bb88c3883fe8..377529bbf124 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -601,6 +601,7 @@ static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id if (!mbim) return -ENOMEM; + spin_lock_init(&mbim->tx_lock); dev_set_drvdata(&mhi_dev->dev, mbim); mbim->mdev = mhi_dev; mbim->mru = mhi_dev->mhi_cntrl->mru ? 
mhi_dev->mhi_cntrl->mru : MHI_DEFAULT_MRU; @@ -608,7 +609,7 @@ static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id INIT_DELAYED_WORK(&mbim->rx_refill, mhi_net_rx_refill_work); /* Start MHI channels */ - err = mhi_prepare_for_transfer(mhi_dev); + err = mhi_prepare_for_transfer(mhi_dev, 0); if (err) return err; diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c index 674a81d79db3..d293ab688044 100644 --- a/drivers/net/wwan/wwan_core.c +++ b/drivers/net/wwan/wwan_core.c @@ -164,11 +164,14 @@ static struct wwan_device *wwan_create_dev(struct device *parent) goto done_unlock; id = ida_alloc(&wwan_dev_ids, GFP_KERNEL); - if (id < 0) + if (id < 0) { + wwandev = ERR_PTR(id); goto done_unlock; + } wwandev = kzalloc(sizeof(*wwandev), GFP_KERNEL); if (!wwandev) { + wwandev = ERR_PTR(-ENOMEM); ida_free(&wwan_dev_ids, id); goto done_unlock; } @@ -182,7 +185,8 @@ static struct wwan_device *wwan_create_dev(struct device *parent) err = device_register(&wwandev->dev); if (err) { put_device(&wwandev->dev); - wwandev = NULL; + wwandev = ERR_PTR(err); + goto done_unlock; } done_unlock: @@ -355,8 +359,8 @@ struct wwan_port *wwan_create_port(struct device *parent, { struct wwan_device *wwandev; struct wwan_port *port; - int minor, err = -ENOMEM; char namefmt[0x20]; + int minor, err; if (type > WWAN_PORT_MAX || !ops) return ERR_PTR(-EINVAL); @@ -370,11 +374,14 @@ struct wwan_port *wwan_create_port(struct device *parent, /* A port is exposed as character device, get a minor */ minor = ida_alloc_range(&minors, 0, WWAN_MAX_MINORS - 1, GFP_KERNEL); - if (minor < 0) + if (minor < 0) { + err = minor; goto error_wwandev_remove; + } port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) { + err = -ENOMEM; ida_free(&minors, minor); goto error_wwandev_remove; } @@ -1014,8 +1021,8 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops, return -EINVAL; wwandev = wwan_create_dev(parent); - if (!wwandev) - return -ENOMEM; + if (IS_ERR(wwandev)) + return PTR_ERR(wwandev); if (WARN_ON(wwandev->ops)) { wwan_remove_dev(wwandev); diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c index 2ee0ec4bb739..221fa3bb8705 100644 --- a/drivers/nfc/virtual_ncidev.c +++ b/drivers/nfc/virtual_ncidev.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/miscdevice.h> #include <linux/mutex.h> +#include <linux/wait.h> #include <net/nfc/nci_core.h> enum virtual_ncidev_mode { @@ -27,6 +28,7 @@ enum virtual_ncidev_mode { NFC_PROTO_ISO15693_MASK) static enum virtual_ncidev_mode state; +static DECLARE_WAIT_QUEUE_HEAD(wq); static struct miscdevice miscdev; static struct sk_buff *send_buff; static struct nci_dev *ndev; @@ -61,6 +63,7 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) } send_buff = skb_copy(skb, GFP_KERNEL); mutex_unlock(&nci_mutex); + wake_up_interruptible(&wq); return 0; } @@ -77,9 +80,11 @@ static ssize_t virtual_ncidev_read(struct file *file, char __user *buf, size_t actual_len; mutex_lock(&nci_mutex); - if (!send_buff) { + while (!send_buff) { mutex_unlock(&nci_mutex); - return 0; + if (wait_event_interruptible(wq, send_buff)) + return -EFAULT; + mutex_lock(&nci_mutex); } actual_len = min_t(size_t, count, send_buff->len); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 2403b71b601e..745478213ff2 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2527,7 +2527,7 @@ static void deactivate_labels(void *region) static int 
init_active_labels(struct nd_region *nd_region) { - int i; + int i, rc = 0; for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; @@ -2546,13 +2546,14 @@ static int init_active_labels(struct nd_region *nd_region) else if (test_bit(NDD_LABELING, &nvdimm->flags)) /* fail, labels needed to disambiguate dpa */; else - return 0; + continue; dev_err(&nd_region->dev, "%s: is %s, failing probe\n", dev_name(&nd_mapping->nvdimm->dev), test_bit(NDD_LOCKED, &nvdimm->flags) ? "locked" : "disabled"); - return -ENXIO; + rc = -ENXIO; + goto out; } nd_mapping->ndd = ndd; atomic_inc(&nvdimm->busy); @@ -2586,13 +2587,17 @@ static int init_active_labels(struct nd_region *nd_region) break; } - if (i < nd_region->ndr_mappings) { + if (i < nd_region->ndr_mappings) + rc = -ENOMEM; + +out: + if (rc) { deactivate_labels(nd_region); - return -ENOMEM; + return rc; } return devm_add_action_or_reset(&nd_region->dev, deactivate_labels, - nd_region); + nd_region); } int nd_region_register_namespaces(struct nd_region *nd_region, int *err) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 9232255c8515..e5e75331b415 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -143,24 +143,25 @@ static inline __attribute_const__ u32 msi_mask(unsigned x) * reliably as devices without an INTx disable bit will then generate a * level IRQ which will never be cleared. */ -u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) +void __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) { - u32 mask_bits = desc->masked; + raw_spinlock_t *lock = &desc->dev->msi_lock; + unsigned long flags; if (pci_msi_ignore_mask || !desc->msi_attrib.maskbit) - return 0; + return; - mask_bits &= ~mask; - mask_bits |= flag; + raw_spin_lock_irqsave(lock, flags); + desc->masked &= ~mask; + desc->masked |= flag; pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->mask_pos, - mask_bits); - - return mask_bits; + desc->masked); + raw_spin_unlock_irqrestore(lock, flags); } static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) { - desc->masked = __pci_msi_desc_mask_irq(desc, mask, flag); + __pci_msi_desc_mask_irq(desc, mask, flag); } static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) @@ -289,13 +290,31 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) /* Don't touch the hardware now */ } else if (entry->msi_attrib.is_msix) { void __iomem *base = pci_msix_desc_addr(entry); + bool unmasked = !(entry->masked & PCI_MSIX_ENTRY_CTRL_MASKBIT); if (!base) goto skip; + /* + * The specification mandates that the entry is masked + * when the message is modified: + * + * "If software changes the Address or Data value of an + * entry while the entry is unmasked, the result is + * undefined." 
+ */ + if (unmasked) + __pci_msix_desc_mask_irq(entry, PCI_MSIX_ENTRY_CTRL_MASKBIT); + writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); writel(msg->data, base + PCI_MSIX_ENTRY_DATA); + + if (unmasked) + __pci_msix_desc_mask_irq(entry, 0); + + /* Ensure that the writes are visible in the device */ + readl(base + PCI_MSIX_ENTRY_DATA); } else { int pos = dev->msi_cap; u16 msgctl; @@ -316,6 +335,8 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) pci_write_config_word(dev, pos + PCI_MSI_DATA_32, msg->data); } + /* Ensure that the writes are visible in the device */ + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); } skip: @@ -636,21 +657,21 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, /* Configure MSI capability structure */ ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); if (ret) { - msi_mask_irq(entry, mask, ~mask); + msi_mask_irq(entry, mask, 0); free_msi_irqs(dev); return ret; } ret = msi_verify_entries(dev); if (ret) { - msi_mask_irq(entry, mask, ~mask); + msi_mask_irq(entry, mask, 0); free_msi_irqs(dev); return ret; } ret = populate_msi_sysfs(dev); if (ret) { - msi_mask_irq(entry, mask, ~mask); + msi_mask_irq(entry, mask, 0); free_msi_irqs(dev); return ret; } @@ -691,6 +712,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, { struct irq_affinity_desc *curmsk, *masks = NULL; struct msi_desc *entry; + void __iomem *addr; int ret, i; int vec_count = pci_msix_vec_count(dev); @@ -711,6 +733,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, entry->msi_attrib.is_msix = 1; entry->msi_attrib.is_64 = 1; + if (entries) entry->msi_attrib.entry_nr = entries[i].entry; else @@ -722,6 +745,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, entry->msi_attrib.default_irq = dev->irq; entry->mask_base = base; + addr = pci_msix_desc_addr(entry); + if (addr) + entry->masked = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); + list_add_tail(&entry->list, dev_to_msi_list(&dev->dev)); if (masks) curmsk++; @@ -732,26 +759,25 @@ out: return ret; } -static void msix_program_entries(struct pci_dev *dev, - struct msix_entry *entries) +static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries) { struct msi_desc *entry; - int i = 0; - void __iomem *desc_addr; for_each_pci_msi_entry(entry, dev) { - if (entries) - entries[i++].vector = entry->irq; + if (entries) { + entries->vector = entry->irq; + entries++; + } + } +} - desc_addr = pci_msix_desc_addr(entry); - if (desc_addr) - entry->masked = readl(desc_addr + - PCI_MSIX_ENTRY_VECTOR_CTRL); - else - entry->masked = 0; +static void msix_mask_all(void __iomem *base, int tsize) +{ + u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT; + int i; - msix_mask_irq(entry, 1); - } + for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE) + writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL); } /** @@ -768,22 +794,33 @@ static void msix_program_entries(struct pci_dev *dev, static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, int nvec, struct irq_affinity *affd) { - int ret; - u16 control; void __iomem *base; + int ret, tsize; + u16 control; - /* Ensure MSI-X is disabled while it is set up */ - pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); + /* + * Some devices require MSI-X to be enabled before the MSI-X + * registers can be accessed. Mask all the vectors to prevent + * interrupts coming in before they're fully set up. 
+ */ + pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL | + PCI_MSIX_FLAGS_ENABLE); pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); /* Request & Map MSI-X table region */ - base = msix_map_region(dev, msix_table_size(control)); - if (!base) - return -ENOMEM; + tsize = msix_table_size(control); + base = msix_map_region(dev, tsize); + if (!base) { + ret = -ENOMEM; + goto out_disable; + } + + /* Ensure that all table entries are masked. */ + msix_mask_all(base, tsize); ret = msix_setup_entries(dev, base, entries, nvec, affd); if (ret) - return ret; + goto out_disable; ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); if (ret) @@ -794,15 +831,7 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, if (ret) goto out_free; - /* - * Some devices require MSI-X to be enabled before we can touch the - * MSI-X registers. We need to mask all the vectors to prevent - * interrupts coming in before they're fully set up. - */ - pci_msix_clear_and_set_ctrl(dev, 0, - PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE); - - msix_program_entries(dev, entries); + msix_update_entries(dev, entries); ret = populate_msi_sysfs(dev); if (ret) @@ -836,6 +865,9 @@ out_avail: out_free: free_msi_irqs(dev); +out_disable: + pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); + return ret; } @@ -930,8 +962,7 @@ static void pci_msi_shutdown(struct pci_dev *dev) /* Return the device with MSI unmasked as initial states */ mask = msi_mask(desc->msi_attrib.multi_cap); - /* Keep cached state to be restored */ - __pci_msi_desc_mask_irq(desc, mask, ~mask); + msi_mask_irq(desc, mask, 0); /* Restore dev->irq to its default pin-assertion IRQ */ dev->irq = desc->msi_attrib.default_irq; @@ -1016,10 +1047,8 @@ static void pci_msix_shutdown(struct pci_dev *dev) } /* Return the device with MSI-X masked as initial states */ - for_each_pci_msi_entry(entry, dev) { - /* Keep cached states to be restored */ + for_each_pci_msi_entry(entry, dev) __pci_msix_desc_mask_irq(entry, 1); - } pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); pci_intx_for_msi(dev, 1); diff --git a/drivers/pcmcia/i82092.c b/drivers/pcmcia/i82092.c index 85887d885b5f..192c9049d654 100644 --- a/drivers/pcmcia/i82092.c +++ b/drivers/pcmcia/i82092.c @@ -112,6 +112,7 @@ static int i82092aa_pci_probe(struct pci_dev *dev, for (i = 0; i < socket_count; i++) { sockets[i].card_state = 1; /* 1 = present but empty */ sockets[i].io_base = pci_resource_start(dev, 0); + sockets[i].dev = dev; sockets[i].socket.features |= SS_CAP_PCCARD; sockets[i].socket.map_size = 0x1000; sockets[i].socket.irq_mask = 0; diff --git a/drivers/pinctrl/intel/pinctrl-tigerlake.c b/drivers/pinctrl/intel/pinctrl-tigerlake.c index 3e4ef2b87526..0bcd19597e4a 100644 --- a/drivers/pinctrl/intel/pinctrl-tigerlake.c +++ b/drivers/pinctrl/intel/pinctrl-tigerlake.c @@ -701,32 +701,32 @@ static const struct pinctrl_pin_desc tglh_pins[] = { static const struct intel_padgroup tglh_community0_gpps[] = { TGL_GPP(0, 0, 24, 0), /* GPP_A */ - TGL_GPP(1, 25, 44, 128), /* GPP_R */ - TGL_GPP(2, 45, 70, 32), /* GPP_B */ - TGL_GPP(3, 71, 78, INTEL_GPIO_BASE_NOMAP), /* vGPIO_0 */ + TGL_GPP(1, 25, 44, 32), /* GPP_R */ + TGL_GPP(2, 45, 70, 64), /* GPP_B */ + TGL_GPP(3, 71, 78, 96), /* vGPIO_0 */ }; static const struct intel_padgroup tglh_community1_gpps[] = { - TGL_GPP(0, 79, 104, 96), /* GPP_D */ - TGL_GPP(1, 105, 128, 64), /* GPP_C */ - TGL_GPP(2, 129, 136, 160), /* GPP_S */ - TGL_GPP(3, 137, 153, 192), /* GPP_G */ - TGL_GPP(4, 154, 180, 224), /* 
vGPIO */ + TGL_GPP(0, 79, 104, 128), /* GPP_D */ + TGL_GPP(1, 105, 128, 160), /* GPP_C */ + TGL_GPP(2, 129, 136, 192), /* GPP_S */ + TGL_GPP(3, 137, 153, 224), /* GPP_G */ + TGL_GPP(4, 154, 180, 256), /* vGPIO */ }; static const struct intel_padgroup tglh_community3_gpps[] = { - TGL_GPP(0, 181, 193, 256), /* GPP_E */ - TGL_GPP(1, 194, 217, 288), /* GPP_F */ + TGL_GPP(0, 181, 193, 288), /* GPP_E */ + TGL_GPP(1, 194, 217, 320), /* GPP_F */ }; static const struct intel_padgroup tglh_community4_gpps[] = { - TGL_GPP(0, 218, 241, 320), /* GPP_H */ + TGL_GPP(0, 218, 241, 352), /* GPP_H */ TGL_GPP(1, 242, 251, 384), /* GPP_J */ - TGL_GPP(2, 252, 266, 352), /* GPP_K */ + TGL_GPP(2, 252, 266, 416), /* GPP_K */ }; static const struct intel_padgroup tglh_community5_gpps[] = { - TGL_GPP(0, 267, 281, 416), /* GPP_I */ + TGL_GPP(0, 267, 281, 448), /* GPP_I */ TGL_GPP(1, 282, 290, INTEL_GPIO_BASE_NOMAP), /* JTAG */ }; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 5b3b048725cc..45ebdeba985a 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -925,12 +925,10 @@ int mtk_pinconf_adv_pull_set(struct mtk_pinctrl *hw, err = hw->soc->bias_set(hw, desc, pullup); if (err) return err; - } else if (hw->soc->bias_set_combo) { - err = hw->soc->bias_set_combo(hw, desc, pullup, arg); - if (err) - return err; } else { - return -ENOTSUPP; + err = mtk_pinconf_bias_set_rev1(hw, desc, pullup); + if (err) + err = mtk_pinconf_bias_set(hw, desc, pullup); } } diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index a76be6cc26ee..5b764740b829 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -444,8 +444,7 @@ static int amd_gpio_irq_set_wake(struct irq_data *d, unsigned int on) unsigned long flags; struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct amd_gpio *gpio_dev = gpiochip_get_data(gc); - u32 wake_mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | - BIT(WAKE_CNTRL_OFF_S4); + u32 wake_mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3); raw_spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + (d->hwirq)*4); diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c index f831526d06ff..49e32684dbb2 100644 --- a/drivers/pinctrl/pinctrl-k210.c +++ b/drivers/pinctrl/pinctrl-k210.c @@ -950,23 +950,37 @@ static int k210_fpioa_probe(struct platform_device *pdev) return ret; pdata->pclk = devm_clk_get_optional(dev, "pclk"); - if (!IS_ERR(pdata->pclk)) - clk_prepare_enable(pdata->pclk); + if (!IS_ERR(pdata->pclk)) { + ret = clk_prepare_enable(pdata->pclk); + if (ret) + goto disable_clk; + } pdata->sysctl_map = syscon_regmap_lookup_by_phandle_args(np, "canaan,k210-sysctl-power", 1, &pdata->power_offset); - if (IS_ERR(pdata->sysctl_map)) - return PTR_ERR(pdata->sysctl_map); + if (IS_ERR(pdata->sysctl_map)) { + ret = PTR_ERR(pdata->sysctl_map); + goto disable_pclk; + } k210_fpioa_init_ties(pdata); pdata->pctl = pinctrl_register(&k210_pinctrl_desc, dev, (void *)pdata); - if (IS_ERR(pdata->pctl)) - return PTR_ERR(pdata->pctl); + if (IS_ERR(pdata->pctl)) { + ret = PTR_ERR(pdata->pctl); + goto disable_pclk; + } return 0; + +disable_pclk: + clk_disable_unprepare(pdata->pclk); +disable_clk: + clk_disable_unprepare(pdata->clk); + + return ret; } static const struct of_device_id k210_fpioa_dt_ids[] = { diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig index 
2f51b4f99393..cad4e60df618 100644 --- a/drivers/pinctrl/qcom/Kconfig +++ b/drivers/pinctrl/qcom/Kconfig @@ -13,7 +13,7 @@ config PINCTRL_MSM config PINCTRL_APQ8064 tristate "Qualcomm APQ8064 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -21,7 +21,7 @@ config PINCTRL_APQ8064 config PINCTRL_APQ8084 tristate "Qualcomm APQ8084 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -29,7 +29,7 @@ config PINCTRL_APQ8084 config PINCTRL_IPQ4019 tristate "Qualcomm IPQ4019 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -37,7 +37,7 @@ config PINCTRL_IPQ4019 config PINCTRL_IPQ8064 tristate "Qualcomm IPQ8064 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -45,7 +45,7 @@ config PINCTRL_IPQ8064 config PINCTRL_IPQ8074 tristate "Qualcomm Technologies, Inc. IPQ8074 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for @@ -55,7 +55,7 @@ config PINCTRL_IPQ8074 config PINCTRL_IPQ6018 tristate "Qualcomm Technologies, Inc. IPQ6018 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for @@ -65,7 +65,7 @@ config PINCTRL_IPQ6018 config PINCTRL_MSM8226 tristate "Qualcomm 8226 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -74,7 +74,7 @@ config PINCTRL_MSM8226 config PINCTRL_MSM8660 tristate "Qualcomm 8660 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -82,7 +82,7 @@ config PINCTRL_MSM8660 config PINCTRL_MSM8960 tristate "Qualcomm 8960 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -90,7 +90,7 @@ config PINCTRL_MSM8960 config PINCTRL_MDM9615 tristate "Qualcomm 9615 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -98,7 +98,7 @@ config PINCTRL_MDM9615 config PINCTRL_MSM8X74 tristate "Qualcomm 8x74 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -106,7 +106,7 @@ config PINCTRL_MSM8X74 config PINCTRL_MSM8916 tristate "Qualcomm 8916 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -114,7 +114,7 @@ config PINCTRL_MSM8916 config PINCTRL_MSM8953 tristate "Qualcomm 8953 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -124,7 +124,7 @@ config PINCTRL_MSM8953 config PINCTRL_MSM8976 tristate "Qualcomm 8976 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, 
pinmux, pinconf and gpiolib driver for the @@ -134,7 +134,7 @@ config PINCTRL_MSM8976 config PINCTRL_MSM8994 tristate "Qualcomm 8994 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -143,7 +143,7 @@ config PINCTRL_MSM8994 config PINCTRL_MSM8996 tristate "Qualcomm MSM8996 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -151,7 +151,7 @@ config PINCTRL_MSM8996 config PINCTRL_MSM8998 tristate "Qualcomm MSM8998 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -159,7 +159,7 @@ config PINCTRL_MSM8998 config PINCTRL_QCS404 tristate "Qualcomm QCS404 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -167,7 +167,7 @@ config PINCTRL_QCS404 config PINCTRL_QDF2XXX tristate "Qualcomm Technologies QDF2xxx pin controller driver" - depends on GPIOLIB && ACPI + depends on ACPI depends on PINCTRL_MSM help This is the GPIO driver for the TLMM block found on the @@ -175,7 +175,7 @@ config PINCTRL_QDF2XXX config PINCTRL_QCOM_SPMI_PMIC tristate "Qualcomm SPMI PMIC pin controller driver" - depends on GPIOLIB && OF && SPMI + depends on OF && SPMI select REGMAP_SPMI select PINMUX select PINCONF @@ -190,7 +190,7 @@ config PINCTRL_QCOM_SPMI_PMIC config PINCTRL_QCOM_SSBI_PMIC tristate "Qualcomm SSBI PMIC pin controller driver" - depends on GPIOLIB && OF + depends on OF select PINMUX select PINCONF select GENERIC_PINCONF @@ -204,7 +204,7 @@ config PINCTRL_QCOM_SSBI_PMIC config PINCTRL_SC7180 tristate "Qualcomm Technologies Inc SC7180 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -213,7 +213,7 @@ config PINCTRL_SC7180 config PINCTRL_SC7280 tristate "Qualcomm Technologies Inc SC7280 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -222,7 +222,7 @@ config PINCTRL_SC7280 config PINCTRL_SC8180X tristate "Qualcomm Technologies Inc SC8180x pin controller driver" - depends on GPIOLIB && (OF || ACPI) + depends on (OF || ACPI) depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -231,7 +231,7 @@ config PINCTRL_SC8180X config PINCTRL_SDM660 tristate "Qualcomm Technologies Inc SDM660 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -240,7 +240,7 @@ config PINCTRL_SDM660 config PINCTRL_SDM845 tristate "Qualcomm Technologies Inc SDM845 pin controller driver" - depends on GPIOLIB && (OF || ACPI) + depends on (OF || ACPI) depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -249,7 +249,7 @@ config PINCTRL_SDM845 config PINCTRL_SDX55 tristate "Qualcomm Technologies Inc SDX55 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -258,7 +258,7 @@ config PINCTRL_SDX55 config PINCTRL_SM6125 tristate "Qualcomm Technologies Inc SM6125 pin controller driver" - depends on GPIOLIB && OF 
+ depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -267,7 +267,7 @@ config PINCTRL_SM6125 config PINCTRL_SM8150 tristate "Qualcomm Technologies Inc SM8150 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -276,7 +276,7 @@ config PINCTRL_SM8150 config PINCTRL_SM8250 tristate "Qualcomm Technologies Inc SM8250 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -285,8 +285,7 @@ config PINCTRL_SM8250 config PINCTRL_SM8350 tristate "Qualcomm Technologies Inc SM8350 pin controller driver" - depends on GPIOLIB && OF - select PINCTRL_MSM + depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the Qualcomm Technologies Inc TLMM block found on the Qualcomm diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index dc8d39ae045b..9c7679c06dca 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -1219,10 +1219,12 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) } /* - * We suppose that we won't have any more functions than pins, - * we'll reallocate that later anyway + * Find an upper bound for the maximum number of functions: in + * the worst case we have gpio_in, gpio_out, irq and up to four + * special functions per pin, plus one entry for the sentinel. + * We'll reallocate that later anyway. */ - pctl->functions = kcalloc(pctl->ngroups, + pctl->functions = kcalloc(4 * pctl->ngroups + 4, sizeof(*pctl->functions), GFP_KERNEL); if (!pctl->functions) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 7d385c3b2239..d12db6c316ea 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -508,6 +508,7 @@ config THINKPAD_ACPI depends on RFKILL || RFKILL = n depends on ACPI_VIDEO || ACPI_VIDEO = n depends on BACKLIGHT_CLASS_DEVICE + depends on I2C select ACPI_PLATFORM_PROFILE select HWMON select NVRAM @@ -691,6 +692,7 @@ config INTEL_HID_EVENT tristate "INTEL HID Event" depends on ACPI depends on INPUT + depends on I2C select INPUT_SPARSEKMAP help This driver provides support for the Intel HID Event hotkey interface. @@ -742,6 +744,7 @@ config INTEL_VBTN tristate "INTEL VIRTUAL BUTTON" depends on ACPI depends on INPUT + depends on I2C select INPUT_SPARSEKMAP help This driver provides support for the Intel Virtual Button interface. 
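A minimal usage sketch for the dual_accel_detect() helper introduced further below in drivers/platform/x86/dual_accel_detect.h (which is why the Kconfig hunks above add "depends on I2C"); the helper and its intel-hid/intel-vbtn call sites are taken from this series, but the example driver and all other names here are purely illustrative:

#include <linux/platform_device.h>
#include "dual_accel_detect.h"	/* pulls in linux/i2c.h, hence the new I2C dependency */

static int example_probe(struct platform_device *pdev)
{
	/*
	 * On 360 degree hinge (yoga) 2-in-1s that expose two accelerometers,
	 * user space can compute the hinge angle itself, so skip registering
	 * an (unreliable) SW_TABLET_MODE switch, mirroring the intel-hid and
	 * intel-vbtn changes later in this series.
	 */
	if (dual_accel_detect()) {
		dev_info(&pdev->dev, "dual accelerometers present, not exposing SW_TABLET_MODE\n");
		return 0;
	}

	/* ... otherwise register the tablet-mode switch input device here ... */
	return 0;
}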
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 0cb927f0f301..a81dc4b191b7 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -41,6 +41,10 @@ static int wapf = -1; module_param(wapf, uint, 0444); MODULE_PARM_DESC(wapf, "WAPF value"); +static int tablet_mode_sw = -1; +module_param(tablet_mode_sw, uint, 0444); +MODULE_PARM_DESC(tablet_mode_sw, "Tablet mode detect: -1:auto 0:disable 1:kbd-dock 2:lid-flip"); + static struct quirk_entry *quirks; static bool asus_q500a_i8042_filter(unsigned char data, unsigned char str, @@ -458,6 +462,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_use_lid_flip_devid, }, + { + .callback = dmi_matched, + .ident = "ASUS TP200s / E205SA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "E205SA"), + }, + .driver_data = &quirk_asus_use_lid_flip_devid, + }, {}, }; @@ -477,6 +490,21 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) else wapf = quirks->wapf; + switch (tablet_mode_sw) { + case 0: + quirks->use_kbd_dock_devid = false; + quirks->use_lid_flip_devid = false; + break; + case 1: + quirks->use_kbd_dock_devid = true; + quirks->use_lid_flip_devid = false; + break; + case 2: + quirks->use_kbd_dock_devid = false; + quirks->use_lid_flip_devid = true; + break; + } + if (quirks->i8042_filter) { ret = i8042_install_filter(quirks->i8042_filter); if (ret) { diff --git a/drivers/platform/x86/dual_accel_detect.h b/drivers/platform/x86/dual_accel_detect.h new file mode 100644 index 000000000000..a9eae17cc43d --- /dev/null +++ b/drivers/platform/x86/dual_accel_detect.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Helper code to detect 360 degree hinges (yoga) style 2-in-1 devices using 2 accelerometers + * to allow the OS to determine the angle between the display and the base of the device. + * + * On Windows these are read by a special HingeAngleService process which calls undocumented + * ACPI methods, to let the firmware know if the 2-in-1 is in tablet- or laptop-mode. + * The firmware may use this to disable the kbd and touchpad to avoid spurious input in + * tablet-mode as well as to report SW_TABLET_MODE info to the OS. + * + * Since Linux does not call these undocumented methods, the SW_TABLET_MODE info reported + * by various drivers/platform/x86 drivers is incorrect. These drivers use the detection + * code in this file to disable SW_TABLET_MODE reporting to avoid reporting broken info + * (instead userspace can derive the status itself by directly reading the 2 accels). 
+ */ + +#include <linux/acpi.h> +#include <linux/i2c.h> + +static int dual_accel_i2c_resource_count(struct acpi_resource *ares, void *data) +{ + struct acpi_resource_i2c_serialbus *sb; + int *count = data; + + if (i2c_acpi_get_i2c_resource(ares, &sb)) + *count = *count + 1; + + return 1; +} + +static int dual_accel_i2c_client_count(struct acpi_device *adev) +{ + int ret, count = 0; + LIST_HEAD(r); + + ret = acpi_dev_get_resources(adev, &r, dual_accel_i2c_resource_count, &count); + if (ret < 0) + return ret; + + acpi_dev_free_resource_list(&r); + return count; +} + +static bool dual_accel_detect_bosc0200(void) +{ + struct acpi_device *adev; + int count; + + adev = acpi_dev_get_first_match_dev("BOSC0200", NULL, -1); + if (!adev) + return false; + + count = dual_accel_i2c_client_count(adev); + + acpi_dev_put(adev); + + return count == 2; +} + +static bool dual_accel_detect(void) +{ + /* Systems which use a pair of accels with KIOX010A / KIOX020A ACPI ids */ + if (acpi_dev_present("KIOX010A", NULL, -1) && + acpi_dev_present("KIOX020A", NULL, -1)) + return true; + + /* Systems which use a single DUAL250E ACPI device to model 2 accels */ + if (acpi_dev_present("DUAL250E", NULL, -1)) + return true; + + /* Systems which use a single BOSC0200 ACPI device to model 2 accels */ + if (dual_accel_detect_bosc0200()) + return true; + + return false; +} diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index fbb224a82e34..7f3a03f937f6 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -140,6 +140,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev) }} static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M S2H V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"), @@ -147,6 +148,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"), { } diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index e5fbe017f8e1..2e4e97a626a5 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/platform_device.h> #include <linux/suspend.h> +#include "dual_accel_detect.h" /* When NOT in tablet mode, VGBS returns with the flag 0x40 */ #define TABLET_MODE_FLAG BIT(6) @@ -122,6 +123,7 @@ struct intel_hid_priv { struct input_dev *array; struct input_dev *switches; bool wakeup_mode; + bool dual_accel; }; #define HID_EVENT_FILTER_UUID "eeec56b3-4442-408f-a792-4edd4d758054" @@ -451,22 +453,9 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) * SW_TABLET_MODE report, in these cases we enable support when receiving * the first event instead of during driver setup. * - * Some 360 degree hinges (yoga) style 2-in-1 devices use 2 accelerometers - * to allow the OS to determine the angle between the display and the base - * of the device. 
On Windows these are read by a special HingeAngleService - * process which calls an ACPI DSM (Device Specific Method) on the - * ACPI KIOX010A device node for the sensor in the display, to let the - * firmware know if the 2-in-1 is in tablet- or laptop-mode so that it can - * disable the kbd and touchpad to avoid spurious input in tablet-mode. - * - * The linux kxcjk1013 driver calls the DSM for this once at probe time - * to ensure that the builtin kbd and touchpad work. On some devices this - * causes a "spurious" 0xcd event on the intel-hid ACPI dev. In this case - * there is not a functional tablet-mode switch, so we should not register - * the tablet-mode switch device. + * See dual_accel_detect.h for more info on the dual_accel check. */ - if (!priv->switches && (event == 0xcc || event == 0xcd) && - !acpi_dev_present("KIOX010A", NULL, -1)) { + if (!priv->switches && !priv->dual_accel && (event == 0xcc || event == 0xcd)) { dev_info(&device->dev, "switch event received, enable switches supports\n"); err = intel_hid_switches_setup(device); if (err) @@ -607,6 +596,8 @@ static int intel_hid_probe(struct platform_device *device) return -ENOMEM; dev_set_drvdata(&device->dev, priv); + priv->dual_accel = dual_accel_detect(); + err = intel_hid_input_setup(device); if (err) { pr_err("Failed to setup Intel HID hotkeys\n"); diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index 888a764efad1..309166431063 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/platform_device.h> #include <linux/suspend.h> +#include "dual_accel_detect.h" /* Returned when NOT in tablet mode on some HP Stream x360 11 models */ #define VGBS_TABLET_MODE_FLAG_ALT 0x10 @@ -66,6 +67,7 @@ static const struct key_entry intel_vbtn_switchmap[] = { struct intel_vbtn_priv { struct input_dev *buttons_dev; struct input_dev *switches_dev; + bool dual_accel; bool has_buttons; bool has_switches; bool wakeup_mode; @@ -160,6 +162,10 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) input_dev = priv->buttons_dev; } else if ((ke = sparse_keymap_entry_from_scancode(priv->switches_dev, event))) { if (!priv->has_switches) { + /* See dual_accel_detect.h for more info */ + if (priv->dual_accel) + return; + dev_info(&device->dev, "Registering Intel Virtual Switches input-dev after receiving a switch event\n"); ret = input_register_device(priv->switches_dev); if (ret) @@ -248,11 +254,15 @@ static const struct dmi_system_id dmi_switches_allow_list[] = { {} /* Array terminator */ }; -static bool intel_vbtn_has_switches(acpi_handle handle) +static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel) { unsigned long long vgbs; acpi_status status; + /* See dual_accel_detect.h for more info */ + if (dual_accel) + return false; + if (!dmi_check_system(dmi_switches_allow_list)) return false; @@ -263,13 +273,14 @@ static bool intel_vbtn_has_switches(acpi_handle handle) static int intel_vbtn_probe(struct platform_device *device) { acpi_handle handle = ACPI_HANDLE(&device->dev); - bool has_buttons, has_switches; + bool dual_accel, has_buttons, has_switches; struct intel_vbtn_priv *priv; acpi_status status; int err; + dual_accel = dual_accel_detect(); has_buttons = acpi_has_method(handle, "VBDL"); - has_switches = intel_vbtn_has_switches(handle); + has_switches = intel_vbtn_has_switches(handle, dual_accel); if (!has_buttons && !has_switches) { dev_warn(&device->dev, "failed to read Intel 
Virtual Button driver\n"); @@ -281,6 +292,7 @@ static int intel_vbtn_probe(struct platform_device *device) return -ENOMEM; dev_set_drvdata(&device->dev, priv); + priv->dual_accel = dual_accel; priv->has_buttons = has_buttons; priv->has_switches = has_switches; diff --git a/drivers/platform/x86/pcengines-apuv2.c b/drivers/platform/x86/pcengines-apuv2.c index c37349f97bb8..d063d91db9bc 100644 --- a/drivers/platform/x86/pcengines-apuv2.c +++ b/drivers/platform/x86/pcengines-apuv2.c @@ -94,6 +94,7 @@ static struct gpiod_lookup_table gpios_led_table = { NULL, 1, GPIO_ACTIVE_LOW), GPIO_LOOKUP_IDX(AMD_FCH_GPIO_DRIVER_NAME, APU2_GPIO_LINE_LED3, NULL, 2, GPIO_ACTIVE_LOW), + {} /* Terminating entry */ } }; @@ -123,6 +124,7 @@ static struct gpiod_lookup_table gpios_key_table = { .table = { GPIO_LOOKUP_IDX(AMD_FCH_GPIO_DRIVER_NAME, APU2_GPIO_LINE_MODESW, NULL, 0, GPIO_ACTIVE_LOW), + {} /* Terminating entry */ } }; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 603156a6e3ed..50ff04c84650 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -73,6 +73,7 @@ #include <linux/uaccess.h> #include <acpi/battery.h> #include <acpi/video.h> +#include "dual_accel_detect.h" /* ThinkPad CMOS commands */ #define TP_CMOS_VOLUME_DOWN 0 @@ -3232,7 +3233,7 @@ static int hotkey_init_tablet_mode(void) * the laptop/tent/tablet mode to the EC. The bmc150 iio driver * does not support this, so skip the hotkey on these models. */ - if (has_tablet_mode && !acpi_dev_present("BOSC0200", "1", -1)) + if (has_tablet_mode && !dual_accel_detect()) tp_features.hotkey_tablet = TP_HOTKEY_TABLET_USES_GMMS; type = "GMMS"; } else if (acpi_evalf(hkey_handle, &res, "MHKG", "qd")) { diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 8b08745e1ca1..32660dc11354 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -8,6 +8,7 @@ menu "PTP clock support" config PTP_1588_CLOCK tristate "PTP clock support" depends on NET && POSIX_TIMERS + default ETHERNET select PPS select NET_PTP_CLASSIFY help @@ -26,6 +27,18 @@ config PTP_1588_CLOCK To compile this driver as a module, choose M here: the module will be called ptp. +config PTP_1588_CLOCK_OPTIONAL + tristate + default y if PTP_1588_CLOCK=n + default PTP_1588_CLOCK + help + Drivers that can optionally use the PTP_1588_CLOCK framework + should depend on this symbol to prevent them from being built + into vmlinux while the PTP support itself is in a loadable + module. + If PTP support is disabled, this dependency will still be + met, and drivers refer to dummy helpers. + config PTP_1588_CLOCK_DTE tristate "Broadcom DTE as PTP clock" depends on PTP_1588_CLOCK @@ -90,8 +103,9 @@ config PTP_1588_CLOCK_INES config PTP_1588_CLOCK_PCH tristate "Intel PCH EG20T as PTP clock" depends on X86_32 || COMPILE_TEST - depends on HAS_IOMEM && NET - imply PTP_1588_CLOCK + depends on HAS_IOMEM && PCI + depends on NET + depends on PTP_1588_CLOCK help This driver adds support for using the PCH EG20T as a PTP clock. 
The hardware supports time stamping of PTP packets @@ -158,11 +172,13 @@ config PTP_1588_CLOCK_OCP depends on PTP_1588_CLOCK depends on HAS_IOMEM && PCI depends on SPI && I2C && MTD + depends on !S390 imply SPI_MEM imply SPI_XILINX imply MTD_SPI_NOR imply I2C_XILINX select SERIAL_8250 + select NET_DEVLINK default n help diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 039d3a5c2a6f..caf9b37c5eb1 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -113,6 +113,8 @@ struct ts_reg { struct pps_reg { u32 ctrl; u32 status; + u32 __pad0[6]; + u32 cable_delay; }; #define PPS_STATUS_FILTER_ERR BIT(0) @@ -149,7 +151,8 @@ struct ptp_ocp { spinlock_t lock; struct ocp_reg __iomem *reg; struct tod_reg __iomem *tod; - struct pps_reg __iomem *pps_monitor; + struct pps_reg __iomem *pps_to_ext; + struct pps_reg __iomem *pps_to_clk; struct ptp_ocp_ext_src *pps; struct ptp_ocp_ext_src *ts0; struct ptp_ocp_ext_src *ts1; @@ -159,17 +162,15 @@ struct ptp_ocp { struct platform_device *i2c_ctrl; struct platform_device *spi_flash; struct clk_hw *i2c_clk; - struct devlink_health_reporter *health; struct timer_list watchdog; - time64_t gps_lost; + time64_t gnss_lost; int id; int n_irqs; - int gps_port; + int gnss_port; int mac_port; /* miniature atomic clock */ u8 serial[6]; int flash_start; bool has_serial; - bool pending_image; }; struct ocp_resource { @@ -181,7 +182,6 @@ struct ocp_resource { unsigned long bp_offset; }; -static void ptp_ocp_health_update(struct ptp_ocp *bp); static int ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r); static int ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r); static int ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r); @@ -251,7 +251,11 @@ static struct ocp_resource ocp_fb_resource[] = { }, }, { - OCP_MEM_RESOURCE(pps_monitor), + OCP_MEM_RESOURCE(pps_to_ext), + .offset = 0x01030000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(pps_to_clk), .offset = 0x01040000, .size = 0x10000, }, { @@ -267,7 +271,7 @@ static struct ocp_resource ocp_fb_resource[] = { .offset = 0x00150000, .size = 0x10000, .irq_vec = 7, }, { - OCP_SERIAL_RESOURCE(gps_port), + OCP_SERIAL_RESOURCE(gnss_port), .offset = 0x00160000 + 0x1000, .irq_vec = 3, }, { @@ -537,21 +541,19 @@ ptp_ocp_watchdog(struct timer_list *t) unsigned long flags; u32 status; - status = ioread32(&bp->pps_monitor->status); + status = ioread32(&bp->pps_to_clk->status); if (status & PPS_STATUS_SUPERV_ERR) { - iowrite32(status, &bp->pps_monitor->status); - if (!bp->gps_lost) { + iowrite32(status, &bp->pps_to_clk->status); + if (!bp->gnss_lost) { spin_lock_irqsave(&bp->lock, flags); __ptp_ocp_clear_drift_locked(bp); spin_unlock_irqrestore(&bp->lock, flags); - bp->gps_lost = ktime_get_real_seconds(); - ptp_ocp_health_update(bp); + bp->gnss_lost = ktime_get_real_seconds(); } - } else if (bp->gps_lost) { - bp->gps_lost = 0; - ptp_ocp_health_update(bp); + } else if (bp->gnss_lost) { + bp->gnss_lost = 0; } mod_timer(&bp->watchdog, jiffies + HZ); @@ -733,45 +735,6 @@ ptp_ocp_info(struct ptp_ocp *bp) ptp_ocp_tod_info(bp); } -static const struct devlink_param ptp_ocp_devlink_params[] = { -}; - -static void -ptp_ocp_devlink_set_params_init_values(struct devlink *devlink) -{ -} - -static int -ptp_ocp_devlink_register(struct devlink *devlink, struct device *dev) -{ - int err; - - err = devlink_register(devlink, dev); - if (err) - return err; - - err = devlink_params_register(devlink, ptp_ocp_devlink_params, - ARRAY_SIZE(ptp_ocp_devlink_params)); - 
ptp_ocp_devlink_set_params_init_values(devlink); - if (err) - goto out; - devlink_params_publish(devlink); - - return 0; - -out: - devlink_unregister(devlink); - return err; -} - -static void -ptp_ocp_devlink_unregister(struct devlink *devlink) -{ - devlink_params_unregister(devlink, ptp_ocp_devlink_params, - ARRAY_SIZE(ptp_ocp_devlink_params)); - devlink_unregister(devlink); -} - static struct device * ptp_ocp_find_flash(struct ptp_ocp *bp) { @@ -800,7 +763,7 @@ ptp_ocp_devlink_flash(struct devlink *devlink, struct device *dev, size_t off, len, resid, wrote; struct erase_info erase; size_t base, blksz; - int err; + int err = 0; off = 0; base = bp->flash_start; @@ -854,8 +817,6 @@ ptp_ocp_devlink_flash_update(struct devlink *devlink, msg = err ? "Flash error" : "Flash complete"; devlink_flash_update_status_notify(devlink, msg, NULL, 0, 0); - bp->pending_image = true; - put_device(dev); return err; } @@ -872,25 +833,18 @@ ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, if (err) return err; - if (bp->pending_image) { - err = devlink_info_version_stored_put(req, - "timecard", "pending"); - if (err) - return err; - } - if (bp->image) { u32 ver = ioread32(&bp->image->version); if (ver & 0xffff) { sprintf(buf, "%d", ver); err = devlink_info_version_running_put(req, - "timecard", + "fw", buf); } else { sprintf(buf, "%d", ver >> 16); err = devlink_info_version_running_put(req, - "golden flash", + "loader", buf); } if (err) @@ -915,58 +869,6 @@ static const struct devlink_ops ptp_ocp_devlink_ops = { .info_get = ptp_ocp_devlink_info_get, }; -static int -ptp_ocp_health_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, - struct netlink_ext_ack *extack) -{ - struct ptp_ocp *bp = devlink_health_reporter_priv(reporter); - char buf[32]; - int err; - - if (!bp->gps_lost) - return 0; - - sprintf(buf, "%ptT", &bp->gps_lost); - err = devlink_fmsg_string_pair_put(fmsg, "Lost sync at", buf); - if (err) - return err; - - return 0; -} - -static void -ptp_ocp_health_update(struct ptp_ocp *bp) -{ - int state; - - state = bp->gps_lost ? 
DEVLINK_HEALTH_REPORTER_STATE_ERROR - : DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; - - if (bp->gps_lost) - devlink_health_report(bp->health, "No GPS signal", NULL); - - devlink_health_reporter_state_update(bp->health, state); -} - -static const struct devlink_health_reporter_ops ptp_ocp_health_ops = { - .name = "gps_sync", - .diagnose = ptp_ocp_health_diagnose, -}; - -static void -ptp_ocp_devlink_health_register(struct devlink *devlink) -{ - struct ptp_ocp *bp = devlink_priv(devlink); - struct devlink_health_reporter *r; - - r = devlink_health_reporter_create(devlink, &ptp_ocp_health_ops, 0, bp); - if (IS_ERR(r)) - dev_err(&bp->pdev->dev, "Failed to create reporter, err %ld\n", - PTR_ERR(r)); - bp->health = r; -} - static void __iomem * __ptp_ocp_get_mem(struct ptp_ocp *bp, unsigned long start, int size) { @@ -1265,19 +1167,19 @@ serialnum_show(struct device *dev, struct device_attribute *attr, char *buf) static DEVICE_ATTR_RO(serialnum); static ssize_t -gps_sync_show(struct device *dev, struct device_attribute *attr, char *buf) +gnss_sync_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ptp_ocp *bp = dev_get_drvdata(dev); ssize_t ret; - if (bp->gps_lost) - ret = sysfs_emit(buf, "LOST @ %ptT\n", &bp->gps_lost); + if (bp->gnss_lost) + ret = sysfs_emit(buf, "LOST @ %ptT\n", &bp->gnss_lost); else ret = sysfs_emit(buf, "SYNC\n"); return ret; } -static DEVICE_ATTR_RO(gps_sync); +static DEVICE_ATTR_RO(gnss_sync); static ssize_t clock_source_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1334,7 +1236,7 @@ static DEVICE_ATTR_RO(available_clock_sources); static struct attribute *timecard_attrs[] = { &dev_attr_serialnum.attr, - &dev_attr_gps_sync.attr, + &dev_attr_gnss_sync.attr, &dev_attr_clock_source.attr, &dev_attr_available_clock_sources.attr, NULL, @@ -1367,7 +1269,7 @@ ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev) bp->ptp_info = ptp_ocp_clock_info; spin_lock_init(&bp->lock); - bp->gps_port = -1; + bp->gnss_port = -1; bp->mac_port = -1; bp->pdev = pdev; @@ -1381,7 +1283,6 @@ ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev) err = device_add(&bp->dev); if (err) { dev_err(&bp->dev, "device add failed: %d\n", err); - put_device(&bp->dev); goto out; } @@ -1391,6 +1292,7 @@ ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev) out: ptp_ocp_dev_release(&bp->dev); + put_device(&bp->dev); return err; } @@ -1426,9 +1328,9 @@ ptp_ocp_complete(struct ptp_ocp *bp) struct pps_device *pps; char buf[32]; - if (bp->gps_port != -1) { - sprintf(buf, "ttyS%d", bp->gps_port); - ptp_ocp_link_child(bp, buf, "ttyGPS"); + if (bp->gnss_port != -1) { + sprintf(buf, "ttyS%d", bp->gnss_port); + ptp_ocp_link_child(bp, buf, "ttyGNSS"); } if (bp->mac_port != -1) { sprintf(buf, "ttyS%d", bp->mac_port); @@ -1463,8 +1365,8 @@ ptp_ocp_resource_summary(struct ptp_ocp *bp) dev_info(dev, "golden image, version %d\n", ver >> 16); } - if (bp->gps_port != -1) - dev_info(dev, "GPS @ /dev/ttyS%d 115200\n", bp->gps_port); + if (bp->gnss_port != -1) + dev_info(dev, "GNSS @ /dev/ttyS%d 115200\n", bp->gnss_port); if (bp->mac_port != -1) dev_info(dev, "MAC @ /dev/ttyS%d 57600\n", bp->mac_port); } @@ -1474,7 +1376,7 @@ ptp_ocp_detach_sysfs(struct ptp_ocp *bp) { struct device *dev = &bp->dev; - sysfs_remove_link(&dev->kobj, "ttyGPS"); + sysfs_remove_link(&dev->kobj, "ttyGNSS"); sysfs_remove_link(&dev->kobj, "ttyMAC"); sysfs_remove_link(&dev->kobj, "ptp"); sysfs_remove_link(&dev->kobj, "pps"); @@ -1493,8 +1395,8 @@ ptp_ocp_detach(struct ptp_ocp *bp) 
ptp_ocp_unregister_ext(bp->ts1); if (bp->pps) ptp_ocp_unregister_ext(bp->pps); - if (bp->gps_port != -1) - serial8250_unregister_port(bp->gps_port); + if (bp->gnss_port != -1) + serial8250_unregister_port(bp->gnss_port); if (bp->mac_port != -1) serial8250_unregister_port(bp->mac_port); if (bp->spi_flash) @@ -1507,8 +1409,6 @@ ptp_ocp_detach(struct ptp_ocp *bp) pci_free_irq_vectors(bp->pdev); if (bp->ptp) ptp_clock_unregister(bp->ptp); - if (bp->health) - devlink_health_reporter_destroy(bp->health); device_unregister(&bp->dev); } @@ -1519,13 +1419,13 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct ptp_ocp *bp; int err; - devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp)); + devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev); if (!devlink) { dev_err(&pdev->dev, "devlink_alloc failed\n"); return -ENOMEM; } - err = ptp_ocp_devlink_register(devlink, &pdev->dev); + err = devlink_register(devlink); if (err) goto out_free; @@ -1538,7 +1438,7 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) bp = devlink_priv(devlink); err = ptp_ocp_device_init(bp, pdev); if (err) - goto out_unregister; + goto out_disable; /* compat mode. * Older FPGA firmware only returns 2 irq's. @@ -1571,16 +1471,16 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) ptp_ocp_info(bp); ptp_ocp_resource_summary(bp); - ptp_ocp_devlink_health_register(devlink); return 0; out: ptp_ocp_detach(bp); - pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); +out_disable: + pci_disable_device(pdev); out_unregister: - ptp_ocp_devlink_unregister(devlink); + devlink_unregister(devlink); out_free: devlink_free(devlink); @@ -1594,10 +1494,10 @@ ptp_ocp_remove(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(bp); ptp_ocp_detach(bp); - pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); + pci_disable_device(pdev); - ptp_ocp_devlink_unregister(devlink); + devlink_unregister(devlink); devlink_free(devlink); } diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index b3d96b747292..41b92dc2f011 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -154,7 +154,7 @@ static int unregister_vclock(struct device *dev, void *data) struct ptp_clock *ptp = dev_get_drvdata(dev); struct ptp_clock_info *info = ptp->info; struct ptp_vclock *vclock; - u8 *num = data; + u32 *num = data; vclock = info_to_vclock(info); dev_info(dev->parent, "delete virtual clock ptp%d\n", diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index e0f87c57749a..baee0379482b 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -149,6 +149,7 @@ void ptp_vclock_unregister(struct ptp_vclock *vclock) kfree(vclock); } +#if IS_BUILTIN(CONFIG_PTP_1588_CLOCK) int ptp_get_vclocks_index(int pclock_index, int **vclock_index) { char name[PTP_CLOCK_NAME_LEN] = ""; @@ -217,3 +218,4 @@ void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, hwtstamps->hwtstamp = ns_to_ktime(ns); } EXPORT_SYMBOL(ptp_convert_timestamp); +#endif diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 0de1a463c509..fb5d8152652d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1004,15 +1004,23 @@ static unsigned char dasd_eckd_path_access(void *conf_data, int conf_len) static void dasd_eckd_store_conf_data(struct dasd_device *device, struct dasd_conf_data *conf_data, int chp) { + struct dasd_eckd_private *private = device->private; struct channel_path_desc_fmt0 *chp_desc; struct 
subchannel_id sch_id; + void *cdp; - ccw_device_get_schid(device->cdev, &sch_id); /* * path handling and read_conf allocate data * free it before replacing the pointer + * also replace the old private->conf_data pointer + * with the new one if this points to the same data */ - kfree(device->path[chp].conf_data); + cdp = device->path[chp].conf_data; + if (private->conf_data == cdp) { + private->conf_data = (void *)conf_data; + dasd_eckd_identify_conf_parts(private); + } + ccw_device_get_schid(device->cdev, &sch_id); device->path[chp].conf_data = conf_data; device->path[chp].cssid = sch_id.cssid; device->path[chp].ssid = sch_id.ssid; @@ -1020,6 +1028,7 @@ static void dasd_eckd_store_conf_data(struct dasd_device *device, if (chp_desc) device->path[chp].chpid = chp_desc->chpid; kfree(chp_desc); + kfree(cdp); } static void dasd_eckd_clear_conf_data(struct dasd_device *device) diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index cff91b4f1a76..9c67b97faba2 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -74,6 +74,7 @@ config QETH_L2 def_tristate y prompt "qeth layer 2 device support" depends on QETH + depends on BRIDGE || BRIDGE=n help Select this option to be able to run qeth devices in layer 2 mode. To compile as a module, choose M. The module name is qeth_l2. diff --git a/drivers/s390/net/ctcm_fsms.c b/drivers/s390/net/ctcm_fsms.c index 377e3689d1d4..06281a0a0552 100644 --- a/drivers/s390/net/ctcm_fsms.c +++ b/drivers/s390/net/ctcm_fsms.c @@ -1444,7 +1444,7 @@ again: if (do_debug_ccw) ctcmpc_dumpit((char *)&ch->ccw[0], sizeof(struct ccw1) * 3); - dolock = !in_irq(); + dolock = !in_hardirq(); if (dolock) spin_lock_irqsave( get_ccwdev_lock(ch->cdev), saveflags); diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c index 19ee91acb89d..f0436f555c62 100644 --- a/drivers/s390/net/ctcm_mpc.c +++ b/drivers/s390/net/ctcm_mpc.c @@ -1773,7 +1773,7 @@ static void mpc_action_side_xid(fsm_instance *fsm, void *arg, int side) CTCM_D3_DUMP((char *)ch->xid, XID2_LENGTH); CTCM_D3_DUMP((char *)ch->xid_id, 4); - if (!in_irq()) { + if (!in_hardirq()) { /* Such conditional locking is a known problem for * sparse because its static undeterministic. * Warnings should be ignored here. 
*/ diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 69afc0311dd1..72e84ff9fea5 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -279,7 +279,7 @@ static void qeth_l2_set_pnso_mode(struct qeth_card *card, static void qeth_l2_dev2br_fdb_flush(struct qeth_card *card) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; QETH_CARD_TEXT(card, 2, "fdbflush"); @@ -636,7 +636,7 @@ static void qeth_l2_dev2br_fdb_notify(struct qeth_card *card, u8 code, struct net_if_token *token, struct mac_addr_lnid *addr_lnid) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; u8 ntfy_mac[ETH_ALEN]; ether_addr_copy(ntfy_mac, addr_lnid->mac); @@ -717,6 +717,227 @@ static int qeth_l2_dev2br_an_set(struct qeth_card *card, bool enable) return rc; } +struct qeth_l2_br2dev_event_work { + struct work_struct work; + struct net_device *br_dev; + struct net_device *lsync_dev; + struct net_device *dst_dev; + unsigned long event; + unsigned char addr[ETH_ALEN]; +}; + +static const struct net_device_ops qeth_l2_netdev_ops; + +static bool qeth_l2_must_learn(struct net_device *netdev, + struct net_device *dstdev) +{ + struct qeth_priv *priv; + + priv = netdev_priv(netdev); + return (netdev != dstdev && + (priv->brport_features & BR_LEARNING_SYNC) && + !(br_port_flag_is_set(netdev, BR_ISOLATED) && + br_port_flag_is_set(dstdev, BR_ISOLATED)) && + netdev->netdev_ops == &qeth_l2_netdev_ops); +} + +/** + * qeth_l2_br2dev_worker() - update local MACs + * @work: bridge to device FDB update + * + * Update local MACs of a learning_sync bridgeport so it can receive + * messages for a destination port. + * In case of an isolated learning_sync port, also update its isolated + * siblings. 
+ */ +static void qeth_l2_br2dev_worker(struct work_struct *work) +{ + struct qeth_l2_br2dev_event_work *br2dev_event_work = + container_of(work, struct qeth_l2_br2dev_event_work, work); + struct net_device *lsyncdev = br2dev_event_work->lsync_dev; + struct net_device *dstdev = br2dev_event_work->dst_dev; + struct net_device *brdev = br2dev_event_work->br_dev; + unsigned long event = br2dev_event_work->event; + unsigned char *addr = br2dev_event_work->addr; + struct qeth_card *card = lsyncdev->ml_priv; + struct net_device *lowerdev; + struct list_head *iter; + int err = 0; + + kfree(br2dev_event_work); + QETH_CARD_TEXT_(card, 4, "b2dw%04x", event); + QETH_CARD_TEXT_(card, 4, "ma%012lx", ether_addr_to_u64(addr)); + + rcu_read_lock(); + /* Verify preconditions are still valid: */ + if (!netif_is_bridge_port(lsyncdev) || + brdev != netdev_master_upper_dev_get_rcu(lsyncdev)) + goto unlock; + if (!qeth_l2_must_learn(lsyncdev, dstdev)) + goto unlock; + + if (br_port_flag_is_set(lsyncdev, BR_ISOLATED)) { + /* Update lsyncdev and its isolated sibling(s): */ + iter = &brdev->adj_list.lower; + lowerdev = netdev_next_lower_dev_rcu(brdev, &iter); + while (lowerdev) { + if (br_port_flag_is_set(lowerdev, BR_ISOLATED)) { + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = dev_uc_add(lowerdev, addr); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = dev_uc_del(lowerdev, addr); + break; + default: + break; + } + if (err) { + QETH_CARD_TEXT(card, 2, "b2derris"); + QETH_CARD_TEXT_(card, 2, + "err%02x%03d", event, + lowerdev->ifindex); + } + } + lowerdev = netdev_next_lower_dev_rcu(brdev, &iter); + } + } else { + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = dev_uc_add(lsyncdev, addr); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = dev_uc_del(lsyncdev, addr); + break; + default: + break; + } + if (err) + QETH_CARD_TEXT_(card, 2, "b2derr%02x", event); + } + +unlock: + rcu_read_unlock(); + dev_put(brdev); + dev_put(lsyncdev); + dev_put(dstdev); +} + +static int qeth_l2_br2dev_queue_work(struct net_device *brdev, + struct net_device *lsyncdev, + struct net_device *dstdev, + unsigned long event, + const unsigned char *addr) +{ + struct qeth_l2_br2dev_event_work *worker_data; + struct qeth_card *card; + + worker_data = kzalloc(sizeof(*worker_data), GFP_ATOMIC); + if (!worker_data) + return -ENOMEM; + INIT_WORK(&worker_data->work, qeth_l2_br2dev_worker); + worker_data->br_dev = brdev; + worker_data->lsync_dev = lsyncdev; + worker_data->dst_dev = dstdev; + worker_data->event = event; + ether_addr_copy(worker_data->addr, addr); + + card = lsyncdev->ml_priv; + /* Take a reference on the sw port devices and the bridge */ + dev_hold(brdev); + dev_hold(lsyncdev); + dev_hold(dstdev); + queue_work(card->event_wq, &worker_data->work); + return 0; +} + +/* Called under rtnl_lock */ +static int qeth_l2_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dstdev, *brdev, *lowerdev; + struct switchdev_notifier_fdb_info *fdb_info; + struct switchdev_notifier_info *info = ptr; + struct list_head *iter; + struct qeth_card *card; + int rc; + + if (!(event == SWITCHDEV_FDB_ADD_TO_DEVICE || + event == SWITCHDEV_FDB_DEL_TO_DEVICE)) + return NOTIFY_DONE; + + dstdev = switchdev_notifier_info_to_dev(info); + brdev = netdev_master_upper_dev_get_rcu(dstdev); + if (!brdev || !netif_is_bridge_master(brdev)) + return NOTIFY_DONE; + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + iter = &brdev->adj_list.lower; + lowerdev = 
netdev_next_lower_dev_rcu(brdev, &iter); + while (lowerdev) { + if (qeth_l2_must_learn(lowerdev, dstdev)) { + card = lowerdev->ml_priv; + QETH_CARD_TEXT_(card, 4, "b2dqw%03x", event); + rc = qeth_l2_br2dev_queue_work(brdev, lowerdev, + dstdev, event, + fdb_info->addr); + if (rc) { + QETH_CARD_TEXT(card, 2, "b2dqwerr"); + return NOTIFY_BAD; + } + } + lowerdev = netdev_next_lower_dev_rcu(brdev, &iter); + } + return NOTIFY_DONE; +} + +static struct notifier_block qeth_l2_sw_notifier = { + .notifier_call = qeth_l2_switchdev_event, +}; + +static refcount_t qeth_l2_switchdev_notify_refcnt; + +/* Called under rtnl_lock */ +static void qeth_l2_br2dev_get(void) +{ + int rc; + + if (!refcount_inc_not_zero(&qeth_l2_switchdev_notify_refcnt)) { + rc = register_switchdev_notifier(&qeth_l2_sw_notifier); + if (rc) { + QETH_DBF_MESSAGE(2, + "failed to register qeth_l2_sw_notifier: %d\n", + rc); + } else { + refcount_set(&qeth_l2_switchdev_notify_refcnt, 1); + QETH_DBF_MESSAGE(2, "qeth_l2_sw_notifier registered\n"); + } + } + QETH_DBF_TEXT_(SETUP, 2, "b2d+%04d", + qeth_l2_switchdev_notify_refcnt.refs.counter); +} + +/* Called under rtnl_lock */ +static void qeth_l2_br2dev_put(void) +{ + int rc; + + if (refcount_dec_and_test(&qeth_l2_switchdev_notify_refcnt)) { + rc = unregister_switchdev_notifier(&qeth_l2_sw_notifier); + if (rc) { + QETH_DBF_MESSAGE(2, + "failed to unregister qeth_l2_sw_notifier: %d\n", + rc); + } else { + QETH_DBF_MESSAGE(2, + "qeth_l2_sw_notifier unregistered\n"); + } + } + QETH_DBF_TEXT_(SETUP, 2, "b2d-%04d", + qeth_l2_switchdev_notify_refcnt.refs.counter); +} + static int qeth_l2_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, int nlflags) @@ -810,16 +1031,19 @@ static int qeth_l2_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, } else if (enable) { qeth_l2_set_pnso_mode(card, QETH_PNSO_ADDR_INFO); rc = qeth_l2_dev2br_an_set(card, true); - if (rc) + if (rc) { qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE); - else + } else { priv->brport_features |= BR_LEARNING_SYNC; + qeth_l2_br2dev_get(); + } } else { rc = qeth_l2_dev2br_an_set(card, false); if (!rc) { qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE); priv->brport_features ^= BR_LEARNING_SYNC; qeth_l2_dev2br_fdb_flush(card); + qeth_l2_br2dev_put(); } } mutex_unlock(&card->sbp_lock); @@ -2072,6 +2296,7 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) static void qeth_l2_remove_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + struct qeth_priv *priv; if (gdev->dev.type != &qeth_l2_devtype) device_remove_groups(&gdev->dev, qeth_l2_attr_groups); @@ -2083,8 +2308,15 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) qeth_set_offline(card, card->discipline, false); cancel_work_sync(&card->close_dev_work); - if (card->dev->reg_state == NETREG_REGISTERED) + if (card->dev->reg_state == NETREG_REGISTERED) { + priv = netdev_priv(card->dev); + if (priv->brport_features & BR_LEARNING_SYNC) { + rtnl_lock(); + qeth_l2_br2dev_put(); + rtnl_unlock(); + } unregister_netdev(card->dev); + } } static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok) @@ -2207,6 +2439,7 @@ EXPORT_SYMBOL_GPL(qeth_l2_discipline); static int __init qeth_l2_init(void) { pr_info("register layer 2 discipline\n"); + refcount_set(&qeth_l2_switchdev_notify_refcnt, 0); return 0; } diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig index 8b0deece9758..63c8a0f3cd0c 100644 --- a/drivers/scsi/cxgbi/cxgb4i/Kconfig +++ 
b/drivers/scsi/cxgbi/cxgb4i/Kconfig @@ -2,6 +2,7 @@ config SCSI_CXGB4_ISCSI tristate "Chelsio T4 iSCSI support" depends on PCI && INET && (IPV6 || IPV6=n) + depends on PTP_1588_CLOCK_OPTIONAL depends on THERMAL || !THERMAL depends on ETHERNET depends on TLS || TLS=n diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 25f6e1ac9e7b..66652ab409cc 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -453,8 +453,8 @@ static int initialize_controller(struct scsi_device *sdev, if (!h->ctlr) err = SCSI_DH_RES_TEMP_UNAVAIL; else { - list_add_rcu(&h->node, &h->ctlr->dh_list); h->sdev = sdev; + list_add_rcu(&h->node, &h->ctlr->dh_list); } spin_unlock(&list_lock); err = SCSI_DH_OK; @@ -778,11 +778,11 @@ static void rdac_bus_detach( struct scsi_device *sdev ) spin_lock(&list_lock); if (h->ctlr) { list_del_rcu(&h->node); - h->sdev = NULL; kref_put(&h->ctlr->kref, release_controller); } spin_unlock(&list_lock); sdev->handler_data = NULL; + synchronize_rcu(); kfree(h); } diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index bee1bec49c09..935b01ee44b7 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -807,6 +807,13 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost, for (i = 0; i < size; ++i) { struct ibmvfc_event *evt = &pool->events[i]; + /* + * evt->active states + * 1 = in flight + * 0 = being completed + * -1 = free/freed + */ + atomic_set(&evt->active, -1); atomic_set(&evt->free, 1); evt->crq.valid = 0x80; evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i)); @@ -1017,6 +1024,7 @@ static void ibmvfc_free_event(struct ibmvfc_event *evt) BUG_ON(!ibmvfc_valid_event(pool, evt)); BUG_ON(atomic_inc_return(&evt->free) != 1); + BUG_ON(atomic_dec_and_test(&evt->active)); spin_lock_irqsave(&evt->queue->l_lock, flags); list_add_tail(&evt->queue_list, &evt->queue->free); @@ -1072,6 +1080,12 @@ static void ibmvfc_complete_purge(struct list_head *purge_list) **/ static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code) { + /* + * Anything we are failing should still be active. Otherwise, it + * implies we already got a response for the command and are doing + * something bad like double completing it. 
+ */ + BUG_ON(!atomic_dec_and_test(&evt->active)); if (evt->cmnd) { evt->cmnd->result = (error_code << 16); evt->done = ibmvfc_scsi_eh_done; @@ -1723,6 +1737,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, evt->done(evt); } else { + atomic_set(&evt->active, 1); spin_unlock_irqrestore(&evt->queue->l_lock, flags); ibmvfc_trc_start(evt); } @@ -3251,7 +3266,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost, return; } - if (unlikely(atomic_read(&evt->free))) { + if (unlikely(atomic_dec_if_positive(&evt->active))) { dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", crq->ioba); return; @@ -3778,7 +3793,7 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost return; } - if (unlikely(atomic_read(&evt->free))) { + if (unlikely(atomic_dec_if_positive(&evt->active))) { dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", crq->ioba); return; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 4f0f3baefae4..92fb889d7eb0 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -745,6 +745,7 @@ struct ibmvfc_event { struct ibmvfc_target *tgt; struct scsi_cmnd *cmnd; atomic_t free; + atomic_t active; union ibmvfc_iu *xfer_iu; void (*done)(struct ibmvfc_event *evt); void (*_done)(struct ibmvfc_event *evt); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 5983e05b648f..e29523a1b530 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -13193,6 +13193,8 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) if (!phba) return -ENOMEM; + INIT_LIST_HEAD(&phba->poll_list); + /* Perform generic PCI device enabling operation */ error = lpfc_enable_pci_dev(phba); if (error) @@ -13327,7 +13329,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) /* Enable RAS FW log support */ lpfc_sli4_ras_setup(phba); - INIT_LIST_HEAD(&phba->poll_list); timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0); cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state, &phba->cpuhp); diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c index abf7b401f5b9..c509440bd161 100644 --- a/drivers/scsi/megaraid/megaraid_mm.c +++ b/drivers/scsi/megaraid/megaraid_mm.c @@ -238,7 +238,7 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) mimd_t mimd; uint32_t adapno; int iterator; - + bool is_found; if (copy_from_user(&mimd, umimd, sizeof(mimd_t))) { *rval = -EFAULT; @@ -254,12 +254,16 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) adapter = NULL; iterator = 0; + is_found = false; list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } - if (!adapter) { + if (!is_found) { *rval = -ENODEV; return NULL; } @@ -725,6 +729,7 @@ ioctl_done(uioc_t *kioc) uint32_t adapno; int iterator; mraid_mmadp_t* adapter; + bool is_found; /* * When the kioc returns from driver, make sure it still doesn't @@ -747,19 +752,23 @@ ioctl_done(uioc_t *kioc) iterator = 0; adapter = NULL; adapno = kioc->adapno; + is_found = false; con_log(CL_ANN, ( KERN_WARNING "megaraid cmm: completed " "ioctl that was timedout before\n")); list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } kioc->timedout = 0; - if (adapter) { + if (is_found) 
mraid_mm_dealloc_kioc( adapter, kioc ); - } + } else { wake_up(&wait_q); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 19b1c0cf5f2a..cf4a3a2c22ad 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7851,7 +7851,7 @@ _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc) return r; } - rc = _base_static_config_pages(ioc); + r = _base_static_config_pages(ioc); if (r) return r; diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 48548a95327b..32e60f0c3b14 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -684,8 +684,7 @@ int pm8001_dev_found(struct domain_device *dev) void pm8001_task_done(struct sas_task *task) { - if (!del_timer(&task->slow_task->timer)) - return; + del_timer(&task->slow_task->timer); complete(&task->slow_task->completion); } @@ -693,9 +692,14 @@ static void pm8001_tmf_timedout(struct timer_list *t) { struct sas_task_slow *slow = from_timer(slow, t, timer); struct sas_task *task = slow->task; + unsigned long flags; - task->task_state_flags |= SAS_TASK_STATE_ABORTED; - complete(&task->slow_task->completion); + spin_lock_irqsave(&task->task_state_lock, flags); + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { + task->task_state_flags |= SAS_TASK_STATE_ABORTED; + complete(&task->slow_task->completion); + } + spin_unlock_irqrestore(&task->task_state_lock, flags); } #define PM8001_TASK_TIMEOUT 20 @@ -748,13 +752,10 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev, } res = -TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. */ - if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { - pm8001_dbg(pm8001_ha, FAIL, - "TMF task[%x]timeout.\n", - tmf->tmf); - goto ex_err; - } + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n", + tmf->tmf); + goto ex_err; } if (task->task_status.resp == SAS_TASK_COMPLETE && @@ -834,12 +835,9 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha, wait_for_completion(&task->slow_task->completion); res = TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. 
*/ - if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { - pm8001_dbg(pm8001_ha, FAIL, - "TMF task timeout.\n"); - goto ex_err; - } + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + pm8001_dbg(pm8001_ha, FAIL, "TMF task timeout.\n"); + goto ex_err; } if (task->task_status.resp == SAS_TASK_COMPLETE && diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index b059bf2b61d4..5b6996a2401b 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -475,7 +475,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, error = shost->hostt->target_alloc(starget); if(error) { - dev_printk(KERN_ERR, dev, "target allocation failed, error %d\n", error); + if (error != -ENXIO) + dev_err(dev, "target allocation failed, error %d\n", error); /* don't want scsi_target_reap to do the final * put because it will be under the host lock */ scsi_target_destroy(starget); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 32489d25158f..ae9bfc658203 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -807,11 +807,14 @@ store_state_field(struct device *dev, struct device_attribute *attr, mutex_lock(&sdev->state_mutex); ret = scsi_device_set_state(sdev, state); /* - * If the device state changes to SDEV_RUNNING, we need to run - * the queue to avoid I/O hang. + * If the device state changes to SDEV_RUNNING, we need to + * rescan the device to revalidate it, and run the queue to + * avoid I/O hang. */ - if (ret == 0 && state == SDEV_RUNNING) + if (ret == 0 && state == SDEV_RUNNING) { + scsi_rescan_device(dev); blk_mq_run_hw_queues(sdev->request_queue, true); + } mutex_unlock(&sdev->state_mutex); return ret == 0 ? count : -EINVAL; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 94c254e9012e..a6d3ac0a6cbc 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -221,7 +221,7 @@ static unsigned int sr_get_events(struct scsi_device *sdev) else if (med->media_event_code == 2) return DISK_EVENT_MEDIA_CHANGE; else if (med->media_event_code == 3) - return DISK_EVENT_EJECT_REQUEST; + return DISK_EVENT_MEDIA_CHANGE; return 0; } diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 328bb961c281..37506b3fe5a9 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1199,14 +1199,24 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device, vstor_packet->vm_srb.sense_info_length); if (vstor_packet->vm_srb.scsi_status != 0 || - vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS) - storvsc_log(device, STORVSC_LOGGING_ERROR, + vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS) { + + /* + * Log TEST_UNIT_READY errors only as warnings. Hyper-V can + * return errors when detecting devices using TEST_UNIT_READY, + * and logging these as errors produces unhelpful noise. + */ + int loglevel = (stor_pkt->vm_srb.cdb[0] == TEST_UNIT_READY) ? 
+ STORVSC_LOGGING_WARN : STORVSC_LOGGING_ERROR; + + storvsc_log(device, loglevel, "tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x hv 0x%x\n", request->cmd->request->tag, stor_pkt->vm_srb.cdb[0], vstor_packet->vm_srb.scsi_status, vstor_packet->vm_srb.srb_status, vstor_packet->status); + } if (vstor_packet->vm_srb.scsi_status == SAM_STAT_CHECK_CONDITION && (vstor_packet->vm_srb.srb_status & SRB_STATUS_AUTOSENSE_VALID)) diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile index f678e4d9e585..a05e9fbcd3e0 100644 --- a/drivers/soc/Makefile +++ b/drivers/soc/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MACH_DOVE) += dove/ obj-y += fsl/ obj-$(CONFIG_ARCH_GEMINI) += gemini/ obj-y += imx/ -obj-$(CONFIG_ARCH_IXP4XX) += ixp4xx/ +obj-y += ixp4xx/ obj-$(CONFIG_SOC_XWAY) += lantiq/ obj-$(CONFIG_LITEX_SOC_CONTROLLER) += litex/ obj-y += mediatek/ diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c index 071e14496e4b..cc57a384d74d 100644 --- a/drivers/soc/imx/soc-imx8m.c +++ b/drivers/soc/imx/soc-imx8m.c @@ -5,8 +5,6 @@ #include <linux/init.h> #include <linux/io.h> -#include <linux/module.h> -#include <linux/nvmem-consumer.h> #include <linux/of_address.h> #include <linux/slab.h> #include <linux/sys_soc.h> @@ -31,7 +29,7 @@ struct imx8_soc_data { char *name; - u32 (*soc_revision)(struct device *dev); + u32 (*soc_revision)(void); }; static u64 soc_uid; @@ -52,7 +50,7 @@ static u32 imx8mq_soc_revision_from_atf(void) static inline u32 imx8mq_soc_revision_from_atf(void) { return 0; }; #endif -static u32 __init imx8mq_soc_revision(struct device *dev) +static u32 __init imx8mq_soc_revision(void) { struct device_node *np; void __iomem *ocotp_base; @@ -77,20 +75,9 @@ static u32 __init imx8mq_soc_revision(struct device *dev) rev = REV_B1; } - if (dev) { - int ret; - - ret = nvmem_cell_read_u64(dev, "soc_unique_id", &soc_uid); - if (ret) { - iounmap(ocotp_base); - of_node_put(np); - return ret; - } - } else { - soc_uid = readl_relaxed(ocotp_base + OCOTP_UID_HIGH); - soc_uid <<= 32; - soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW); - } + soc_uid = readl_relaxed(ocotp_base + OCOTP_UID_HIGH); + soc_uid <<= 32; + soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW); iounmap(ocotp_base); of_node_put(np); @@ -120,7 +107,7 @@ static void __init imx8mm_soc_uid(void) of_node_put(np); } -static u32 __init imx8mm_soc_revision(struct device *dev) +static u32 __init imx8mm_soc_revision(void) { struct device_node *np; void __iomem *anatop_base; @@ -138,15 +125,7 @@ static u32 __init imx8mm_soc_revision(struct device *dev) iounmap(anatop_base); of_node_put(np); - if (dev) { - int ret; - - ret = nvmem_cell_read_u64(dev, "soc_unique_id", &soc_uid); - if (ret) - return ret; - } else { - imx8mm_soc_uid(); - } + imx8mm_soc_uid(); return rev; } @@ -171,7 +150,7 @@ static const struct imx8_soc_data imx8mp_soc_data = { .soc_revision = imx8mm_soc_revision, }; -static __maybe_unused const struct of_device_id imx8_machine_match[] = { +static __maybe_unused const struct of_device_id imx8_soc_match[] = { { .compatible = "fsl,imx8mq", .data = &imx8mq_soc_data, }, { .compatible = "fsl,imx8mm", .data = &imx8mm_soc_data, }, { .compatible = "fsl,imx8mn", .data = &imx8mn_soc_data, }, @@ -179,20 +158,12 @@ static __maybe_unused const struct of_device_id imx8_machine_match[] = { { } }; -static __maybe_unused const struct of_device_id imx8_soc_match[] = { - { .compatible = "fsl,imx8mq-soc", .data = &imx8mq_soc_data, }, - { .compatible = "fsl,imx8mm-soc", .data = &imx8mm_soc_data, }, - { .compatible = "fsl,imx8mn-soc", .data = 
&imx8mn_soc_data, }, - { .compatible = "fsl,imx8mp-soc", .data = &imx8mp_soc_data, }, - { } -}; - #define imx8_revision(soc_rev) \ soc_rev ? \ kasprintf(GFP_KERNEL, "%d.%d", (soc_rev >> 4) & 0xf, soc_rev & 0xf) : \ "unknown" -static int imx8_soc_info(struct platform_device *pdev) +static int __init imx8_soc_init(void) { struct soc_device_attribute *soc_dev_attr; struct soc_device *soc_dev; @@ -211,10 +182,7 @@ static int imx8_soc_info(struct platform_device *pdev) if (ret) goto free_soc; - if (pdev) - id = of_match_node(imx8_soc_match, pdev->dev.of_node); - else - id = of_match_node(imx8_machine_match, of_root); + id = of_match_node(imx8_soc_match, of_root); if (!id) { ret = -ENODEV; goto free_soc; @@ -223,16 +191,8 @@ static int imx8_soc_info(struct platform_device *pdev) data = id->data; if (data) { soc_dev_attr->soc_id = data->name; - if (data->soc_revision) { - if (pdev) { - soc_rev = data->soc_revision(&pdev->dev); - ret = soc_rev; - if (ret < 0) - goto free_soc; - } else { - soc_rev = data->soc_revision(NULL); - } - } + if (data->soc_revision) + soc_rev = data->soc_revision(); } soc_dev_attr->revision = imx8_revision(soc_rev); @@ -270,24 +230,4 @@ free_soc: kfree(soc_dev_attr); return ret; } - -/* Retain device_initcall is for backward compatibility with DTS. */ -static int __init imx8_soc_init(void) -{ - if (of_find_matching_node_and_match(NULL, imx8_soc_match, NULL)) - return 0; - - return imx8_soc_info(NULL); -} device_initcall(imx8_soc_init); - -static struct platform_driver imx8_soc_info_driver = { - .probe = imx8_soc_info, - .driver = { - .name = "imx8_soc_info", - .of_match_table = imx8_soc_match, - }, -}; - -module_platform_driver(imx8_soc_info_driver); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c index 7bd19354982a..f490c4ca51f5 100644 --- a/drivers/soc/ixp4xx/ixp4xx-npe.c +++ b/drivers/soc/ixp4xx/ixp4xx-npe.c @@ -21,7 +21,6 @@ #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/soc/ixp4xx/npe.h> -#include <mach/hardware.h> #include <linux/soc/ixp4xx/cpu.h> #define DEBUG_MSG 0 @@ -694,8 +693,8 @@ static int ixp4xx_npe_probe(struct platform_device *pdev) if (!(ixp4xx_read_feature_bits() & (IXP4XX_FEATURE_RESET_NPEA << i))) { - dev_info(dev, "NPE%d at 0x%08x-0x%08x not available\n", - i, res->start, res->end); + dev_info(dev, "NPE%d at %pR not available\n", + i, res); continue; /* NPE already disabled or not present */ } npe->regs = devm_ioremap_resource(dev, res); @@ -703,13 +702,12 @@ static int ixp4xx_npe_probe(struct platform_device *pdev) return PTR_ERR(npe->regs); if (npe_reset(npe)) { - dev_info(dev, "NPE%d at 0x%08x-0x%08x does not reset\n", - i, res->start, res->end); + dev_info(dev, "NPE%d at %pR does not reset\n", + i, res); continue; } npe->valid = 1; - dev_info(dev, "NPE%d at 0x%08x-0x%08x registered\n", - i, res->start, res->end); + dev_info(dev, "NPE%d at %pR registered\n", i, res); found++; } diff --git a/drivers/soc/ixp4xx/ixp4xx-qmgr.c b/drivers/soc/ixp4xx/ixp4xx-qmgr.c index 7149510b307e..9154c7029b05 100644 --- a/drivers/soc/ixp4xx/ixp4xx-qmgr.c +++ b/drivers/soc/ixp4xx/ixp4xx-qmgr.c @@ -12,7 +12,6 @@ #include <linux/of.h> #include <linux/platform_device.h> #include <linux/soc/ixp4xx/qmgr.h> -#include <mach/hardware.h> #include <linux/soc/ixp4xx/cpu.h> static struct qmgr_regs __iomem *qmgr_regs; @@ -147,12 +146,12 @@ static irqreturn_t qmgr_irq1_a0(int irq, void *pdev) /* ACK - it may clear any bits so don't rely on it */ __raw_writel(0xFFFFFFFF, 
&qmgr_regs->irqstat[0]); - en_bitmap = qmgr_regs->irqen[0]; + en_bitmap = __raw_readl(&qmgr_regs->irqen[0]); while (en_bitmap) { i = __fls(en_bitmap); /* number of the last "low" queue */ en_bitmap &= ~BIT(i); - src = qmgr_regs->irqsrc[i >> 3]; - stat = qmgr_regs->stat1[i >> 3]; + src = __raw_readl(&qmgr_regs->irqsrc[i >> 3]); + stat = __raw_readl(&qmgr_regs->stat1[i >> 3]); if (src & 4) /* the IRQ condition is inverted */ stat = ~stat; if (stat & BIT(src & 3)) { @@ -172,7 +171,8 @@ static irqreturn_t qmgr_irq2_a0(int irq, void *pdev) /* ACK - it may clear any bits so don't rely on it */ __raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[1]); - req_bitmap = qmgr_regs->irqen[1] & qmgr_regs->statne_h; + req_bitmap = __raw_readl(&qmgr_regs->irqen[1]) & + __raw_readl(&qmgr_regs->statne_h); while (req_bitmap) { i = __fls(req_bitmap); /* number of the last "high" queue */ req_bitmap &= ~BIT(i); diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig index 20ace654553a..8b53ed1cc67e 100644 --- a/drivers/soc/tegra/Kconfig +++ b/drivers/soc/tegra/Kconfig @@ -15,7 +15,7 @@ config ARCH_TEGRA_2x_SOC select PL310_ERRATA_769419 if CACHE_L2X0 select SOC_TEGRA_FLOWCTRL select SOC_TEGRA_PMC - select SOC_TEGRA20_VOLTAGE_COUPLER + select SOC_TEGRA20_VOLTAGE_COUPLER if REGULATOR select TEGRA_TIMER help Support for NVIDIA Tegra AP20 and T20 processors, based on the @@ -29,7 +29,7 @@ config ARCH_TEGRA_3x_SOC select PL310_ERRATA_769419 if CACHE_L2X0 select SOC_TEGRA_FLOWCTRL select SOC_TEGRA_PMC - select SOC_TEGRA30_VOLTAGE_COUPLER + select SOC_TEGRA30_VOLTAGE_COUPLER if REGULATOR select TEGRA_TIMER help Support for NVIDIA Tegra T30 processor family, based on the @@ -155,7 +155,9 @@ config SOC_TEGRA_POWERGATE_BPMP config SOC_TEGRA20_VOLTAGE_COUPLER bool "Voltage scaling support for Tegra20 SoCs" depends on ARCH_TEGRA_2x_SOC || COMPILE_TEST + depends on REGULATOR config SOC_TEGRA30_VOLTAGE_COUPLER bool "Voltage scaling support for Tegra30 SoCs" depends on ARCH_TEGRA_3x_SOC || COMPILE_TEST + depends on REGULATOR diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index a2de23516553..101cc71bffa7 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -325,7 +325,15 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, f_pdata->inst_width = CQSPI_INST_TYPE_SINGLE; f_pdata->addr_width = CQSPI_INST_TYPE_SINGLE; f_pdata->data_width = CQSPI_INST_TYPE_SINGLE; - f_pdata->dtr = op->data.dtr && op->cmd.dtr && op->addr.dtr; + + /* + * For an op to be DTR, cmd phase along with every other non-empty + * phase should have dtr field set to 1. If an op phase has zero + * nbytes, ignore its dtr field; otherwise, check its dtr field. + */ + f_pdata->dtr = op->cmd.dtr && + (!op->addr.nbytes || op->addr.dtr) && + (!op->data.nbytes || op->data.dtr); switch (op->data.buswidth) { case 0: @@ -1228,8 +1236,15 @@ static bool cqspi_supports_mem_op(struct spi_mem *mem, { bool all_true, all_false; - all_true = op->cmd.dtr && op->addr.dtr && op->dummy.dtr && - op->data.dtr; + /* + * op->dummy.dtr is required for converting nbytes into ncycles. + * Also, don't check the dtr field of the op phase having zero nbytes. 
+ */ + all_true = op->cmd.dtr && + (!op->addr.nbytes || op->addr.dtr) && + (!op->dummy.nbytes || op->dummy.dtr) && + (!op->data.nbytes || op->data.dtr); + all_false = !op->cmd.dtr && !op->addr.dtr && !op->dummy.dtr && !op->data.dtr; diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 4aee3db6d6df..fa68e9817929 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -505,7 +505,9 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, struct spi_message *msg) { struct spi_device *spi = msg->spi; + struct spi_transfer *xfer; u32 ctrl = MX51_ECSPI_CTRL_ENABLE; + u32 min_speed_hz = ~0U; u32 testreg, delay; u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG); @@ -577,9 +579,21 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, * be asserted before the SCLK polarity changes, which would disrupt * the SPI communication as the device on the other end would consider * the change of SCLK polarity as a clock tick already. + * + * Because spi_imx->spi_bus_clk is only set in bitbang prepare_message + * callback, iterate over all the transfers in spi_message, find the + * one with lowest bus frequency, and use that bus frequency for the + * delay calculation. In case all transfers have speed_hz == 0, then + * min_speed_hz is ~0 and the resulting delay is zero. */ - delay = (2 * 1000000) / spi_imx->spi_bus_clk; - if (likely(delay < 10)) /* SCLK is faster than 100 kHz */ + list_for_each_entry(xfer, &msg->transfers, transfer_list) { + if (!xfer->speed_hz) + continue; + min_speed_hz = min(xfer->speed_hz, min_speed_hz); + } + + delay = (2 * 1000000) / min_speed_hz; + if (likely(delay < 10)) /* SCLK is faster than 200 kHz */ udelay(delay); else /* SCLK is _very_ slow */ usleep_range(delay, delay + 10); diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index b2c4621db34d..c208efeadd18 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -785,6 +785,8 @@ static int meson_spicc_remove(struct platform_device *pdev) clk_disable_unprepare(spicc->core); clk_disable_unprepare(spicc->pclk); + spi_master_put(spicc->master); + return 0; } diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 68dca8ceb3ad..7914255521c3 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -426,24 +426,15 @@ static int mtk_spi_fifo_transfer(struct spi_master *master, mtk_spi_prepare_transfer(master, xfer); mtk_spi_setup_packet(master); - cnt = xfer->len / 4; - if (xfer->tx_buf) + if (xfer->tx_buf) { + cnt = xfer->len / 4; iowrite32_rep(mdata->base + SPI_TX_DATA_REG, xfer->tx_buf, cnt); - - if (xfer->rx_buf) - ioread32_rep(mdata->base + SPI_RX_DATA_REG, xfer->rx_buf, cnt); - - remainder = xfer->len % 4; - if (remainder > 0) { - reg_val = 0; - if (xfer->tx_buf) { + remainder = xfer->len % 4; + if (remainder > 0) { + reg_val = 0; memcpy(®_val, xfer->tx_buf + (cnt * 4), remainder); writel(reg_val, mdata->base + SPI_TX_DATA_REG); } - if (xfer->rx_buf) { - reg_val = readl(mdata->base + SPI_RX_DATA_REG); - memcpy(xfer->rx_buf + (cnt * 4), ®_val, remainder); - } } mtk_spi_enable_transfer(master); diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c index 37dfc6e82804..9708b7827ff7 100644 --- a/drivers/spi/spi-mux.c +++ b/drivers/spi/spi-mux.c @@ -167,10 +167,17 @@ err_put_ctlr: return ret; } +static const struct spi_device_id spi_mux_id[] = { + { "spi-mux" }, + { } +}; +MODULE_DEVICE_TABLE(spi, spi_mux_id); + static const struct of_device_id spi_mux_of_match[] = { { .compatible = "spi-mux" }, { } 
}; +MODULE_DEVICE_TABLE(of, spi_mux_of_match); static struct spi_driver spi_mux_driver = { .probe = spi_mux_probe, @@ -178,6 +185,7 @@ static struct spi_driver spi_mux_driver = { .name = "spi-mux", .of_match_table = spi_mux_of_match, }, + .id_table = spi_mux_id, }; module_spi_driver(spi_mux_driver); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c99181165321..e4dc593b1f32 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -58,6 +58,10 @@ modalias_show(struct device *dev, struct device_attribute *a, char *buf) const struct spi_device *spi = to_spi_device(dev); int len; + len = of_device_modalias(dev, buf, PAGE_SIZE); + if (len != -ENODEV) + return len; + len = acpi_device_modalias(dev, buf, PAGE_SIZE - 1); if (len != -ENODEV) return len; diff --git a/drivers/staging/mt7621-pci/pci-mt7621.c b/drivers/staging/mt7621-pci/pci-mt7621.c index 691030e1a5ed..f9bdf4e33134 100644 --- a/drivers/staging/mt7621-pci/pci-mt7621.c +++ b/drivers/staging/mt7621-pci/pci-mt7621.c @@ -422,7 +422,6 @@ static void mt7621_pcie_init_ports(struct mt7621_pcie *pcie) dev_err(dev, "pcie%d no card, disable it (RST & CLK)\n", slot); mt7621_control_assert(port); - clk_disable_unprepare(port->clk); port->enabled = false; if (slot == 0) { diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c index 19a02e958865..8fcdf89da8aa 100644 --- a/drivers/staging/qlge/qlge_main.c +++ b/drivers/staging/qlge/qlge_main.c @@ -4547,7 +4547,8 @@ static int qlge_probe(struct pci_dev *pdev, static int cards_found; int err; - devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter)); + devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter), + &pdev->dev); if (!devlink) return -ENOMEM; @@ -4613,7 +4614,7 @@ static int qlge_probe(struct pci_dev *pdev, goto netdev_free; } - err = devlink_register(devlink, &pdev->dev); + err = devlink_register(devlink); if (err) goto netdev_free; diff --git a/drivers/staging/rtl8712/hal_init.c b/drivers/staging/rtl8712/hal_init.c index 22974277afa0..4eff3fdecdb8 100644 --- a/drivers/staging/rtl8712/hal_init.c +++ b/drivers/staging/rtl8712/hal_init.c @@ -29,21 +29,31 @@ #define FWBUFF_ALIGN_SZ 512 #define MAX_DUMP_FWSZ (48 * 1024) +static void rtl871x_load_fw_fail(struct _adapter *adapter) +{ + struct usb_device *udev = adapter->dvobjpriv.pusbdev; + struct device *dev = &udev->dev; + struct device *parent = dev->parent; + + complete(&adapter->rtl8712_fw_ready); + + dev_err(&udev->dev, "r8712u: Firmware request failed\n"); + + if (parent) + device_lock(parent); + + device_release_driver(dev); + + if (parent) + device_unlock(parent); +} + static void rtl871x_load_fw_cb(const struct firmware *firmware, void *context) { struct _adapter *adapter = context; if (!firmware) { - struct usb_device *udev = adapter->dvobjpriv.pusbdev; - struct usb_interface *usb_intf = adapter->pusb_intf; - - dev_err(&udev->dev, "r8712u: Firmware request failed\n"); - usb_put_dev(udev); - usb_set_intfdata(usb_intf, NULL); - r8712_free_drv_sw(adapter); - adapter->dvobj_deinit(adapter); - complete(&adapter->rtl8712_fw_ready); - free_netdev(adapter->pnetdev); + rtl871x_load_fw_fail(adapter); return; } adapter->fw = firmware; diff --git a/drivers/staging/rtl8712/rtl8712_led.c b/drivers/staging/rtl8712/rtl8712_led.c index 5901026949f2..d5fc9026b036 100644 --- a/drivers/staging/rtl8712/rtl8712_led.c +++ b/drivers/staging/rtl8712/rtl8712_led.c @@ -1820,3 +1820,11 @@ void LedControl871x(struct _adapter *padapter, enum LED_CTL_MODE LedAction) break; } } + +void 
r8712_flush_led_works(struct _adapter *padapter) +{ + struct led_priv *pledpriv = &padapter->ledpriv; + + flush_work(&pledpriv->SwLed0.BlinkWorkItem); + flush_work(&pledpriv->SwLed1.BlinkWorkItem); +} diff --git a/drivers/staging/rtl8712/rtl871x_led.h b/drivers/staging/rtl8712/rtl871x_led.h index ee19c873cf01..2f0768132ad8 100644 --- a/drivers/staging/rtl8712/rtl871x_led.h +++ b/drivers/staging/rtl8712/rtl871x_led.h @@ -112,6 +112,7 @@ struct led_priv { void r8712_InitSwLeds(struct _adapter *padapter); void r8712_DeInitSwLeds(struct _adapter *padapter); void LedControl871x(struct _adapter *padapter, enum LED_CTL_MODE LedAction); +void r8712_flush_led_works(struct _adapter *padapter); #endif diff --git a/drivers/staging/rtl8712/rtl871x_pwrctrl.c b/drivers/staging/rtl8712/rtl871x_pwrctrl.c index 23cff43437e2..cd6d9ff0bebc 100644 --- a/drivers/staging/rtl8712/rtl871x_pwrctrl.c +++ b/drivers/staging/rtl8712/rtl871x_pwrctrl.c @@ -224,3 +224,11 @@ void r8712_unregister_cmd_alive(struct _adapter *padapter) } mutex_unlock(&pwrctrl->mutex_lock); } + +void r8712_flush_rwctrl_works(struct _adapter *padapter) +{ + struct pwrctrl_priv *pwrctrl = &padapter->pwrctrlpriv; + + flush_work(&pwrctrl->SetPSModeWorkItem); + flush_work(&pwrctrl->rpwm_workitem); +} diff --git a/drivers/staging/rtl8712/rtl871x_pwrctrl.h b/drivers/staging/rtl8712/rtl871x_pwrctrl.h index bf6623cfaf27..b35b9c7920eb 100644 --- a/drivers/staging/rtl8712/rtl871x_pwrctrl.h +++ b/drivers/staging/rtl8712/rtl871x_pwrctrl.h @@ -108,5 +108,6 @@ void r8712_cpwm_int_hdl(struct _adapter *padapter, void r8712_set_ps_mode(struct _adapter *padapter, uint ps_mode, uint smart_ps); void r8712_set_rpwm(struct _adapter *padapter, u8 val8); +void r8712_flush_rwctrl_works(struct _adapter *padapter); #endif /* __RTL871X_PWRCTRL_H_ */ diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index 2434b13c8b12..505ebeb643dc 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -591,35 +591,30 @@ static void r871xu_dev_remove(struct usb_interface *pusb_intf) { struct net_device *pnetdev = usb_get_intfdata(pusb_intf); struct usb_device *udev = interface_to_usbdev(pusb_intf); + struct _adapter *padapter = netdev_priv(pnetdev); + + /* never exit with a firmware callback pending */ + wait_for_completion(&padapter->rtl8712_fw_ready); + usb_set_intfdata(pusb_intf, NULL); + release_firmware(padapter->fw); + if (drvpriv.drv_registered) + padapter->surprise_removed = true; + if (pnetdev->reg_state != NETREG_UNINITIALIZED) + unregister_netdev(pnetdev); /* will call netdev_close() */ + r8712_flush_rwctrl_works(padapter); + r8712_flush_led_works(padapter); + udelay(1); + /* Stop driver mlme relation timer */ + r8712_stop_drv_timers(padapter); + r871x_dev_unload(padapter); + r8712_free_drv_sw(padapter); + free_netdev(pnetdev); + + /* decrease the reference count of the usb device structure + * when disconnect + */ + usb_put_dev(udev); - if (pnetdev) { - struct _adapter *padapter = netdev_priv(pnetdev); - - /* never exit with a firmware callback pending */ - wait_for_completion(&padapter->rtl8712_fw_ready); - pnetdev = usb_get_intfdata(pusb_intf); - usb_set_intfdata(pusb_intf, NULL); - if (!pnetdev) - goto firmware_load_fail; - release_firmware(padapter->fw); - if (drvpriv.drv_registered) - padapter->surprise_removed = true; - if (pnetdev->reg_state != NETREG_UNINITIALIZED) - unregister_netdev(pnetdev); /* will call netdev_close() */ - flush_scheduled_work(); - udelay(1); - /* Stop driver mlme relation timer 
*/ - r8712_stop_drv_timers(padapter); - r871x_dev_unload(padapter); - r8712_free_drv_sw(padapter); - free_netdev(pnetdev); - - /* decrease the reference count of the usb device structure - * when disconnect - */ - usb_put_dev(udev); - } -firmware_load_fail: /* If we didn't unplug usb dongle and remove/insert module, driver * fails on sitesurvey for the first time when device is up. * Reset usb port for sitesurvey fail issue. diff --git a/drivers/staging/rtl8723bs/Kconfig b/drivers/staging/rtl8723bs/Kconfig index a88467334dac..7eae820eae3b 100644 --- a/drivers/staging/rtl8723bs/Kconfig +++ b/drivers/staging/rtl8723bs/Kconfig @@ -5,6 +5,7 @@ config RTL8723BS depends on m select WIRELESS_EXT select WEXT_PRIV + select CRYPTO_LIB_ARC4 help This option enables support for RTL8723BS SDIO drivers, such as the wifi found on the 1st gen Intel Compute Stick, the CHIP diff --git a/drivers/staging/rtl8723bs/hal/sdio_ops.c b/drivers/staging/rtl8723bs/hal/sdio_ops.c index 2dd251ce177e..a545832a468e 100644 --- a/drivers/staging/rtl8723bs/hal/sdio_ops.c +++ b/drivers/staging/rtl8723bs/hal/sdio_ops.c @@ -909,6 +909,8 @@ void sd_int_dpc(struct adapter *adapter) } else { rtw_c2h_wk_cmd(adapter, (u8 *)c2h_evt); } + } else { + kfree(c2h_evt); } } else { /* Error handling for malloc fail */ diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index 6e6eb836e9b6..945f03da0223 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -184,7 +184,7 @@ static struct tee_shm *get_msg_arg(struct tee_context *ctx, size_t num_params, struct optee_msg_arg *ma; shm = tee_shm_alloc(ctx, OPTEE_MSG_GET_ARG_SIZE(num_params), - TEE_SHM_MAPPED); + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (IS_ERR(shm)) return shm; @@ -416,11 +416,13 @@ void optee_enable_shm_cache(struct optee *optee) } /** - * optee_disable_shm_cache() - Disables caching of some shared memory allocation - * in OP-TEE + * __optee_disable_shm_cache() - Disables caching of some shared memory + * allocation in OP-TEE * @optee: main service struct + * @is_mapped: true if the cached shared memory addresses were mapped by this + * kernel, are safe to dereference, and should be freed */ -void optee_disable_shm_cache(struct optee *optee) +static void __optee_disable_shm_cache(struct optee *optee, bool is_mapped) { struct optee_call_waiter w; @@ -439,6 +441,13 @@ void optee_disable_shm_cache(struct optee *optee) if (res.result.status == OPTEE_SMC_RETURN_OK) { struct tee_shm *shm; + /* + * Shared memory references that were not mapped by + * this kernel must be ignored to prevent a crash. 
+ */ + if (!is_mapped) + continue; + shm = reg_pair_to_ptr(res.result.shm_upper32, res.result.shm_lower32); tee_shm_free(shm); @@ -449,6 +458,27 @@ void optee_disable_shm_cache(struct optee *optee) optee_cq_wait_final(&optee->call_queue, &w); } +/** + * optee_disable_shm_cache() - Disables caching of mapped shared memory + * allocations in OP-TEE + * @optee: main service struct + */ +void optee_disable_shm_cache(struct optee *optee) +{ + return __optee_disable_shm_cache(optee, true); +} + +/** + * optee_disable_unmapped_shm_cache() - Disables caching of shared memory + * allocations in OP-TEE which are not + * currently mapped + * @optee: main service struct + */ +void optee_disable_unmapped_shm_cache(struct optee *optee) +{ + return __optee_disable_shm_cache(optee, false); +} + #define PAGELIST_ENTRIES_PER_PAGE \ ((OPTEE_MSG_NONCONTIG_PAGE_SIZE / sizeof(u64)) - 1) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index ddb8f9ecf307..5ce13b099d7d 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/arm-smccc.h> +#include <linux/crash_dump.h> #include <linux/errno.h> #include <linux/io.h> #include <linux/module.h> @@ -277,7 +278,8 @@ static void optee_release(struct tee_context *ctx) if (!ctxdata) return; - shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (!IS_ERR(shm)) { arg = tee_shm_get_va(shm, 0); /* @@ -572,6 +574,13 @@ static optee_invoke_fn *get_invoke_func(struct device *dev) return ERR_PTR(-EINVAL); } +/* optee_remove - Device Removal Routine + * @pdev: platform device information struct + * + * optee_remove is called by platform subsystem to alert the driver + * that it should release the device + */ + static int optee_remove(struct platform_device *pdev) { struct optee *optee = platform_get_drvdata(pdev); @@ -602,6 +611,18 @@ static int optee_remove(struct platform_device *pdev) return 0; } +/* optee_shutdown - Device Removal Routine + * @pdev: platform device information struct + * + * platform_shutdown is called by the platform subsystem to alert + * the driver that a shutdown, reboot, or kexec is happening and + * device must be disabled. + */ +static void optee_shutdown(struct platform_device *pdev) +{ + optee_disable_shm_cache(platform_get_drvdata(pdev)); +} + static int optee_probe(struct platform_device *pdev) { optee_invoke_fn *invoke_fn; @@ -612,6 +633,16 @@ static int optee_probe(struct platform_device *pdev) u32 sec_caps; int rc; + /* + * The kernel may have crashed at the same time that all available + * secure world threads were suspended and we cannot reschedule the + * suspended threads without access to the crashed kernel's wait_queue. + * Therefore, we cannot reliably initialize the OP-TEE driver in the + * kdump kernel. + */ + if (is_kdump_kernel()) + return -ENODEV; + invoke_fn = get_invoke_func(&pdev->dev); if (IS_ERR(invoke_fn)) return PTR_ERR(invoke_fn); @@ -686,6 +717,15 @@ static int optee_probe(struct platform_device *pdev) optee->memremaped_shm = memremaped_shm; optee->pool = pool; + /* + * Ensure that there are no pre-existing shm objects before enabling + * the shm cache so that there's no chance of receiving an invalid + * address during shutdown. This could occur, for example, if we're + * kexec booting from an older kernel that did not properly cleanup the + * shm cache. 
+ */ + optee_disable_unmapped_shm_cache(optee); + optee_enable_shm_cache(optee); if (optee->sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) @@ -728,6 +768,7 @@ MODULE_DEVICE_TABLE(of, optee_dt_match); static struct platform_driver optee_driver = { .probe = optee_probe, .remove = optee_remove, + .shutdown = optee_shutdown, .driver = { .name = "optee", .of_match_table = optee_dt_match,
diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index e25b216a14ef..dbdd367be156 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -159,6 +159,7 @@ int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session); void optee_enable_shm_cache(struct optee *optee); void optee_disable_shm_cache(struct optee *optee); +void optee_disable_unmapped_shm_cache(struct optee *optee); int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, struct page **pages, size_t num_pages,
diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index 1849180b0278..efbaff7ad7e5 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -314,7 +314,7 @@ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, shm = cmd_alloc_suppl(ctx, sz); break; case OPTEE_RPC_SHM_TYPE_KERNEL: - shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED | TEE_SHM_PRIV); break; default: arg->ret = TEEC_ERROR_BAD_PARAMETERS; @@ -502,7 +502,8 @@ void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param, switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) { case OPTEE_SMC_RPC_FUNC_ALLOC: - shm = tee_shm_alloc(ctx, param->a1, TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, param->a1, + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) { reg_pair_from_64(&param->a1, &param->a2, pa); reg_pair_from_64(&param->a4, &param->a5,
diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c index d767eebf30bd..c41a9a501a6e 100644 --- a/drivers/tee/optee/shm_pool.c +++ b/drivers/tee/optee/shm_pool.c @@ -27,13 +27,19 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, shm->paddr = page_to_phys(page); shm->size = PAGE_SIZE << order; - if (shm->flags & TEE_SHM_DMA_BUF) { + /* + * Shared memory private to the OP-TEE driver doesn't need + * to be registered with OP-TEE.
+ */ + if (!(shm->flags & TEE_SHM_PRIV)) { unsigned int nr_pages = 1 << order, i; struct page **pages; pages = kcalloc(nr_pages, sizeof(pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; + if (!pages) { + rc = -ENOMEM; + goto err; + } for (i = 0; i < nr_pages; i++) { pages[i] = page; @@ -44,15 +50,21 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, rc = optee_shm_register(shm->ctx, shm, pages, nr_pages, (unsigned long)shm->kaddr); kfree(pages); + if (rc) + goto err; } + return 0; + +err: + __free_pages(page, order); return rc; } static void pool_op_free(struct tee_shm_pool_mgr *poolm, struct tee_shm *shm) { - if (shm->flags & TEE_SHM_DMA_BUF) + if (!(shm->flags & TEE_SHM_PRIV)) optee_shm_unregister(shm->ctx, shm); free_pages((unsigned long)shm->kaddr, get_order(shm->size)); diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 00472f5ce22e..8a9384a64f3e 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -117,7 +117,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) return ERR_PTR(-EINVAL); } - if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF))) { + if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF | TEE_SHM_PRIV))) { dev_err(teedev->dev.parent, "invalid shm flags 0x%x", flags); return ERR_PTR(-EINVAL); } @@ -193,6 +193,24 @@ err_dev_put: } EXPORT_SYMBOL_GPL(tee_shm_alloc); +/** + * tee_shm_alloc_kernel_buf() - Allocate shared memory for kernel buffer + * @ctx: Context that allocates the shared memory + * @size: Requested size of shared memory + * + * The returned memory registered in secure world and is suitable to be + * passed as a memory buffer in parameter argument to + * tee_client_invoke_func(). The memory allocated is later freed with a + * call to tee_shm_free(). + * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size) +{ + return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED); +} +EXPORT_SYMBOL_GPL(tee_shm_alloc_kernel_buf); + struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, size_t length, u32 flags) { diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 83b1ef3d5d03..10d6b228cc94 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -1875,18 +1875,6 @@ static struct attribute *switch_attrs[] = { NULL, }; -static bool has_port(const struct tb_switch *sw, enum tb_port_type type) -{ - const struct tb_port *port; - - tb_switch_for_each_port(sw, port) { - if (!port->disabled && port->config.type == type) - return true; - } - - return false; -} - static umode_t switch_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1895,8 +1883,7 @@ static umode_t switch_attr_is_visible(struct kobject *kobj, if (attr == &dev_attr_authorized.attr) { if (sw->tb->security_level == TB_SECURITY_NOPCIE || - sw->tb->security_level == TB_SECURITY_DPONLY || - !has_port(sw, TB_TYPE_PCIE_UP)) + sw->tb->security_level == TB_SECURITY_DPONLY) return 0; } else if (attr == &dev_attr_device.attr) { if (!sw->device) diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 4caab8714e2c..2350fb3bb5e4 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -329,6 +329,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int iir, lsr; + unsigned long flags; unsigned int space, count; iir = serial_port_in(port, 
UART_IIR); @@ -336,7 +337,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) if (iir & UART_IIR_NO_INT) return 0; - spin_lock(&port->lock); + spin_lock_irqsave(&port->lock, flags); lsr = serial_port_in(port, UART_LSR); @@ -370,7 +371,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) if (lsr & UART_LSR_THRE) serial8250_tx_chars(up); - uart_unlock_and_check_sysrq(port); + uart_unlock_and_check_sysrq_irqrestore(port, flags); return 1; } diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index 4e75d2e4f87c..fc65a2293ce9 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -30,10 +30,11 @@ struct fsl8250_data { int fsl8250_handle_irq(struct uart_port *port) { unsigned char lsr, orig_lsr; + unsigned long flags; unsigned int iir; struct uart_8250_port *up = up_to_u8250p(port); - spin_lock(&up->port.lock); + spin_lock_irqsave(&up->port.lock, flags); iir = port->serial_in(port, UART_IIR); if (iir & UART_IIR_NO_INT) { @@ -82,7 +83,7 @@ int fsl8250_handle_irq(struct uart_port *port) up->lsr_saved_flags = orig_lsr; - uart_unlock_and_check_sysrq(&up->port); + uart_unlock_and_check_sysrq_irqrestore(&up->port, flags); return 1; } diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index f7d3023f860f..fb65dc601b23 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -93,10 +93,13 @@ static void mtk8250_dma_rx_complete(void *param) struct dma_tx_state state; int copied, total, cnt; unsigned char *ptr; + unsigned long flags; if (data->rx_status == DMA_RX_SHUTDOWN) return; + spin_lock_irqsave(&up->port.lock, flags); + dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state); total = dma->rx_size - state.residue; cnt = total; @@ -120,6 +123,8 @@ static void mtk8250_dma_rx_complete(void *param) tty_flip_buffer_push(tty_port); mtk8250_rx_dma(up); + + spin_unlock_irqrestore(&up->port.lock, flags); } static void mtk8250_rx_dma(struct uart_8250_port *up) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 75827b608fdb..a808c283883e 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -3836,6 +3836,12 @@ static const struct pci_device_id blacklist[] = { { PCI_VDEVICE(INTEL, 0x0f0c), }, { PCI_VDEVICE(INTEL, 0x228a), }, { PCI_VDEVICE(INTEL, 0x228c), }, + { PCI_VDEVICE(INTEL, 0x4b96), }, + { PCI_VDEVICE(INTEL, 0x4b97), }, + { PCI_VDEVICE(INTEL, 0x4b98), }, + { PCI_VDEVICE(INTEL, 0x4b99), }, + { PCI_VDEVICE(INTEL, 0x4b9a), }, + { PCI_VDEVICE(INTEL, 0x4b9b), }, { PCI_VDEVICE(INTEL, 0x9ce3), }, { PCI_VDEVICE(INTEL, 0x9ce4), }, @@ -3996,6 +4002,7 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board) if (pci_match_id(pci_use_msi, dev)) { dev_dbg(&dev->dev, "Using MSI(-X) interrupts\n"); pci_set_master(dev); + uart.port.flags &= ~UPF_SHARE_IRQ; rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_ALL_TYPES); } else { dev_dbg(&dev->dev, "Using legacy interrupts\n"); diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 2164290cbd31..1da29a219842 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -311,7 +311,11 @@ static const struct serial8250_config uart_config[] = { /* Uart divisor latch read */ static int default_serial_dl_read(struct uart_8250_port *up) { - return serial_in(up, UART_DLL) | serial_in(up, UART_DLM) << 8; + /* Assign these in pieces to truncate any bits above 
7. */ + unsigned char dll = serial_in(up, UART_DLL); + unsigned char dlm = serial_in(up, UART_DLM); + + return dll | dlm << 8; } /* Uart divisor latch write */ @@ -1297,9 +1301,11 @@ static void autoconfig(struct uart_8250_port *up) serial_out(up, UART_LCR, 0); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - scratch = serial_in(up, UART_IIR) >> 6; - switch (scratch) { + /* Assign this as it is to truncate any bits above 7. */ + scratch = serial_in(up, UART_IIR); + + switch (scratch >> 6) { case 0: autoconfig_8250(up); break; @@ -1893,11 +1899,12 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) unsigned char status; struct uart_8250_port *up = up_to_u8250p(port); bool skip_rx = false; + unsigned long flags; if (iir & UART_IIR_NO_INT) return 0; - spin_lock(&port->lock); + spin_lock_irqsave(&port->lock, flags); status = serial_port_in(port, UART_LSR); @@ -1923,7 +1930,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) (up->ier & UART_IER_THRI)) serial8250_tx_chars(up); - uart_unlock_and_check_sysrq(port); + uart_unlock_and_check_sysrq_irqrestore(port, flags); return 1; } diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 508128ddfa01..f0e5da77ed6d 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1415,7 +1415,7 @@ static unsigned int lpuart_get_mctrl(struct uart_port *port) static unsigned int lpuart32_get_mctrl(struct uart_port *port) { - unsigned int mctrl = 0; + unsigned int mctrl = TIOCM_CAR | TIOCM_DSR | TIOCM_CTS; u32 reg; reg = lpuart32_read(port, UARTCTRL); diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 0c1e4df52215..ef11860cd69e 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1293,7 +1293,8 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty freq = uartclk; if (freq == 0) { dev_err(dev, "Cannot get clock rate\n"); - return -EINVAL; + ret = -EINVAL; + goto out_clk; } if (xtal) { diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index 222032792d6c..eba5b9ecba34 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -1045,9 +1045,11 @@ static int tegra_uart_hw_init(struct tegra_uart_port *tup) if (tup->cdata->fifo_mode_enable_status) { ret = tegra_uart_wait_fifo_mode_enabled(tup); - dev_err(tup->uport.dev, "FIFO mode not enabled\n"); - if (ret < 0) + if (ret < 0) { + dev_err(tup->uport.dev, + "Failed to enable FIFO mode: %d\n", ret); return ret; + } } else { /* * For all tegra devices (up to t210), there is a hardware diff --git a/drivers/usb/cdns3/cdns3-ep0.c b/drivers/usb/cdns3/cdns3-ep0.c index 02ec7ab4bb48..e29989d57bef 100644 --- a/drivers/usb/cdns3/cdns3-ep0.c +++ b/drivers/usb/cdns3/cdns3-ep0.c @@ -731,6 +731,7 @@ static int cdns3_gadget_ep0_queue(struct usb_ep *ep, request->actual = 0; priv_dev->status_completion_no_call = true; priv_dev->pending_status_request = request; + usb_gadget_set_state(&priv_dev->gadget, USB_STATE_CONFIGURED); spin_unlock_irqrestore(&priv_dev->lock, flags); /* diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index c23f53e9b1ef..27df0c697897 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1882,7 +1882,7 @@ static int __cdnsp_gadget_init(struct cdns *cdns) pdev->gadget.name = "cdnsp-gadget"; pdev->gadget.speed = USB_SPEED_UNKNOWN; pdev->gadget.sg_supported = 1; - pdev->gadget.max_speed = 
USB_SPEED_SUPER_PLUS; + pdev->gadget.max_speed = max_speed; pdev->gadget.lpm_capable = 1; pdev->setup_buf = kzalloc(CDNSP_EP0_SETUP_SIZE, GFP_KERNEL); diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 783ca8ffde00..f740fa6089d8 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -383,8 +383,8 @@ struct cdnsp_intr_reg { #define IMAN_IE BIT(1) #define IMAN_IP BIT(0) /* bits 2:31 need to be preserved */ -#define IMAN_IE_SET(p) (((p) & IMAN_IE) | 0x2) -#define IMAN_IE_CLEAR(p) (((p) & IMAN_IE) & ~(0x2)) +#define IMAN_IE_SET(p) ((p) | IMAN_IE) +#define IMAN_IE_CLEAR(p) ((p) & ~IMAN_IE) /* IMOD - Interrupter Moderation Register - irq_control bitmasks. */ /* diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 68972746e363..1b1438457fb0 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1932,15 +1932,13 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) } if (enqd_len + trb_buff_len >= full_len) { - if (need_zero_pkt && zero_len_trb) { - zero_len_trb = true; - } else { - field &= ~TRB_CHAIN; - field |= TRB_IOC; - more_trbs_coming = false; - need_zero_pkt = false; - preq->td.last_trb = ring->enqueue; - } + if (need_zero_pkt) + zero_len_trb = !zero_len_trb; + + field &= ~TRB_CHAIN; + field |= TRB_IOC; + more_trbs_coming = false; + preq->td.last_trb = ring->enqueue; } /* Only set interrupt on short packet for OUT endpoints. */ @@ -1955,7 +1953,7 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); - cdnsp_queue_trb(pdev, ring, more_trbs_coming | need_zero_pkt, + cdnsp_queue_trb(pdev, ring, more_trbs_coming | zero_len_trb, lower_32_bits(send_addr), upper_32_bits(send_addr), length_field, diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 74d5a9c5238a..73f419adce61 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -2324,17 +2324,10 @@ static void usbtmc_interrupt(struct urb *urb) dev_err(dev, "overflow with length %d, actual length is %d\n", data->iin_wMaxPacketSize, urb->actual_length); fallthrough; - case -ECONNRESET: - case -ENOENT: - case -ESHUTDOWN: - case -EILSEQ: - case -ETIME: - case -EPIPE: + default: /* urb terminated, clean up */ dev_dbg(dev, "urb terminated, status: %d\n", status); return; - default: - dev_err(dev, "unknown status received: %d\n", status); } exit: rv = usb_submit_urb(urb, GFP_ATOMIC); diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c index 3740cf95560e..0697fde51d00 100644 --- a/drivers/usb/common/usb-otg-fsm.c +++ b/drivers/usb/common/usb-otg-fsm.c @@ -193,7 +193,11 @@ static void otg_start_hnp_polling(struct otg_fsm *fsm) if (!fsm->host_req_flag) return; - INIT_DELAYED_WORK(&fsm->hnp_polling_work, otg_hnp_polling_work); + if (!fsm->hnp_work_inited) { + INIT_DELAYED_WORK(&fsm->hnp_polling_work, otg_hnp_polling_work); + fsm->hnp_work_inited = true; + } + schedule_delayed_work(&fsm->hnp_polling_work, msecs_to_jiffies(T_HOST_REQ_POLL)); } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 45f2bc0807e8..84fe57ef5a49 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2250,6 +2250,17 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) } /* + * Avoid issuing a runtime resume if the device is already in the + * suspended state during gadget disconnect. 
DWC3 gadget was already + * halted/stopped during runtime suspend. + */ + if (!is_on) { + pm_runtime_barrier(dwc->dev); + if (pm_runtime_suspended(dwc->dev)) + return 0; + } + + /* * Check the return value for successful resume, or error. For a * successful resume, the DWC3 runtime PM resume routine will handle * the run stop sequence, so avoid duplicate operations here. diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 02683ac0719d..bb476e121eae 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -41,6 +41,7 @@ struct f_hidg { unsigned char bInterfaceSubClass; unsigned char bInterfaceProtocol; unsigned char protocol; + unsigned char idle; unsigned short report_desc_length; char *report_desc; unsigned short report_length; @@ -338,6 +339,11 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, spin_lock_irqsave(&hidg->write_spinlock, flags); + if (!hidg->req) { + spin_unlock_irqrestore(&hidg->write_spinlock, flags); + return -ESHUTDOWN; + } + #define WRITE_COND (!hidg->write_pending) try_again: /* write queue */ @@ -358,8 +364,14 @@ try_again: count = min_t(unsigned, count, hidg->report_length); spin_unlock_irqrestore(&hidg->write_spinlock, flags); - status = copy_from_user(req->buf, buffer, count); + if (!req) { + ERROR(hidg->func.config->cdev, "hidg->req is NULL\n"); + status = -ESHUTDOWN; + goto release_write_pending; + } + + status = copy_from_user(req->buf, buffer, count); if (status != 0) { ERROR(hidg->func.config->cdev, "copy_from_user error\n"); @@ -387,14 +399,17 @@ try_again: spin_unlock_irqrestore(&hidg->write_spinlock, flags); + if (!hidg->in_ep->enabled) { + ERROR(hidg->func.config->cdev, "in_ep is disabled\n"); + status = -ESHUTDOWN; + goto release_write_pending; + } + status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC); - if (status < 0) { - ERROR(hidg->func.config->cdev, - "usb_ep_queue error on int endpoint %zd\n", status); + if (status < 0) goto release_write_pending; - } else { + else status = count; - } return status; release_write_pending: @@ -523,6 +538,14 @@ static int hidg_setup(struct usb_function *f, goto respond; break; + case ((USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 + | HID_REQ_GET_IDLE): + VDBG(cdev, "get_idle\n"); + length = min_t(unsigned int, length, 1); + ((u8 *) req->buf)[0] = hidg->idle; + goto respond; + break; + case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 | HID_REQ_SET_REPORT): VDBG(cdev, "set_report | wLength=%d\n", ctrl->wLength); @@ -546,6 +569,14 @@ static int hidg_setup(struct usb_function *f, goto stall; break; + case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 + | HID_REQ_SET_IDLE): + VDBG(cdev, "set_idle\n"); + length = 0; + hidg->idle = value >> 8; + goto respond; + break; + case ((USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_INTERFACE) << 8 | USB_REQ_GET_DESCRIPTOR): switch (value >> 8) { @@ -773,6 +804,7 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) hidg_interface_desc.bInterfaceSubClass = hidg->bInterfaceSubClass; hidg_interface_desc.bInterfaceProtocol = hidg->bInterfaceProtocol; hidg->protocol = HID_REPORT_PROTOCOL; + hidg->idle = 1; hidg_ss_in_ep_desc.wMaxPacketSize = cpu_to_le16(hidg->report_length); hidg_ss_in_comp_desc.wBytesPerInterval = cpu_to_le16(hidg->report_length); diff --git a/drivers/usb/gadget/udc/max3420_udc.c b/drivers/usb/gadget/udc/max3420_udc.c index 34f4db554977..d2a2b20cc1ad 100644 --- a/drivers/usb/gadget/udc/max3420_udc.c +++ 
b/drivers/usb/gadget/udc/max3420_udc.c @@ -1255,12 +1255,14 @@ static int max3420_probe(struct spi_device *spi) err = devm_request_irq(&spi->dev, irq, max3420_irq_handler, 0, "max3420", udc); if (err < 0) - return err; + goto del_gadget; udc->thread_task = kthread_create(max3420_thread, udc, "max3420-thread"); - if (IS_ERR(udc->thread_task)) - return PTR_ERR(udc->thread_task); + if (IS_ERR(udc->thread_task)) { + err = PTR_ERR(udc->thread_task); + goto del_gadget; + } irq = of_irq_get_byname(spi->dev.of_node, "vbus"); if (irq <= 0) { /* no vbus irq implies self-powered design */ @@ -1280,10 +1282,14 @@ static int max3420_probe(struct spi_device *spi) err = devm_request_irq(&spi->dev, irq, max3420_vbus_handler, 0, "vbus", udc); if (err < 0) - return err; + goto del_gadget; } return 0; + +del_gadget: + usb_del_gadget_udc(&udc->gadget); + return err; } static int max3420_remove(struct spi_device *spi) diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index 9bbd7ddd0003..a24aea3d2759 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -611,8 +611,6 @@ ohci_hcd_at91_drv_suspend(struct device *dev) if (ohci_at91->wakeup) enable_irq_wake(hcd->irq); - ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); - ret = ohci_suspend(hcd, ohci_at91->wakeup); if (ret) { if (ohci_at91->wakeup) @@ -632,7 +630,10 @@ ohci_hcd_at91_drv_suspend(struct device *dev) /* flush the writes */ (void) ohci_readl (ohci, &ohci->regs->control); msleep(1); + ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); at91_stop_clock(ohci_at91); + } else { + ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); } return ret; @@ -644,6 +645,8 @@ ohci_hcd_at91_drv_resume(struct device *dev) struct usb_hcd *hcd = dev_get_drvdata(dev); struct ohci_at91_priv *ohci_at91 = hcd_to_ohci_at91_priv(hcd); + ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0); + if (ohci_at91->wakeup) disable_irq_wake(hcd->irq); else @@ -651,8 +654,6 @@ ohci_hcd_at91_drv_resume(struct device *dev) ohci_resume(hcd, false); - ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0); - return 0; } diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 640a46f0d118..f086960fe2b5 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -35,6 +35,7 @@ struct omap2430_glue { struct device *control_otghs; unsigned int is_runtime_suspended:1; unsigned int needs_resume:1; + unsigned int phy_suspended:1; }; #define glue_to_musb(g) platform_get_drvdata(g->musb) @@ -458,8 +459,10 @@ static int omap2430_runtime_suspend(struct device *dev) omap2430_low_level_exit(musb); - phy_power_off(musb->phy); - phy_exit(musb->phy); + if (!glue->phy_suspended) { + phy_power_off(musb->phy); + phy_exit(musb->phy); + } glue->is_runtime_suspended = 1; @@ -474,8 +477,10 @@ static int omap2430_runtime_resume(struct device *dev) if (!musb) return 0; - phy_init(musb->phy); - phy_power_on(musb->phy); + if (!glue->phy_suspended) { + phy_init(musb->phy); + phy_power_on(musb->phy); + } omap2430_low_level_init(musb); musb_writel(musb->mregs, OTG_INTERFSEL, @@ -489,9 +494,23 @@ static int omap2430_runtime_resume(struct device *dev) return 0; } +/* I2C and SPI PHYs need to be suspended before the glue layer */ static int omap2430_suspend(struct device *dev) { struct omap2430_glue *glue = dev_get_drvdata(dev); + struct musb *musb = glue_to_musb(glue); + + phy_power_off(musb->phy); + phy_exit(musb->phy); + glue->phy_suspended = 1; + + return 0; +} + +/* Glue layer needs to be suspended after musb_suspend() */ +static int 
omap2430_suspend_late(struct device *dev) +{ + struct omap2430_glue *glue = dev_get_drvdata(dev); if (glue->is_runtime_suspended) return 0; @@ -501,7 +520,7 @@ static int omap2430_suspend(struct device *dev) return omap2430_runtime_suspend(dev); } -static int omap2430_resume(struct device *dev) +static int omap2430_resume_early(struct device *dev) { struct omap2430_glue *glue = dev_get_drvdata(dev); @@ -513,10 +532,24 @@ static int omap2430_resume(struct device *dev) return omap2430_runtime_resume(dev); } +static int omap2430_resume(struct device *dev) +{ + struct omap2430_glue *glue = dev_get_drvdata(dev); + struct musb *musb = glue_to_musb(glue); + + phy_init(musb->phy); + phy_power_on(musb->phy); + glue->phy_suspended = 0; + + return 0; +} + static const struct dev_pm_ops omap2430_pm_ops = { .runtime_suspend = omap2430_runtime_suspend, .runtime_resume = omap2430_runtime_resume, .suspend = omap2430_suspend, + .suspend_late = omap2430_suspend_late, + .resume_early = omap2430_resume_early, .resume = omap2430_resume, }; diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 2db917eab799..8a521b5ea769 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -851,6 +851,7 @@ static struct usb_serial_driver ch341_device = { .owner = THIS_MODULE, .name = "ch341-uart", }, + .bulk_in_size = 512, .id_table = id_table, .num_ports = 1, .open = ch341_open, diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 4a1f3a95d017..33bbb3470ca3 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -219,6 +219,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_MTXORB_6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_R2000KU_TRUE_RNG) }, { USB_DEVICE(FTDI_VID, FTDI_VARDAAN_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_AUTO_M3_OP_COM_V2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0100_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0101_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0102_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index add602bebd82..755858ca20ba 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -159,6 +159,9 @@ /* Vardaan Enterprises Serial Interface VEUSB422R3 */ #define FTDI_VARDAAN_PID 0xF070 +/* Auto-M3 Ltd. - OP-COM USB V2 - OBD interface Adapter */ +#define FTDI_AUTO_M3_OP_COM_V2_PID 0x4f50 + /* * Xsens Technologies BV products (http://www.xsens.com). 
*/ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0fbe253dc570..039450069ca4 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1203,6 +1203,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1055, 0xff), /* Telit FN980 (PCIe) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff), /* Telit FD980 */ + .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 2f2f5047452b..930b3d50a330 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -418,24 +418,34 @@ static int pl2303_detect_type(struct usb_serial *serial) bcdDevice = le16_to_cpu(desc->bcdDevice); bcdUSB = le16_to_cpu(desc->bcdUSB); - switch (bcdDevice) { - case 0x100: - /* - * Assume it's an HXN-type if the device doesn't support the old read - * request value. - */ - if (bcdUSB == 0x200 && !pl2303_supports_hx_status(serial)) - return TYPE_HXN; + switch (bcdUSB) { + case 0x110: + switch (bcdDevice) { + case 0x300: + return TYPE_HX; + case 0x400: + return TYPE_HXD; + default: + return TYPE_HX; + } break; - case 0x300: - if (bcdUSB == 0x200) + case 0x200: + switch (bcdDevice) { + case 0x100: + case 0x305: + /* + * Assume it's an HXN-type if the device doesn't + * support the old read request value. + */ + if (!pl2303_supports_hx_status(serial)) + return TYPE_HXN; + break; + case 0x300: return TYPE_TA; - - return TYPE_HX; - case 0x400: - return TYPE_HXD; - case 0x500: - return TYPE_TB; + case 0x500: + return TYPE_TB; + } + break; } dev_err(&serial->interface->dev, diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 5b22a1c931a9..b9bb63d749ec 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5369,7 +5369,7 @@ EXPORT_SYMBOL_GPL(tcpm_pd_hard_reset); void tcpm_sink_frs(struct tcpm_port *port) { spin_lock(&port->pd_event_lock); - port->pd_events = TCPM_FRS_EVENT; + port->pd_events |= TCPM_FRS_EVENT; spin_unlock(&port->pd_event_lock); kthread_queue_work(port->wq, &port->event_work); } @@ -5378,7 +5378,7 @@ EXPORT_SYMBOL_GPL(tcpm_sink_frs); void tcpm_sourcing_vbus(struct tcpm_port *port) { spin_lock(&port->pd_event_lock); - port->pd_events = TCPM_SOURCING_VBUS; + port->pd_events |= TCPM_SOURCING_VBUS; spin_unlock(&port->pd_event_lock); kthread_queue_work(port->wq, &port->event_work); } diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 21b78f1cd521..351c6cfb24c3 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -493,9 +493,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, dev, &ifc_vdpa_ops, NULL); - if (adapter == NULL) { + if (IS_ERR(adapter)) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); - return -ENOMEM; + return PTR_ERR(adapter); } pci_set_master(pdev); diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index dcee6039e966..e59135fa867e 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -512,11 +512,6 @@ out: mutex_unlock(&mr->mkey_mtx); } -static bool map_empty(struct vhost_iotlb *iotlb) -{ - return !vhost_iotlb_itree_first(iotlb, 0, 
U64_MAX); -} - int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, bool *change_map) { @@ -524,10 +519,6 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io int err = 0; *change_map = false; - if (map_empty(iotlb)) { - mlx5_vdpa_destroy_mr(mvdev); - return 0; - } mutex_lock(&mr->mkey_mtx); if (mr->initialized) { mlx5_vdpa_info(mvdev, "memory map update\n"); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2a31467f7ac5..5906cada2293 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -526,7 +526,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) void __iomem *uar_page = ndev->mvdev.res.uar->map; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_vdpa_cq *vcq = &mvq->cq; - unsigned int irqn; __be64 *pas; int inlen; void *cqc; @@ -566,14 +565,14 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) /* Use vector 0 by default. Consider adding code to choose least used * vector. */ - err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn); + err = mlx5_vector2eqn(mdev, 0, &eqn); if (err) goto err_vec; cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); - MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); @@ -753,12 +752,12 @@ static int get_queue_type(struct mlx5_vdpa_net *ndev) type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); /* prefer split queue */ - if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED) - return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; + if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT) + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; - WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)); + WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)); - return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; } static bool vq_is_tx(u16 idx) @@ -2030,6 +2029,12 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) return -ENOSPC; mdev = mgtdev->madev->mdev; + if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) & + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { + dev_warn(mdev->device, "missing support for split virtqueues\n"); + return -EOPNOTSUPP; + } + /* we save one virtqueue for control virtqueue should we require it */ max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues); max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 14e024de5cbf..c621cf7feec0 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -251,8 +251,10 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, dev_attr->name); - if (!vdpasim) + if (IS_ERR(vdpasim)) { + ret = PTR_ERR(vdpasim); goto err_alloc; + } vdpasim->dev_attr = *dev_attr; INIT_WORK(&vdpasim->work, dev_attr->work_fn); diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 7b4a6396c553..fe0527329857 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ 
b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -436,9 +436,9 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, dev, &vp_vdpa_ops, NULL); - if (vp_vdpa == NULL) { + if (IS_ERR(vp_vdpa)) { dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); - return -ENOMEM; + return PTR_ERR(vp_vdpa); } mdev = &vp_vdpa->mdev; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 210ab35a7ebf..9479f7f79217 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -614,7 +614,8 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, long pinned; int ret = 0; - if (msg->iova < v->range.first || + if (msg->iova < v->range.first || !msg->size || + msg->iova > U64_MAX - msg->size + 1 || msg->iova + msg->size - 1 > v->range.last) return -EINVAL; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index b9e853e6094d..59edb5a1ffe2 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -735,10 +735,16 @@ static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz) (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); } +/* Make sure 64 bit math will not overflow. */ static bool vhost_overflow(u64 uaddr, u64 size) { - /* Make sure 64 bit math will not overflow. */ - return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size; + if (uaddr > ULONG_MAX || size > ULONG_MAX) + return true; + + if (!size) + return false; + + return uaddr > ULONG_MAX - size + 1; } /* Caller should have vq mutex and device mutex. */ diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 4af8fa259d65..14e2043d7685 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -359,7 +359,7 @@ __vringh_iov(struct vringh *vrh, u16 i, iov = wiov; else { iov = riov; - if (unlikely(wiov && wiov->i)) { + if (unlikely(wiov && wiov->used)) { vringh_bad("Readable desc %p after writable", &descs[i]); err = -EINVAL; diff --git a/drivers/virt/acrn/vm.c b/drivers/virt/acrn/vm.c index 0d002a355a93..fbc9f1042000 100644 --- a/drivers/virt/acrn/vm.c +++ b/drivers/virt/acrn/vm.c @@ -64,6 +64,14 @@ int acrn_vm_destroy(struct acrn_vm *vm) test_and_set_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags)) return 0; + ret = hcall_destroy_vm(vm->vmid); + if (ret < 0) { + dev_err(acrn_dev.this_device, + "Failed to destroy VM %u\n", vm->vmid); + clear_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags); + return ret; + } + /* Remove from global VM list */ write_lock_bh(&acrn_vm_list_lock); list_del_init(&vm->list); @@ -78,14 +86,6 @@ int acrn_vm_destroy(struct acrn_vm *vm) vm->monitor_page = NULL; } - ret = hcall_destroy_vm(vm->vmid); - if (ret < 0) { - dev_err(acrn_dev.this_device, - "Failed to destroy VM %u\n", vm->vmid); - clear_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags); - return ret; - } - acrn_vm_all_ram_unmap(vm); dev_dbg(acrn_dev.this_device, "VM %u destroyed.\n", vm->vmid); diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 4b15c00c0a0a..49984d2cba24 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -355,6 +355,7 @@ int register_virtio_device(struct virtio_device *dev) virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); INIT_LIST_HEAD(&dev->vqs); + spin_lock_init(&dev->vqs_list_lock); /* * device_add() causes the bus infrastructure to look for a matching diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 222d630c41fc..b35bb2d57f62 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -576,6 +576,13 
@@ static void virtio_pci_remove(struct pci_dev *pci_dev) struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct device *dev = get_device(&vp_dev->vdev.dev); + /* + * Device is marked broken on surprise removal so that virtio upper + * layers can abort any ongoing operation. + */ + if (!pci_device_is_present(pci_dev)) + virtio_break_device(&vp_dev->vdev); + pci_disable_sriov(pci_dev); unregister_virtio_device(&vp_dev->vdev); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 89bfe46a8a7f..dd95dfd85e98 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/hrtimer.h> #include <linux/dma-mapping.h> +#include <linux/spinlock.h> #include <xen/xen.h> #ifdef DEBUG @@ -1755,7 +1756,9 @@ static struct virtqueue *vring_create_virtqueue_packed( cpu_to_le16(vq->packed.event_flags_shadow); } + spin_lock(&vdev->vqs_list_lock); list_add_tail(&vq->vq.list, &vdev->vqs); + spin_unlock(&vdev->vqs_list_lock); return &vq->vq; err_desc_extra: @@ -2229,7 +2232,9 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, memset(vq->split.desc_state, 0, vring.num * sizeof(struct vring_desc_state_split)); + spin_lock(&vdev->vqs_list_lock); list_add_tail(&vq->vq.list, &vdev->vqs); + spin_unlock(&vdev->vqs_list_lock); return &vq->vq; err_extra: @@ -2291,6 +2296,10 @@ void vring_del_virtqueue(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + spin_lock(&vq->vq.vdev->vqs_list_lock); + list_del(&_vq->list); + spin_unlock(&vq->vq.vdev->vqs_list_lock); + if (vq->we_own_ring) { if (vq->packed_ring) { vring_free_queue(vq->vq.vdev, @@ -2321,7 +2330,6 @@ void vring_del_virtqueue(struct virtqueue *_vq) kfree(vq->split.desc_state); kfree(vq->split.desc_extra); } - list_del(&_vq->list); kfree(vq); } EXPORT_SYMBOL_GPL(vring_del_virtqueue); @@ -2373,7 +2381,7 @@ bool virtqueue_is_broken(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - return vq->broken; + return READ_ONCE(vq->broken); } EXPORT_SYMBOL_GPL(virtqueue_is_broken); @@ -2385,10 +2393,14 @@ void virtio_break_device(struct virtio_device *dev) { struct virtqueue *_vq; + spin_lock(&dev->vqs_list_lock); list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); - vq->broken = true; + + /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ + WRITE_ONCE(vq->broken, true); } + spin_unlock(&dev->vqs_list_lock); } EXPORT_SYMBOL_GPL(virtio_break_device); diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index e1a141135992..72eaef2caeb1 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -151,6 +151,9 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, if (!name) return NULL; + if (index >= vdpa->nvqs) + return ERR_PTR(-ENOENT); + /* Queue shouldn't already be set up. 
*/ if (ops->get_vq_ready(vdpa, index)) return ERR_PTR(-ENOENT); diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index d7e361fb0548..a78704ae3618 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -198,12 +198,12 @@ static void disable_dynirq(struct irq_data *data); static DEFINE_PER_CPU(unsigned int, irq_epoch); -static void clear_evtchn_to_irq_row(unsigned row) +static void clear_evtchn_to_irq_row(int *evtchn_row) { unsigned col; for (col = 0; col < EVTCHN_PER_ROW; col++) - WRITE_ONCE(evtchn_to_irq[row][col], -1); + WRITE_ONCE(evtchn_row[col], -1); } static void clear_evtchn_to_irq_all(void) @@ -213,7 +213,7 @@ static void clear_evtchn_to_irq_all(void) for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) { if (evtchn_to_irq[row] == NULL) continue; - clear_evtchn_to_irq_row(row); + clear_evtchn_to_irq_row(evtchn_to_irq[row]); } } @@ -221,6 +221,7 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) { unsigned row; unsigned col; + int *evtchn_row; if (evtchn >= xen_evtchn_max_channels()) return -EINVAL; @@ -233,11 +234,18 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) if (irq == -1) return 0; - evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL); - if (evtchn_to_irq[row] == NULL) + evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0); + if (evtchn_row == NULL) return -ENOMEM; - clear_evtchn_to_irq_row(row); + clear_evtchn_to_irq_row(evtchn_row); + + /* + * We've prepared an empty row for the mapping. If a different + * thread was faster inserting it, we can drop ours. + */ + if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL) + free_page((unsigned long) evtchn_row); } WRITE_ONCE(evtchn_to_irq[row][col], irq); @@ -1009,7 +1017,7 @@ static void __unbind_from_irq(unsigned int irq) int xen_bind_pirq_gsi_to_irq(unsigned gsi, unsigned pirq, int shareable, char *name) { - int irq = -1; + int irq; struct physdev_irq irq_op; int ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0117d867ecf8..06f9f167222b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9226,8 +9226,14 @@ static int btrfs_rename_exchange(struct inode *old_dir, bool dest_log_pinned = false; bool need_abort = false; - /* we only allow rename subvolume link between subvolumes */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) + /* + * For non-subvolumes allow exchange only within one subvolume, in the + * same inode namespace. Two subvolumes (represented as directory) can + * be exchanged as they're a logical link and have a fixed inode number. + */ + if (root != dest && + (old_ino != BTRFS_FIRST_FREE_OBJECTID || + new_ino != BTRFS_FIRST_FREE_OBJECTID)) return -EXDEV; /* close the race window with snapshot create/destroy ioctl */ diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7bdefd0c789a..2a2900903f8c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4150,11 +4150,19 @@ bad: /* * Delayed work handler to process end of delayed cap release LRU list. + * + * If new caps are added to the list while processing it, these won't get + * processed in this run. In this case, the ci->i_hold_caps_max will be + * returned so that the work can be scheduled accordingly. 
*/ -void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) +unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) { struct inode *inode; struct ceph_inode_info *ci; + struct ceph_mount_options *opt = mdsc->fsc->mount_options; + unsigned long delay_max = opt->caps_wanted_delay_max * HZ; + unsigned long loop_start = jiffies; + unsigned long delay = 0; dout("check_delayed_caps\n"); spin_lock(&mdsc->cap_delay_lock); @@ -4162,6 +4170,11 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) ci = list_first_entry(&mdsc->cap_delay_list, struct ceph_inode_info, i_cap_delay_list); + if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) { + dout("%s caps added recently. Exiting loop", __func__); + delay = ci->i_hold_caps_max; + break; + } if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 && time_before(jiffies, ci->i_hold_caps_max)) break; @@ -4177,6 +4190,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) } } spin_unlock(&mdsc->cap_delay_lock); + + return delay; } /* diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9db1b39df773..afdc20213876 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4490,22 +4490,29 @@ void inc_session_sequence(struct ceph_mds_session *s) } /* - * delayed work -- periodically trim expired leases, renew caps with mds + * delayed work -- periodically trim expired leases, renew caps with mds. If + * the @delay parameter is set to 0 or if it's more than 5 secs, the default + * workqueue delay value of 5 secs will be used. */ -static void schedule_delayed(struct ceph_mds_client *mdsc) +static void schedule_delayed(struct ceph_mds_client *mdsc, unsigned long delay) { - int delay = 5; - unsigned hz = round_jiffies_relative(HZ * delay); - schedule_delayed_work(&mdsc->delayed_work, hz); + unsigned long max_delay = HZ * 5; + + /* 5 secs default delay */ + if (!delay || (delay > max_delay)) + delay = max_delay; + schedule_delayed_work(&mdsc->delayed_work, + round_jiffies_relative(delay)); } static void delayed_work(struct work_struct *work) { - int i; struct ceph_mds_client *mdsc = container_of(work, struct ceph_mds_client, delayed_work.work); + unsigned long delay; int renew_interval; int renew_caps; + int i; dout("mdsc delayed_work\n"); @@ -4545,7 +4552,7 @@ static void delayed_work(struct work_struct *work) } mutex_unlock(&mdsc->mutex); - ceph_check_delayed_caps(mdsc); + delay = ceph_check_delayed_caps(mdsc); ceph_queue_cap_reclaim_work(mdsc); @@ -4553,7 +4560,7 @@ static void delayed_work(struct work_struct *work) maybe_recover_session(mdsc); - schedule_delayed(mdsc); + schedule_delayed(mdsc, delay); } int ceph_mdsc_init(struct ceph_fs_client *fsc) @@ -5030,7 +5037,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) mdsc->mdsmap->m_epoch); mutex_unlock(&mdsc->mutex); - schedule_delayed(mdsc); + schedule_delayed(mdsc, 0); return; bad_unlock: diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 4ac0606dcbd4..4c6bd1042c94 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -67,19 +67,19 @@ void ceph_get_snap_realm(struct ceph_mds_client *mdsc, { lockdep_assert_held(&mdsc->snap_rwsem); - dout("get_realm %p %d -> %d\n", realm, - atomic_read(&realm->nref), atomic_read(&realm->nref)+1); /* - * since we _only_ increment realm refs or empty the empty - * list with snap_rwsem held, adjusting the empty list here is - * safe. we do need to protect against concurrent empty list - * additions, however. 
+ * The 0->1 and 1->0 transitions must take the snap_empty_lock + * atomically with the refcount change. Go ahead and bump the + * nref here, unless it's 0, in which case we take the spinlock + * and then do the increment and remove it from the list. */ - if (atomic_inc_return(&realm->nref) == 1) { - spin_lock(&mdsc->snap_empty_lock); + if (atomic_inc_not_zero(&realm->nref)) + return; + + spin_lock(&mdsc->snap_empty_lock); + if (atomic_inc_return(&realm->nref) == 1) list_del_init(&realm->empty_item); - spin_unlock(&mdsc->snap_empty_lock); - } + spin_unlock(&mdsc->snap_empty_lock); } static void __insert_snap_realm(struct rb_root *root, @@ -208,28 +208,28 @@ static void __put_snap_realm(struct ceph_mds_client *mdsc, { lockdep_assert_held_write(&mdsc->snap_rwsem); - dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm, - atomic_read(&realm->nref), atomic_read(&realm->nref)-1); + /* + * We do not require the snap_empty_lock here, as any caller that + * increments the value must hold the snap_rwsem. + */ if (atomic_dec_and_test(&realm->nref)) __destroy_snap_realm(mdsc, realm); } /* - * caller needn't hold any locks + * See comments in ceph_get_snap_realm. Caller needn't hold any locks. */ void ceph_put_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { - dout("put_snap_realm %llx %p %d -> %d\n", realm->ino, realm, - atomic_read(&realm->nref), atomic_read(&realm->nref)-1); - if (!atomic_dec_and_test(&realm->nref)) + if (!atomic_dec_and_lock(&realm->nref, &mdsc->snap_empty_lock)) return; if (down_write_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&mdsc->snap_empty_lock); __destroy_snap_realm(mdsc, realm); up_write(&mdsc->snap_rwsem); } else { - spin_lock(&mdsc->snap_empty_lock); list_add(&realm->empty_item, &mdsc->snap_empty); spin_unlock(&mdsc->snap_empty_lock); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6b6332a5c113..9215a2f4535c 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1167,7 +1167,7 @@ extern void ceph_flush_snaps(struct ceph_inode_info *ci, extern bool __ceph_should_report_size(struct ceph_inode_info *ci); extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session); -extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); +extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc); extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc); extern int ceph_drop_caps_for_unlink(struct inode *inode); extern int ceph_encode_inode_release(void **p, struct inode *inode, diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c0bfc2f01030..c6a9542ca281 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1611,6 +1611,11 @@ struct dfs_info3_param { int ttl; }; +struct file_list { + struct list_head list; + struct cifsFileInfo *cfile; +}; + /* * common struct for holding inode info when searching for or updating an * inode with new info diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 79402ca0ddfa..5f8a302ffcb2 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -100,7 +100,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) pplen = cifs_sb->prepath ? 
strlen(cifs_sb->prepath) + 1 : 0; - s = dentry_path_raw(direntry, page, PAGE_SIZE); + s = dentry_path_raw(direntry, page, PATH_MAX); if (IS_ERR(s)) return s; if (!s[1]) // for root we want "", not "/" diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cd108607a070..bb98fbdd22a9 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4619,7 +4619,7 @@ read_complete: static int cifs_readpage(struct file *file, struct page *page) { - loff_t offset = (loff_t)page->index << PAGE_SHIFT; + loff_t offset = page_file_offset(page); int rc = -EACCES; unsigned int xid; @@ -4848,34 +4848,33 @@ void cifs_oplock_break(struct work_struct *work) oplock_break_ack: /* - * releasing stale oplock after recent reconnect of smb session using - * a now incorrect file handle is not a data integrity issue but do - * not bother sending an oplock release if session to server still is - * disconnected since oplock already released by the server - */ - if (!cfile->oplock_break_cancelled) { - rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid, - cinode); - cifs_dbg(FYI, "Oplock release rc = %d\n", rc); - } - /* * When oplock break is received and there are no active * file handles but cached, then schedule deferred close immediately. * So, new open will not use cached handle. */ spin_lock(&CIFS_I(inode)->deferred_lock); is_deferred = cifs_is_deferred_close(cfile, &dclose); + spin_unlock(&CIFS_I(inode)->deferred_lock); if (is_deferred && cfile->deferred_close_scheduled && delayed_work_pending(&cfile->deferred)) { - /* - * If there is no pending work, mod_delayed_work queues new work. - * So, Increase the ref count to avoid use-after-free. - */ - if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0)) - cifsFileInfo_get(cfile); + if (cancel_delayed_work(&cfile->deferred)) { + _cifsFileInfo_put(cfile, false, false); + goto oplock_break_done; + } } - spin_unlock(&CIFS_I(inode)->deferred_lock); + /* + * releasing stale oplock after recent reconnect of smb session using + * a now incorrect file handle is not a data integrity issue but do + * not bother sending an oplock release if session to server still is + * disconnected since oplock already released by the server + */ + if (!cfile->oplock_break_cancelled) { + rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid, + cinode); + cifs_dbg(FYI, "Oplock release rc = %d\n", rc); + } +oplock_break_done: _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false); cifs_done_oplock_break(cinode); } diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 9a59d7ff9a11..eed59bc1d913 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -925,6 +925,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->cred_uid = uid; ctx->cruid_specified = true; break; + case Opt_backupuid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + goto cifs_parse_mount_err; + ctx->backupuid = uid; + ctx->backupuid_specified = true; + break; case Opt_backupgid: gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b96b253e7635..65f8a70cece3 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1625,7 +1625,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) goto unlink_out; } - cifs_close_all_deferred_files(tcon); + cifs_close_deferred_file(CIFS_I(inode)); if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = CIFSPOSIXDelFile(xid, tcon, full_path, @@ -2084,6 +2084,7 @@ 
cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir, FILE_UNIX_BASIC_INFO *info_buf_target; unsigned int xid; int rc, tmprc; + int retry_count = 0; if (flags & ~RENAME_NOREPLACE) return -EINVAL; @@ -2113,10 +2114,24 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir, goto cifs_rename_exit; } - cifs_close_all_deferred_files(tcon); + cifs_close_deferred_file(CIFS_I(d_inode(source_dentry))); + if (d_inode(target_dentry) != NULL) + cifs_close_deferred_file(CIFS_I(d_inode(target_dentry))); + rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); + if (rc == -EACCES) { + while (retry_count < 3) { + cifs_close_all_deferred_files(tcon); + rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, + to_name); + if (rc != -EACCES) + break; + retry_count++; + } + } + /* * No-replace is the natural behavior for CIFS, so skip unlink hacks. */ diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 844abeb2b48f..9469f1cf0b46 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -723,13 +723,31 @@ void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode) { struct cifsFileInfo *cfile = NULL; - struct cifs_deferred_close *dclose; + struct file_list *tmp_list, *tmp_next_list; + struct list_head file_head; + + if (cifs_inode == NULL) + return; + INIT_LIST_HEAD(&file_head); + spin_lock(&cifs_inode->open_file_lock); list_for_each_entry(cfile, &cifs_inode->openFileList, flist) { - spin_lock(&cifs_inode->deferred_lock); - if (cifs_is_deferred_close(cfile, &dclose)) - mod_delayed_work(deferredclose_wq, &cfile->deferred, 0); - spin_unlock(&cifs_inode->deferred_lock); + if (delayed_work_pending(&cfile->deferred)) { + if (cancel_delayed_work(&cfile->deferred)) { + tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); + if (tmp_list == NULL) + continue; + tmp_list->cfile = cfile; + list_add_tail(&tmp_list->list, &file_head); + } + } + } + spin_unlock(&cifs_inode->open_file_lock); + + list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) { + _cifsFileInfo_put(tmp_list->cfile, true, false); + list_del(&tmp_list->list); + kfree(tmp_list); } } @@ -738,20 +756,30 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) { struct cifsFileInfo *cfile; struct list_head *tmp; + struct file_list *tmp_list, *tmp_next_list; + struct list_head file_head; + INIT_LIST_HEAD(&file_head); spin_lock(&tcon->open_file_lock); list_for_each(tmp, &tcon->openFileList) { cfile = list_entry(tmp, struct cifsFileInfo, tlist); if (delayed_work_pending(&cfile->deferred)) { - /* - * If there is no pending work, mod_delayed_work queues new work. - * So, Increase the ref count to avoid use-after-free. 
- */ - if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0)) - cifsFileInfo_get(cfile); + if (cancel_delayed_work(&cfile->deferred)) { + tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); + if (tmp_list == NULL) + continue; + tmp_list->cfile = cfile; + list_add_tail(&tmp_list->list, &file_head); + } } } spin_unlock(&tcon->open_file_lock); + + list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) { + _cifsFileInfo_put(tmp_list->cfile, true, false); + list_del(&tmp_list->list); + kfree(tmp_list); + } } /* parses DFS refferal V3 structure diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 23d6f4d71649..2dfd0d8297eb 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3617,7 +3617,8 @@ static int smb3_simple_fallocate_write_range(unsigned int xid, char *buf) { struct cifs_io_parms io_parms = {0}; - int rc, nbytes; + int nbytes; + int rc = 0; struct kvec iov[2]; io_parms.netfid = cfile->fid.netfid; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 781d14e5f2af..b6d2e3591927 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2426,7 +2426,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) memcpy(aclptr, &acl, sizeof(struct cifs_acl)); buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd); - *len = ptr - (__u8 *)buf; + *len = roundup(ptr - (__u8 *)buf, 8); return buf; } diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 5a0be9985bae..0ad32150611e 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -177,28 +177,22 @@ out: return retval; } -/* Fill [buffer, buffer + pos) with data coming from @from. */ -static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos, +/* Fill @buffer with data coming from @from. */ +static int fill_write_buffer(struct configfs_buffer *buffer, struct iov_iter *from) { - loff_t to_copy; int copied; - u8 *to; if (!buffer->page) buffer->page = (char *)__get_free_pages(GFP_KERNEL, 0); if (!buffer->page) return -ENOMEM; - to_copy = SIMPLE_ATTR_SIZE - 1 - pos; - if (to_copy <= 0) - return 0; - to = buffer->page + pos; - copied = copy_from_iter(to, to_copy, from); + copied = copy_from_iter(buffer->page, SIMPLE_ATTR_SIZE - 1, from); buffer->needs_read_fill = 1; /* if buf is assumed to contain a string, terminate it by \0, * so e.g. sscanf() can scan the string easily */ - to[copied] = 0; + buffer->page[copied] = 0; return copied ? : -EFAULT; } @@ -227,10 +221,10 @@ static ssize_t configfs_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct configfs_buffer *buffer = file->private_data; - ssize_t len; + int len; mutex_lock(&buffer->mutex); - len = fill_write_buffer(buffer, iocb->ki_pos, from); + len = fill_write_buffer(buffer, from); if (len > 0) len = flush_write_buffer(file, buffer, len); if (len > 0) @@ -722,7 +722,7 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d return rc; id = dax_read_lock(); - rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(PAGE_SIZE), &kaddr, NULL); + rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL); if (rc < 0) { dax_read_unlock(id); return rc; diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index b96ecba91899..b60f0152ea57 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -244,9 +244,6 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line, * "bh" may be NULL: a metadata block may have been freed from memory * but there may still be a record of it in the journal, and that record * still needs to be revoked. 
- * - * If the handle isn't valid we're not journaling, but we still need to - * call into ext4_journal_revoke() to put the buffer head. */ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, int is_metadata, struct inode *inode, diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index bc364c119af6..cebea4270817 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -138,7 +138,7 @@ static int kmmpd(void *data) unsigned mmp_check_interval; unsigned long last_update_time; unsigned long diff; - int retval; + int retval = 0; mmp_block = le64_to_cpu(es->s_mmp_block); mmp = (struct mmp_struct *)(bh->b_data); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5fd56f616cf0..f3bbcd4efb56 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2517,7 +2517,7 @@ again: goto journal_error; err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); - if (err) + if (restart || err) goto journal_error; } else { struct dx_root *dxroot; diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index e55723744f58..9d58371d22c2 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -1235,8 +1235,6 @@ void fuse_dax_conn_free(struct fuse_conn *fc) static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) { long nr_pages, nr_ranges; - void *kaddr; - pfn_t pfn; struct fuse_dax_mapping *range; int ret, id; size_t dax_size = -1; @@ -1248,8 +1246,8 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); id = dax_read_lock(); - nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr, - &pfn); + nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL, + NULL); dax_read_unlock(id); if (nr_pages < 0) { pr_debug("dax_direct_access() returned %ld\n", nr_pages); diff --git a/fs/io-wq.c b/fs/io-wq.c index cf086b01c6c6..7d2ed8c7dd31 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -129,7 +129,8 @@ struct io_cb_cancel_data { bool cancel_all; }; -static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); +static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first); +static void io_wqe_dec_running(struct io_worker *worker); static bool io_worker_get(struct io_worker *worker) { @@ -168,26 +169,21 @@ static void io_worker_exit(struct io_worker *worker) { struct io_wqe *wqe = worker->wqe; struct io_wqe_acct *acct = io_wqe_get_acct(worker); - unsigned flags; if (refcount_dec_and_test(&worker->ref)) complete(&worker->ref_done); wait_for_completion(&worker->ref_done); - preempt_disable(); - current->flags &= ~PF_IO_WORKER; - flags = worker->flags; - worker->flags = 0; - if (flags & IO_WORKER_F_RUNNING) - atomic_dec(&acct->nr_running); - worker->flags = 0; - preempt_enable(); - raw_spin_lock_irq(&wqe->lock); - if (flags & IO_WORKER_F_FREE) + if (worker->flags & IO_WORKER_F_FREE) hlist_nulls_del_rcu(&worker->nulls_node); list_del_rcu(&worker->all_list); acct->nr_workers--; + preempt_disable(); + io_wqe_dec_running(worker); + worker->flags = 0; + current->flags &= ~PF_IO_WORKER; + preempt_enable(); raw_spin_unlock_irq(&wqe->lock); kfree_rcu(worker, rcu); @@ -214,15 +210,19 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe) struct hlist_nulls_node *n; struct io_worker *worker; - n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list)); - if (is_a_nulls(n)) - return false; - - worker = hlist_nulls_entry(n, struct io_worker, nulls_node); - if (io_worker_get(worker)) { - wake_up_process(worker->task); + /* + * Iterate free_list and see if we can find an idle worker to + * activate. 
If a given worker is on the free_list but in the process + * of exiting, keep trying. + */ + hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) { + if (!io_worker_get(worker)) + continue; + if (wake_up_process(worker->task)) { + io_worker_release(worker); + return true; + } io_worker_release(worker); - return true; } return false; @@ -247,10 +247,21 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) ret = io_wqe_activate_free_worker(wqe); rcu_read_unlock(); - if (!ret && acct->nr_workers < acct->max_workers) { - atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); - create_io_worker(wqe->wq, wqe, acct->index); + if (!ret) { + bool do_create = false, first = false; + + raw_spin_lock_irq(&wqe->lock); + if (acct->nr_workers < acct->max_workers) { + atomic_inc(&acct->nr_running); + atomic_inc(&wqe->wq->worker_refs); + if (!acct->nr_workers) + first = true; + acct->nr_workers++; + do_create = true; + } + raw_spin_unlock_irq(&wqe->lock); + if (do_create) + create_io_worker(wqe->wq, wqe, acct->index, first); } } @@ -271,10 +282,28 @@ static void create_worker_cb(struct callback_head *cb) { struct create_worker_data *cwd; struct io_wq *wq; + struct io_wqe *wqe; + struct io_wqe_acct *acct; + bool do_create = false, first = false; cwd = container_of(cb, struct create_worker_data, work); - wq = cwd->wqe->wq; - create_io_worker(wq, cwd->wqe, cwd->index); + wqe = cwd->wqe; + wq = wqe->wq; + acct = &wqe->acct[cwd->index]; + raw_spin_lock_irq(&wqe->lock); + if (acct->nr_workers < acct->max_workers) { + if (!acct->nr_workers) + first = true; + acct->nr_workers++; + do_create = true; + } + raw_spin_unlock_irq(&wqe->lock); + if (do_create) { + create_io_worker(wq, wqe, cwd->index, first); + } else { + atomic_dec(&acct->nr_running); + io_worker_ref_put(wq); + } kfree(cwd); } @@ -612,7 +641,7 @@ void io_wq_worker_sleeping(struct task_struct *tsk) raw_spin_unlock_irq(&worker->wqe->lock); } -static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) +static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first) { struct io_wqe_acct *acct = &wqe->acct[index]; struct io_worker *worker; @@ -635,6 +664,9 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) kfree(worker); fail: atomic_dec(&acct->nr_running); + raw_spin_lock_irq(&wqe->lock); + acct->nr_workers--; + raw_spin_unlock_irq(&wqe->lock); io_worker_ref_put(wq); return; } @@ -650,9 +682,8 @@ fail: worker->flags |= IO_WORKER_F_FREE; if (index == IO_WQ_ACCT_BOUND) worker->flags |= IO_WORKER_F_BOUND; - if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND)) + if (first && (worker->flags & IO_WORKER_F_BOUND)) worker->flags |= IO_WORKER_F_FIXED; - acct->nr_workers++; raw_spin_unlock_irq(&wqe->lock); wake_up_new_task(tsk); } diff --git a/fs/io_uring.c b/fs/io_uring.c index bf548af0426c..04c6d059ea94 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -78,6 +78,7 @@ #include <linux/task_work.h> #include <linux/pagemap.h> #include <linux/io_uring.h> +#include <linux/tracehook.h> #define CREATE_TRACE_POINTS #include <trace/events/io_uring.h> @@ -1499,7 +1500,8 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) all_flushed = list_empty(&ctx->cq_overflow_list); if (all_flushed) { clear_bit(0, &ctx->check_cq_overflow); - ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW); } if (posted) @@ -1578,7 +1580,9 @@ static bool 
io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, } if (list_empty(&ctx->cq_overflow_list)) { set_bit(0, &ctx->check_cq_overflow); - ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW); + } ocqe->cqe.user_data = user_data; ocqe->cqe.res = res; @@ -2222,9 +2226,9 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req) static inline bool io_run_task_work(void) { - if (current->task_works) { + if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) { __set_current_state(TASK_RUNNING); - task_work_run(); + tracehook_notify_signal(); return true; } @@ -6803,14 +6807,16 @@ static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx) { /* Tell userspace we may need a wakeup call */ spin_lock_irq(&ctx->completion_lock); - ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP); spin_unlock_irq(&ctx->completion_lock); } static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx) { spin_lock_irq(&ctx->completion_lock); - ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP); spin_unlock_irq(&ctx->completion_lock); } @@ -7132,16 +7138,6 @@ static void **io_alloc_page_table(size_t size) return table; } -static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx) -{ - spin_lock_bh(&ctx->rsrc_ref_lock); -} - -static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx) -{ - spin_unlock_bh(&ctx->rsrc_ref_lock); -} - static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node) { percpu_ref_exit(&ref_node->refs); @@ -7158,9 +7154,9 @@ static void io_rsrc_node_switch(struct io_ring_ctx *ctx, struct io_rsrc_node *rsrc_node = ctx->rsrc_node; rsrc_node->rsrc_data = data_to_kill; - io_rsrc_ref_lock(ctx); + spin_lock_irq(&ctx->rsrc_ref_lock); list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list); - io_rsrc_ref_unlock(ctx); + spin_unlock_irq(&ctx->rsrc_ref_lock); atomic_inc(&data_to_kill->refs); percpu_ref_kill(&rsrc_node->refs); @@ -7199,17 +7195,19 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct /* kill initial ref, already quiesced if zero */ if (atomic_dec_and_test(&data->refs)) break; + mutex_unlock(&ctx->uring_lock); flush_delayed_work(&ctx->rsrc_put_work); ret = wait_for_completion_interruptible(&data->done); - if (!ret) + if (!ret) { + mutex_lock(&ctx->uring_lock); break; + } atomic_inc(&data->refs); /* wait for all works potentially completing data->done */ flush_delayed_work(&ctx->rsrc_put_work); reinit_completion(&data->done); - mutex_unlock(&ctx->uring_lock); ret = io_run_task_work_sig(); mutex_lock(&ctx->uring_lock); } while (ret >= 0); @@ -7668,9 +7666,10 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref) { struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs); struct io_ring_ctx *ctx = node->rsrc_data->ctx; + unsigned long flags; bool first_add = false; - io_rsrc_ref_lock(ctx); + spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); node->done = true; while (!list_empty(&ctx->rsrc_ref_list)) { @@ -7682,7 +7681,7 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref) list_del(&node->node); first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist); } - io_rsrc_ref_unlock(ctx); + spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); if (first_add) mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ); @@ -8653,13 +8652,10 @@ static void 
io_req_caches_free(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } -static bool io_wait_rsrc_data(struct io_rsrc_data *data) +static void io_wait_rsrc_data(struct io_rsrc_data *data) { - if (!data) - return false; - if (!atomic_dec_and_test(&data->refs)) + if (data && !atomic_dec_and_test(&data->refs)) wait_for_completion(&data->done); - return true; } static void io_ring_ctx_free(struct io_ring_ctx *ctx) @@ -8671,10 +8667,14 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) ctx->mm_account = NULL; } + /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ + io_wait_rsrc_data(ctx->buf_data); + io_wait_rsrc_data(ctx->file_data); + mutex_lock(&ctx->uring_lock); - if (io_wait_rsrc_data(ctx->buf_data)) + if (ctx->buf_data) __io_sqe_buffers_unregister(ctx); - if (io_wait_rsrc_data(ctx->file_data)) + if (ctx->file_data) __io_sqe_files_unregister(ctx); if (ctx->rings) __io_cqring_overflow_flush(ctx, true); diff --git a/fs/namespace.c b/fs/namespace.c index ab4174a3c802..f79d9471cb76 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1938,6 +1938,20 @@ void drop_collected_mounts(struct vfsmount *mnt) namespace_unlock(); } +static bool has_locked_children(struct mount *mnt, struct dentry *dentry) +{ + struct mount *child; + + list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { + if (!is_subdir(child->mnt_mountpoint, dentry)) + continue; + + if (child->mnt.mnt_flags & MNT_LOCKED) + return true; + } + return false; +} + /** * clone_private_mount - create a private clone of a path * @path: path to clone @@ -1953,10 +1967,19 @@ struct vfsmount *clone_private_mount(const struct path *path) struct mount *old_mnt = real_mount(path->mnt); struct mount *new_mnt; + down_read(&namespace_sem); if (IS_MNT_UNBINDABLE(old_mnt)) - return ERR_PTR(-EINVAL); + goto invalid; + + if (!check_mnt(old_mnt)) + goto invalid; + + if (has_locked_children(old_mnt, path->dentry)) + goto invalid; new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); + up_read(&namespace_sem); + if (IS_ERR(new_mnt)) return ERR_CAST(new_mnt); @@ -1964,6 +1987,10 @@ struct vfsmount *clone_private_mount(const struct path *path) new_mnt->mnt_ns = MNT_NS_INTERNAL; return &new_mnt->mnt; + +invalid: + up_read(&namespace_sem); + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(clone_private_mount); @@ -2315,19 +2342,6 @@ static int do_change_type(struct path *path, int ms_flags) return err; } -static bool has_locked_children(struct mount *mnt, struct dentry *dentry) -{ - struct mount *child; - list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { - if (!is_subdir(child->mnt_mountpoint, dentry)) - continue; - - if (child->mnt.mnt_flags & MNT_LOCKED) - return true; - } - return false; -} - static struct mount *__do_loopback(struct path *old_path, int recurse) { struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 64864fb40b40..28b67cb9458d 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -54,22 +54,27 @@ static int fanotify_max_queued_events __read_mostly; #include <linux/sysctl.h> +static long ft_zero = 0; +static long ft_int_max = INT_MAX; + struct ctl_table fanotify_table[] = { { .procname = "max_user_groups", .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &ft_zero, + 
.extra2 = &ft_int_max, }, { .procname = "max_user_marks", .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &ft_zero, + .extra2 = &ft_int_max, }, { .procname = "max_queued_events", diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 98f61b31745a..62051247f6d2 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -55,22 +55,27 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly; #include <linux/sysctl.h> +static long it_zero = 0; +static long it_int_max = INT_MAX; + struct ctl_table inotify_table[] = { { .procname = "max_user_instances", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &it_zero, + .extra2 = &it_int_max, }, { .procname = "max_user_watches", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &it_zero, + .extra2 = &it_int_max, }, { .procname = "max_queued_events", diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 41ebf52f1bbc..ebde05c9cf62 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -392,6 +392,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, */ take_dentry_name_snapshot(&name, real); this = lookup_one_len(name.name.name, connected, name.name.len); + release_dentry_name_snapshot(&name); err = PTR_ERR(this); if (IS_ERR(this)) { goto fail; @@ -406,7 +407,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, } out: - release_dentry_name_snapshot(&name); dput(parent); inode_unlock(dir); return this; diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 4d53d3b7e5fe..d081faa55e83 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -392,6 +392,51 @@ out_unlock: return ret; } +/* + * Calling iter_file_splice_write() directly from overlay's f_op may deadlock + * due to lock order inversion between pipe->mutex in iter_file_splice_write() + * and file_start_write(real.file) in ovl_write_iter(). + * + * So do everything ovl_write_iter() does and call iter_file_splice_write() on + * the real file. 
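The overlayfs comment above describes a lock-order inversion: the splice path acquires the pipe mutex and the write-freeze protection in one order, while another write path can acquire them in the opposite order. As a generic illustration of why two opposite orderings can deadlock, here is a deliberately simplified userspace example with hypothetical locks, not the actual kernel locking:

#include <pthread.h>

static pthread_mutex_t pipe_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t write_lock = PTHREAD_MUTEX_INITIALIZER;

/* Path 1: pipe lock first, then the write-side lock. */
static void *splice_like_path(void *arg)
{
	pthread_mutex_lock(&pipe_lock);
	pthread_mutex_lock(&write_lock);	/* blocks forever if path 2 holds it */
	pthread_mutex_unlock(&write_lock);
	pthread_mutex_unlock(&pipe_lock);
	return arg;
}

/* Path 2: write-side lock first, then the pipe lock. */
static void *write_like_path(void *arg)
{
	pthread_mutex_lock(&write_lock);
	pthread_mutex_lock(&pipe_lock);		/* blocks forever if path 1 holds it */
	pthread_mutex_unlock(&pipe_lock);
	pthread_mutex_unlock(&write_lock);
	return arg;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, splice_like_path, NULL);
	pthread_create(&b, NULL, write_like_path, NULL);
	pthread_join(a, NULL);	/* with unlucky timing, never returns */
	pthread_join(b, NULL);
	return 0;
}

Once each thread holds its first lock, neither can make progress. The dedicated ovl_splice_write() below sidesteps the inversion by doing the write-side preparation itself and only then handing off to iter_file_splice_write() on the real file.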
+ */ +static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + struct fd real; + const struct cred *old_cred; + struct inode *inode = file_inode(out); + struct inode *realinode = ovl_inode_real(inode); + ssize_t ret; + + inode_lock(inode); + /* Update mode */ + ovl_copyattr(realinode, inode); + ret = file_remove_privs(out); + if (ret) + goto out_unlock; + + ret = ovl_real_fdget(out, &real); + if (ret) + goto out_unlock; + + old_cred = ovl_override_creds(inode->i_sb); + file_start_write(real.file); + + ret = iter_file_splice_write(pipe, real.file, ppos, len, flags); + + file_end_write(real.file); + /* Update size */ + ovl_copyattr(realinode, inode); + revert_creds(old_cred); + fdput(real); + +out_unlock: + inode_unlock(inode); + + return ret; +} + static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct fd real; @@ -603,7 +648,7 @@ const struct file_operations ovl_file_operations = { .fadvise = ovl_fadvise, .flush = ovl_flush, .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, + .splice_write = ovl_splice_write, .copy_file_range = ovl_copy_file_range, .remap_file_range = ovl_remap_file_range, diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index e8ad2c2c77dd..150fdf3bc68d 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -481,6 +481,8 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) } this = lookup_one_len(p->name, dir, p->len); if (IS_ERR_OR_NULL(this) || !this->d_inode) { + /* Mark a stale entry */ + p->is_whiteout = true; if (IS_ERR(this)) { err = PTR_ERR(this); this = NULL; @@ -776,6 +778,9 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) if (err) goto out; } + } + /* ovl_cache_update_ino() sets is_whiteout on stale entry */ + if (!p->is_whiteout) { if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) break; } diff --git a/fs/pipe.c b/fs/pipe.c index 9ef4231cce61..678dee2a8228 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -32,6 +32,21 @@ #include "internal.h" /* + * New pipe buffers will be restricted to this size while the user is exceeding + * their pipe buffer quota. The general pipe use case needs at least two + * buffers: one for data yet to be read, and one for new data. If this is less + * than two, then a write to a non-empty pipe may block even if the pipe is not + * full. This can occur with GNU make jobserver or similar uses of pipes as + * semaphores: multiple processes may be waiting to write tokens back to the + * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/. + * + * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their + * own risk, namely: pipe writes to non-full pipes may block until the pipe is + * emptied. + */ +#define PIPE_MIN_DEF_BUFFERS 2 + +/* * The max size that a non-root user is allowed to grow the pipe. Can * be set by root in /proc/sys/fs/pipe-max-size */ @@ -429,9 +444,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) #endif /* - * Epoll nonsensically wants a wakeup whether the pipe - * was already empty or not. - * * If it wasn't empty we try to merge new data into * the last buffer. * @@ -440,9 +452,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * spanning multiple pages. 
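The PIPE_MIN_DEF_BUFFERS comment above is motivated by pipes used as counting semaphores, as in the GNU make jobserver: workers take a one-byte token before running a job and write it back afterwards, so writes to a non-empty pipe must not stall. A small, purely illustrative userspace version of that token protocol (the buffer accounting itself happens in alloc_pipe_info() above):

#include <unistd.h>

int main(void)
{
	int fds[2];
	char token = '+';

	if (pipe(fds) != 0)
		return 1;

	/* Seed the semaphore with two job slots. */
	if (write(fds[1], &token, 1) != 1 || write(fds[1], &token, 1) != 1)
		return 1;

	/* A worker claims a slot before starting a job ... */
	if (read(fds[0], &token, 1) != 1)
		return 1;

	/*
	 * ... and returns it when done.  This write targets a pipe that is
	 * not empty; with fewer than two pipe buffers it could block even
	 * though the pipe is far from full, which is exactly the stall the
	 * two-buffer floor above is meant to avoid.
	 */
	if (write(fds[1], &token, 1) != 1)
		return 1;

	close(fds[0]);
	close(fds[1]);
	return 0;
}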
*/ head = pipe->head; - was_empty = true; + was_empty = pipe_empty(head, pipe->tail); chars = total_len & (PAGE_SIZE-1); - if (chars && !pipe_empty(head, pipe->tail)) { + if (chars && !was_empty) { unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; int offset = buf->offset + buf->len; @@ -575,8 +587,11 @@ out: * This is particularly important for small writes, because of * how (for example) the GNU make jobserver uses small writes to * wake up pending jobs + * + * Epoll nonsensically wants a wakeup whether the pipe + * was already empty or not. */ - if (was_empty) { + if (was_empty || pipe->poll_usage) { wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } @@ -639,6 +654,9 @@ pipe_poll(struct file *filp, poll_table *wait) struct pipe_inode_info *pipe = filp->private_data; unsigned int head, tail; + /* Epoll has some historical nasty semantics, this enables them */ + pipe->poll_usage = 1; + /* * Reading pipe state only -- no need for acquiring the semaphore. * @@ -781,8 +799,8 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) { - user_bufs = account_pipe_buffers(user, pipe_bufs, 1); - pipe_bufs = 1; + user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS); + pipe_bufs = PIPE_MIN_DEF_BUFFERS; } if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user()) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index d548ea4b6aab..2c5bcbc19264 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -411,7 +411,16 @@ struct xfs_log_dinode { /* start of the extended dinode, writable fields */ uint32_t di_crc; /* CRC of the inode */ uint64_t di_changecount; /* number of attribute changes */ - xfs_lsn_t di_lsn; /* flush sequence */ + + /* + * The LSN we write to this field during formatting is not a reflection + * of the current on-disk LSN. It should never be used for recovery + * sequencing, nor should it be recovered into the on-disk inode at all. + * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk() + * for details. + */ + xfs_lsn_t di_lsn; + uint64_t di_flags2; /* more random flags */ uint32_t di_cowextsize; /* basic cow extent size for file */ uint8_t di_pad2[12]; /* more padding for future expansion */ diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d44e8b4a3391..4775485b4062 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -698,7 +698,8 @@ xlog_recover_do_inode_buffer( static xfs_lsn_t xlog_recover_get_buf_lsn( struct xfs_mount *mp, - struct xfs_buf *bp) + struct xfs_buf *bp, + struct xfs_buf_log_format *buf_f) { uint32_t magic32; uint16_t magic16; @@ -706,11 +707,20 @@ xlog_recover_get_buf_lsn( void *blk = bp->b_addr; uuid_t *uuid; xfs_lsn_t lsn = -1; + uint16_t blft; /* v4 filesystems always recover immediately */ if (!xfs_sb_version_hascrc(&mp->m_sb)) goto recover_immediately; + /* + * realtime bitmap and summary file blocks do not have magic numbers or + * UUIDs, so we must recover them immediately. 
+ */ + blft = xfs_blft_from_flags(buf_f); + if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF) + goto recover_immediately; + magic32 = be32_to_cpu(*(__be32 *)blk); switch (magic32) { case XFS_ABTB_CRC_MAGIC: @@ -796,6 +806,7 @@ xlog_recover_get_buf_lsn( switch (magicda) { case XFS_DIR3_LEAF1_MAGIC: case XFS_DIR3_LEAFN_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: case XFS_DA3_NODE_MAGIC: lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; @@ -919,7 +930,7 @@ xlog_recover_buf_commit_pass2( * the verifier will be reset to match whatever recover turns that * buffer into. */ - lsn = xlog_recover_get_buf_lsn(mp, bp); + lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f); if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { trace_xfs_log_recover_buf_skip(log, buf_f); xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 7b79518b6c20..e0072a6cd2d3 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts( STATIC void xfs_log_dinode_to_disk( struct xfs_log_dinode *from, - struct xfs_dinode *to) + struct xfs_dinode *to, + xfs_lsn_t lsn) { to->di_magic = cpu_to_be16(from->di_magic); to->di_mode = cpu_to_be16(from->di_mode); @@ -182,7 +183,7 @@ xfs_log_dinode_to_disk( to->di_flags2 = cpu_to_be64(from->di_flags2); to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(from->di_ino); - to->di_lsn = cpu_to_be64(from->di_lsn); + to->di_lsn = cpu_to_be64(lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &from->di_uuid); to->di_flushiter = 0; @@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2( } /* - * If the inode has an LSN in it, recover the inode only if it's less - * than the lsn of the transaction we are replaying. Note: we still - * need to replay an owner change even though the inode is more recent - * than the transaction as there is no guarantee that all the btree - * blocks are more recent than this transaction, too. + * If the inode has an LSN in it, recover the inode only if the on-disk + * inode's LSN is older than the lsn of the transaction we are + * replaying. We can have multiple checkpoints with the same start LSN, + * so the current LSN being equal to the on-disk LSN doesn't necessarily + * mean that the on-disk inode is more recent than the change being + * replayed. + * + * We must check the current_lsn against the on-disk inode + * here because the we can't trust the log dinode to contain a valid LSN + * (see comment below before replaying the log dinode for details). + * + * Note: we still need to replay an owner change even though the inode + * is more recent than the transaction as there is no guarantee that all + * the btree blocks are more recent than this transaction, too. */ if (dip->di_version >= 3) { xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) { trace_xfs_log_recover_inode_skip(log, in_f); error = 0; goto out_owner_change; @@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2( goto out_release; } - /* recover the log dinode inode into the on disk inode */ - xfs_log_dinode_to_disk(ldip, dip); + /* + * Recover the log dinode inode into the on disk inode. 
+ * + * The LSN in the log dinode is garbage - it can be zero or reflect + * stale in-memory runtime state that isn't coherent with the changes + * logged in this transaction or the changes written to the on-disk + * inode. Hence we write the current lSN into the inode because that + * matches what xfs_iflush() would write inode the inode when flushing + * the changes in this transaction. + */ + xfs_log_dinode_to_disk(ldip, dip, current_lsn); fields = in_f->ilf_fields; if (fields & XFS_ILOG_DEV) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 36fa2650b081..60ac5fd63f1e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -78,13 +78,12 @@ xlog_verify_iclog( STATIC void xlog_verify_tail_lsn( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t tail_lsn); + struct xlog_in_core *iclog); #else #define xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_tail(a) #define xlog_verify_iclog(a,b,c) -#define xlog_verify_tail_lsn(a,b,c) +#define xlog_verify_tail_lsn(a,b) #endif STATIC int @@ -487,51 +486,80 @@ out_error: return error; } -static bool -__xlog_state_release_iclog( - struct xlog *log, - struct xlog_in_core *iclog) -{ - lockdep_assert_held(&log->l_icloglock); - - if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { - /* update tail before writing to iclog */ - xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); - - iclog->ic_state = XLOG_STATE_SYNCING; - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); - xlog_verify_tail_lsn(log, iclog, tail_lsn); - /* cycle incremented when incrementing curr_block */ - trace_xlog_iclog_syncing(iclog, _RET_IP_); - return true; - } - - ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); - return false; -} - /* * Flush iclog to disk if this is the last reference to the given iclog and the * it is in the WANT_SYNC state. + * + * If the caller passes in a non-zero @old_tail_lsn and the current log tail + * does not match, there may be metadata on disk that must be persisted before + * this iclog is written. To satisfy that requirement, set the + * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new + * log tail value. + * + * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the + * log tail is updated correctly. NEED_FUA indicates that the iclog will be + * written to stable storage, and implies that a commit record is contained + * within the iclog. We need to ensure that the log tail does not move beyond + * the tail that the first commit record in the iclog ordered against, otherwise + * correct recovery of that checkpoint becomes dependent on future operations + * performed on this iclog. + * + * Hence if NEED_FUA is set and the current iclog tail lsn is empty, write the + * current tail into iclog. Once the iclog tail is set, future operations must + * not modify it, otherwise they potentially violate ordering constraints for + * the checkpoint commit that wrote the initial tail lsn value. The tail lsn in + * the iclog will get zeroed on activation of the iclog after sync, so we + * always capture the tail lsn on the iclog on the first NEED_FUA release + * regardless of the number of active reference counts on this iclog. 
*/ + int xlog_state_release_iclog( struct xlog *log, - struct xlog_in_core *iclog) + struct xlog_in_core *iclog, + xfs_lsn_t old_tail_lsn) { + xfs_lsn_t tail_lsn; lockdep_assert_held(&log->l_icloglock); trace_xlog_iclog_release(iclog, _RET_IP_); if (iclog->ic_state == XLOG_STATE_IOERROR) return -EIO; - if (atomic_dec_and_test(&iclog->ic_refcnt) && - __xlog_state_release_iclog(log, iclog)) { - spin_unlock(&log->l_icloglock); - xlog_sync(log, iclog); - spin_lock(&log->l_icloglock); + /* + * Grabbing the current log tail needs to be atomic w.r.t. the writing + * of the tail LSN into the iclog so we guarantee that the log tail does + * not move between deciding if a cache flush is required and writing + * the LSN into the iclog below. + */ + if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) { + tail_lsn = xlog_assign_tail_lsn(log->l_mp); + + if (old_tail_lsn && tail_lsn != old_tail_lsn) + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; + + if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) && + !iclog->ic_header.h_tail_lsn) + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); } + if (!atomic_dec_and_test(&iclog->ic_refcnt)) + return 0; + + if (iclog->ic_state != XLOG_STATE_WANT_SYNC) { + ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); + return 0; + } + + iclog->ic_state = XLOG_STATE_SYNCING; + if (!iclog->ic_header.h_tail_lsn) + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + xlog_verify_tail_lsn(log, iclog); + trace_xlog_iclog_syncing(iclog, _RET_IP_); + + spin_unlock(&log->l_icloglock); + xlog_sync(log, iclog); + spin_lock(&log->l_icloglock); return 0; } @@ -774,6 +802,21 @@ xfs_log_mount_cancel( } /* + * Flush out the iclog to disk ensuring that device caches are flushed and + * the iclog hits stable storage before any completion waiters are woken. + */ +static inline int +xlog_force_iclog( + struct xlog_in_core *iclog) +{ + atomic_inc(&iclog->ic_refcnt); + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + if (iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); + return xlog_state_release_iclog(iclog->ic_log, iclog, 0); +} + +/* * Wait for the iclog and all prior iclogs to be written disk as required by the * log force state machine. Waiting on ic_force_wait ensures iclog completions * have been ordered and callbacks run before we are woken here, hence @@ -827,13 +870,6 @@ xlog_write_unmount_record( /* account for space used by record data */ ticket->t_curr_res -= sizeof(ulf); - /* - * For external log devices, we need to flush the data device cache - * first to ensure all metadata writeback is on stable storage before we - * stamp the tail LSN into the unmount record. - */ - if (log->l_targ != log->l_mp->m_ddev_targp) - blkdev_issue_flush(log->l_targ->bt_bdev); return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS); } @@ -865,18 +901,7 @@ out_err: spin_lock(&log->l_icloglock); iclog = log->l_iclog; - atomic_inc(&iclog->ic_refcnt); - if (iclog->ic_state == XLOG_STATE_ACTIVE) - xlog_state_switch_iclogs(log, iclog, 0); - else - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - iclog->ic_state == XLOG_STATE_IOERROR); - /* - * Ensure the journal is fully flushed and on stable storage once the - * iclog containing the unmount record is written. - */ - iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); - error = xlog_state_release_iclog(log, iclog); + error = xlog_force_iclog(iclog); xlog_wait_on_iclog(iclog); if (tic) { @@ -1796,10 +1821,20 @@ xlog_write_iclog( * metadata writeback and causing priority inversions. 
*/ iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE; - if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) + if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) { iclog->ic_bio.bi_opf |= REQ_PREFLUSH; + /* + * For external log devices, we also need to flush the data + * device cache first to ensure all metadata writeback covered + * by the LSN in this iclog is on stable storage. This is slow, + * but it *must* complete before we issue the external log IO. + */ + if (log->l_targ != log->l_mp->m_ddev_targp) + blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev); + } if (iclog->ic_flags & XLOG_ICL_NEED_FUA) iclog->ic_bio.bi_opf |= REQ_FUA; + iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) { @@ -2310,7 +2345,7 @@ xlog_write_copy_finish( return 0; release_iclog: - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); return error; } @@ -2529,7 +2564,7 @@ next_lv: ASSERT(optype & XLOG_COMMIT_TRANS); *commit_iclog = iclog; } else { - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); } spin_unlock(&log->l_icloglock); @@ -2567,6 +2602,7 @@ xlog_state_activate_iclog( memset(iclog->ic_header.h_cycle_data, 0, sizeof(iclog->ic_header.h_cycle_data)); iclog->ic_header.h_lsn = 0; + iclog->ic_header.h_tail_lsn = 0; } /* @@ -2967,7 +3003,7 @@ restart: * reference to the iclog. */ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); if (error) return error; @@ -3132,6 +3168,35 @@ xlog_state_switch_iclogs( } /* + * Force the iclog to disk and check if the iclog has been completed before + * xlog_force_iclog() returns. This can happen on synchronous (e.g. + * pmem) or fast async storage because we drop the icloglock to issue the IO. + * If completion has already occurred, tell the caller so that it can avoid an + * unnecessary wait on the iclog. + */ +static int +xlog_force_and_check_iclog( + struct xlog_in_core *iclog, + bool *completed) +{ + xfs_lsn_t lsn = be64_to_cpu(iclog->ic_header.h_lsn); + int error; + + *completed = false; + error = xlog_force_iclog(iclog); + if (error) + return error; + + /* + * If the iclog has already been completed and reused the header LSN + * will have been rewritten by completion + */ + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) + *completed = true; + return 0; +} + +/* * Write out all data in the in-core log as of this exact moment in time. * * Data may be written to the in-core log during this call. However, @@ -3165,7 +3230,6 @@ xfs_log_force( { struct xlog *log = mp->m_log; struct xlog_in_core *iclog; - xfs_lsn_t lsn; XFS_STATS_INC(mp, xs_log_force); trace_xfs_log_force(mp, 0, _RET_IP_); @@ -3193,39 +3257,33 @@ xfs_log_force( iclog = iclog->ic_prev; } else if (iclog->ic_state == XLOG_STATE_ACTIVE) { if (atomic_read(&iclog->ic_refcnt) == 0) { - /* - * We are the only one with access to this iclog. - * - * Flush it out now. There should be a roundoff of zero - * to show that someone has already taken care of the - * roundoff from the previous sync. - */ - atomic_inc(&iclog->ic_refcnt); - lsn = be64_to_cpu(iclog->ic_header.h_lsn); - xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog)) + /* We have exclusive access to this iclog. 
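xlog_force_and_check_iclog() above tells whether the iclog completed while the lock was dropped by sampling the header LSN before forcing it and comparing afterwards: if the value changed, the iclog was already written and recycled, so there is nothing left to wait for. Reduced to a hypothetical generation counter (not the kernel structures, and ignoring the locking the real code relies on), the sample-and-recheck idea looks like this:

#include <stdbool.h>
#include <stdint.h>

struct slot {
	uint64_t gen;	/* bumped each time the slot completes and is reused */
};

static void submit_async(struct slot *s)
{
	/*
	 * Placeholder: in the real code this hands the slot to asynchronous
	 * I/O, whose completion may recycle the slot (bumping ->gen) before
	 * this call returns to us.
	 */
	(void)s;
}

/* Returns true if the slot already completed, so the caller can skip waiting. */
static bool submit_and_check(struct slot *s)
{
	uint64_t gen = s->gen;	/* sample while we still know which instance this is */

	submit_async(s);

	return s->gen != gen;	/* changed => completed and recycled under us */
}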
*/ + bool completed; + + if (xlog_force_and_check_iclog(iclog, &completed)) goto out_error; - if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) + if (completed) goto out_unlock; } else { /* - * Someone else is writing to this iclog. - * - * Use its call to flush out the data. However, the - * other thread may not force out this LR, so we mark - * it WANT_SYNC. + * Someone else is still writing to this iclog, so we + * need to ensure that when they release the iclog it + * gets synced immediately as we may be waiting on it. */ xlog_state_switch_iclogs(log, iclog, 0); } - } else { - /* - * If the head iclog is not active nor dirty, we just attach - * ourselves to the head and go to sleep if necessary. - */ - ; } + /* + * The iclog we are about to wait on may contain the checkpoint pushed + * by the above xlog_cil_force() call, but it may not have been pushed + * to disk yet. Like the ACTIVE case above, we need to make sure caches + * are flushed when this iclog is written. + */ + if (iclog->ic_state == XLOG_STATE_WANT_SYNC) + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + if (flags & XFS_LOG_SYNC) return xlog_wait_on_iclog(iclog); out_unlock: @@ -3245,6 +3303,7 @@ xlog_force_lsn( bool already_slept) { struct xlog_in_core *iclog; + bool completed; spin_lock(&log->l_icloglock); iclog = log->l_iclog; @@ -3258,7 +3317,8 @@ xlog_force_lsn( goto out_unlock; } - if (iclog->ic_state == XLOG_STATE_ACTIVE) { + switch (iclog->ic_state) { + case XLOG_STATE_ACTIVE: /* * We sleep here if we haven't already slept (e.g. this is the * first time we've looked at the correct iclog buf) and the @@ -3281,12 +3341,31 @@ xlog_force_lsn( &log->l_icloglock); return -EAGAIN; } - atomic_inc(&iclog->ic_refcnt); - xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog)) + if (xlog_force_and_check_iclog(iclog, &completed)) goto out_error; if (log_flushed) *log_flushed = 1; + if (completed) + goto out_unlock; + break; + case XLOG_STATE_WANT_SYNC: + /* + * This iclog may contain the checkpoint pushed by the + * xlog_cil_force_seq() call, but there are other writers still + * accessing it so it hasn't been pushed to disk yet. Like the + * ACTIVE case above, we need to make sure caches are flushed + * when this iclog is written. + */ + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + break; + default: + /* + * The entire checkpoint was written by the CIL force and is on + * its way to disk already. It will be stable when it + * completes, so we don't need to manipulate caches here at all. + * We just need to wait for completion if necessary. + */ + break; } if (flags & XFS_LOG_SYNC) @@ -3559,10 +3638,10 @@ xlog_verify_grant_tail( STATIC void xlog_verify_tail_lsn( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t tail_lsn) + struct xlog_in_core *iclog) { - int blocks; + xfs_lsn_t tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn); + int blocks; if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) { blocks = diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b128aaa9b870..4c44bc3786c0 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -654,8 +654,9 @@ xlog_cil_push_work( struct xfs_trans_header thdr; struct xfs_log_iovec lhdr; struct xfs_log_vec lvhdr = { NULL }; + xfs_lsn_t preflush_tail_lsn; xfs_lsn_t commit_lsn; - xfs_lsn_t push_seq; + xfs_csn_t push_seq; struct bio bio; DECLARE_COMPLETION_ONSTACK(bdev_flush); @@ -730,7 +731,15 @@ xlog_cil_push_work( * because we hold the flush lock exclusively. 
Hence we can now issue * a cache flush to ensure all the completed metadata in the journal we * are about to overwrite is on stable storage. + * + * Because we are issuing this cache flush before we've written the + * tail lsn to the iclog, we can have metadata IO completions move the + * tail forwards between the completion of this flush and the iclog + * being written. In this case, we need to re-issue the cache flush + * before the iclog write. To detect whether the log tail moves, sample + * the tail LSN *before* we issue the flush. */ + preflush_tail_lsn = atomic64_read(&log->l_tail_lsn); xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev, &bdev_flush); @@ -941,7 +950,7 @@ restart: * storage. */ commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; - xlog_state_release_iclog(log, commit_iclog); + xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn); spin_unlock(&log->l_icloglock); return; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 4c41bbfa33b0..f3e79a45d60a 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -59,6 +59,16 @@ enum xlog_iclog_state { { XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" }, \ { XLOG_STATE_IOERROR, "XLOG_STATE_IOERROR" } +/* + * In core log flags + */ +#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */ +#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */ + +#define XLOG_ICL_STRINGS \ + { XLOG_ICL_NEED_FLUSH, "XLOG_ICL_NEED_FLUSH" }, \ + { XLOG_ICL_NEED_FUA, "XLOG_ICL_NEED_FUA" } + /* * Log ticket flags @@ -143,9 +153,6 @@ enum xlog_iclog_state { #define XLOG_COVER_OPS 5 -#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */ -#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */ - /* Ticket reservation region accounting */ #define XLOG_TIC_LEN_MAX 15 @@ -497,7 +504,8 @@ int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket, void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); -int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog); +int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, + xfs_lsn_t log_tail_lsn); /* * When we crack an atomic LSN, we sample it first so that the value will not diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f9d8d605f9b1..19260291ff8b 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3944,6 +3944,7 @@ DECLARE_EVENT_CLASS(xlog_iclog_class, __field(uint32_t, state) __field(int32_t, refcount) __field(uint32_t, offset) + __field(uint32_t, flags) __field(unsigned long long, lsn) __field(unsigned long, caller_ip) ), @@ -3952,15 +3953,17 @@ DECLARE_EVENT_CLASS(xlog_iclog_class, __entry->state = iclog->ic_state; __entry->refcount = atomic_read(&iclog->ic_refcnt); __entry->offset = iclog->ic_offset; + __entry->flags = iclog->ic_flags; __entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn); __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx caller %pS", + TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->state, XLOG_STATE_STRINGS), __entry->refcount, __entry->offset, __entry->lsn, + __print_flags(__entry->flags, "|", XLOG_ICL_STRINGS), (char *)__entry->caller_ip) ); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 17325416e2de..62669b36a772 100644 --- a/include/asm-generic/vmlinux.lds.h +++ 
b/include/asm-generic/vmlinux.lds.h @@ -586,6 +586,7 @@ NOINSTR_TEXT \ *(.text..refcount) \ *(.ref.text) \ + *(.text.asan.* .text.tsan.*) \ TEXT_CFI_JT \ MEM_KEEP(init.text*) \ MEM_KEEP(exit.text*) \ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 27b275e463da..2413253e54c7 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -32,6 +32,12 @@ enum can_mode { CAN_MODE_SLEEP }; +enum can_termination_gpio { + CAN_TERMINATION_GPIO_DISABLED = 0, + CAN_TERMINATION_GPIO_ENABLED, + CAN_TERMINATION_GPIO_MAX, +}; + /* * CAN common private data */ @@ -55,6 +61,8 @@ struct can_priv { unsigned int termination_const_cnt; const u16 *termination_const; u16 termination; + struct gpio_desc *termination_gpio; + u16 termination_gpio_ohms[CAN_TERMINATION_GPIO_MAX]; enum can_state state; diff --git a/include/linux/device.h b/include/linux/device.h index 59940f1744c1..65d84b67b024 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -407,6 +407,7 @@ struct dev_links_info { * @em_pd: device's energy model performance domain * @pins: For device pin management. * See Documentation/driver-api/pin-control.rst for details. + * @msi_lock: Lock to protect MSI mask cache and mask register * @msi_list: Hosts MSI descriptors * @msi_domain: The generic MSI domain this device is using. * @numa_node: NUMA node this device is close to. @@ -506,6 +507,7 @@ struct device { struct dev_pin_info *pins; #endif #ifdef CONFIG_GENERIC_MSI_IRQ + raw_spinlock_t msi_lock; struct list_head msi_list; #endif #ifdef CONFIG_DMA_OPS diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 0eadc7ac44ec..6b0dc9ff92d1 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -88,4 +88,22 @@ static inline void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, #endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */ +#if IS_ENABLED(CONFIG_NET_DSA_SJA1105) + +extern const struct dsa_switch_ops sja1105_switch_ops; + +static inline bool dsa_port_is_sja1105(struct dsa_port *dp) +{ + return dp->ds->ops == &sja1105_switch_ops; +} + +#else + +static inline bool dsa_port_is_sja1105(struct dsa_port *dp) +{ + return false; +} + +#endif + #endif /* _NET_DSA_SJA1105_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index ff698c9d1c94..1797e8506929 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -776,6 +776,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, DECLARE_BPF_DISPATCHER(xdp) +DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); + +u32 xdp_master_redirect(struct xdp_buff *xdp); + static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) { @@ -783,7 +787,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, * under local_bh_disable(), which provides the needed RCU protection * for accessing map entries. 
*/ - return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); + u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); + + if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) { + if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev)) + act = xdp_master_redirect(xdp); + } + + return act; } void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 67e042932681..a038feb63f23 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -41,7 +41,7 @@ struct in_device { unsigned long mr_qri; /* Query Response Interval */ unsigned char mr_qrv; /* Query Robustness Variable */ unsigned char mr_gq_running; - unsigned char mr_ifc_count; + u32 mr_ifc_count; struct timer_list mr_gq_timer; /* general query timer */ struct timer_list mr_ifc_timer; /* interface change timer */ diff --git a/include/linux/irq.h b/include/linux/irq.h index 8e9a9ae471a6..c8293c817646 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -569,6 +569,7 @@ struct irq_chip { * IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips * IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND: Invokes __enable_irq()/__disable_irq() for wake irqs * in the suspend path if they are in disabled state + * IRQCHIP_AFFINITY_PRE_STARTUP: Default affinity update before startup */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -581,6 +582,7 @@ enum { IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), IRQCHIP_SUPPORTS_NMI = (1 << 8), IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9), + IRQCHIP_AFFINITY_PRE_STARTUP = (1 << 10), }; #include <linux/irqdesc.h> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bfe5c486f4ad..f0ee30881ca9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1581,7 +1581,8 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb) #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); +bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, + gfp_t gfp_mask); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #ifdef CONFIG_MEMCG extern struct static_key_false memcg_sockets_enabled_key; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index beb918328eef..c493a80cb453 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -721,8 +721,13 @@ void mhi_device_put(struct mhi_device *mhi_dev); * host and device execution environments match and * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels + * @flags: MHI channel flags */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, + unsigned int flags); + +/* Automatically allocate and queue inbound buffers */ +#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0) /** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. 
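The mhi.h hunk just above adds a flags argument to mhi_prepare_for_transfer(), with MHI_CH_INBOUND_ALLOC_BUFS asking the MHI core to allocate and queue inbound buffers on the client's behalf. A minimal sketch of how a client driver might adopt the new signature follows; the probe function and its use here are illustrative assumptions, not part of the patch:

static int example_mhi_client_probe(struct mhi_device *mhi_dev,
				    const struct mhi_device_id *id)
{
	/* Let the MHI core pre-allocate and queue inbound buffers for us. */
	return mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS);
}

A client that manages its own inbound buffers would instead pass 0 for flags.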
diff --git a/include/linux/mii.h b/include/linux/mii.h index 219b93cad1dd..12ea29e04293 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -32,7 +32,7 @@ struct mii_if_info { extern int mii_link_ok (struct mii_if_info *mii); extern int mii_nway_restart (struct mii_if_info *mii); -extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); +extern void mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); extern void mii_ethtool_get_link_ksettings( struct mii_if_info *mii, struct ethtool_link_ksettings *cmd); extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0025913505ab..66eaf0aa7f69 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1038,7 +1038,7 @@ enum { struct mlx5_mkey_seg { /* This is a two bit field occupying bits 31-30. * bit 31 is always 0, - * bit 30 is zero for regular MRs and 1 (e.g free) for UMRs that do not have tanslation + * bit 30 is zero for regular MRs and 1 (e.g free) for UMRs that do not have translation */ u8 status; u8 pcie_control; @@ -1157,6 +1157,9 @@ enum mlx5_cap_mode { HCA_CAP_OPMOD_GET_CUR = 1, }; +/* Any new cap addition must update mlx5_hca_caps_alloc() to allocate + * capability memory. + */ enum mlx5_cap_type { MLX5_CAP_GENERAL = 0, MLX5_CAP_ETHERNET_OFFLOADS, @@ -1213,55 +1216,55 @@ enum mlx5_qcam_feature_groups { /* GET Dev Caps macros */ #define MLX5_CAP_GEN(mdev, cap) \ - MLX5_GET(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap) + MLX5_GET(cmd_hca_cap, mdev->caps.hca[MLX5_CAP_GENERAL]->cur, cap) #define MLX5_CAP_GEN_64(mdev, cap) \ - MLX5_GET64(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap) + MLX5_GET64(cmd_hca_cap, mdev->caps.hca[MLX5_CAP_GENERAL]->cur, cap) #define MLX5_CAP_GEN_MAX(mdev, cap) \ - MLX5_GET(cmd_hca_cap, mdev->caps.hca_max[MLX5_CAP_GENERAL], cap) + MLX5_GET(cmd_hca_cap, mdev->caps.hca[MLX5_CAP_GENERAL]->max, cap) #define MLX5_CAP_GEN_2(mdev, cap) \ - MLX5_GET(cmd_hca_cap_2, mdev->caps.hca_cur[MLX5_CAP_GENERAL_2], cap) + MLX5_GET(cmd_hca_cap_2, mdev->caps.hca[MLX5_CAP_GENERAL_2]->cur, cap) #define MLX5_CAP_GEN_2_64(mdev, cap) \ - MLX5_GET64(cmd_hca_cap_2, mdev->caps.hca_cur[MLX5_CAP_GENERAL_2], cap) + MLX5_GET64(cmd_hca_cap_2, mdev->caps.hca[MLX5_CAP_GENERAL_2]->cur, cap) #define MLX5_CAP_GEN_2_MAX(mdev, cap) \ - MLX5_GET(cmd_hca_cap_2, mdev->caps.hca_max[MLX5_CAP_GENERAL_2], cap) + MLX5_GET(cmd_hca_cap_2, mdev->caps.hca[MLX5_CAP_GENERAL_2]->max, cap) #define MLX5_CAP_ETH(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ - mdev->caps.hca_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap) + mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->cur, cap) #define MLX5_CAP_ETH_MAX(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ - mdev->caps.hca_max[MLX5_CAP_ETHERNET_OFFLOADS], cap) + mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->max, cap) #define MLX5_CAP_IPOIB_ENHANCED(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ - mdev->caps.hca_cur[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS], cap) + mdev->caps.hca[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS]->cur, cap) #define MLX5_CAP_ROCE(mdev, cap) \ - MLX5_GET(roce_cap, mdev->caps.hca_cur[MLX5_CAP_ROCE], cap) + MLX5_GET(roce_cap, mdev->caps.hca[MLX5_CAP_ROCE]->cur, cap) #define MLX5_CAP_ROCE_MAX(mdev, cap) \ - MLX5_GET(roce_cap, mdev->caps.hca_max[MLX5_CAP_ROCE], cap) + MLX5_GET(roce_cap, mdev->caps.hca[MLX5_CAP_ROCE]->max, cap) #define MLX5_CAP_ATOMIC(mdev, cap) \ - MLX5_GET(atomic_caps, 
mdev->caps.hca_cur[MLX5_CAP_ATOMIC], cap) + MLX5_GET(atomic_caps, mdev->caps.hca[MLX5_CAP_ATOMIC]->cur, cap) #define MLX5_CAP_ATOMIC_MAX(mdev, cap) \ - MLX5_GET(atomic_caps, mdev->caps.hca_max[MLX5_CAP_ATOMIC], cap) + MLX5_GET(atomic_caps, mdev->caps.hca[MLX5_CAP_ATOMIC]->max, cap) #define MLX5_CAP_FLOWTABLE(mdev, cap) \ - MLX5_GET(flow_table_nic_cap, mdev->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap) + MLX5_GET(flow_table_nic_cap, mdev->caps.hca[MLX5_CAP_FLOW_TABLE]->cur, cap) #define MLX5_CAP64_FLOWTABLE(mdev, cap) \ - MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap) + MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca[MLX5_CAP_FLOW_TABLE]->cur, cap) #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ - MLX5_GET(flow_table_nic_cap, mdev->caps.hca_max[MLX5_CAP_FLOW_TABLE], cap) + MLX5_GET(flow_table_nic_cap, mdev->caps.hca[MLX5_CAP_FLOW_TABLE]->max, cap) #define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap) @@ -1301,11 +1304,11 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ - mdev->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap) #define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ - mdev->caps.hca_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->max, cap) #define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap) @@ -1327,31 +1330,31 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ - mdev->caps.hca_cur[MLX5_CAP_ESWITCH], cap) + mdev->caps.hca[MLX5_CAP_ESWITCH]->cur, cap) #define MLX5_CAP64_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET64(flow_table_eswitch_cap, \ - (mdev)->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + (mdev)->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap) #define MLX5_CAP_ESW_MAX(mdev, cap) \ MLX5_GET(e_switch_cap, \ - mdev->caps.hca_max[MLX5_CAP_ESWITCH], cap) + mdev->caps.hca[MLX5_CAP_ESWITCH]->max, cap) #define MLX5_CAP_ODP(mdev, cap)\ - MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap) + MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) #define MLX5_CAP_ODP_MAX(mdev, cap)\ - MLX5_GET(odp_cap, mdev->caps.hca_max[MLX5_CAP_ODP], cap) + MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->max, cap) #define MLX5_CAP_VECTOR_CALC(mdev, cap) \ MLX5_GET(vector_calc_cap, \ - mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap) + mdev->caps.hca[MLX5_CAP_VECTOR_CALC]->cur, cap) #define MLX5_CAP_QOS(mdev, cap)\ - MLX5_GET(qos_cap, mdev->caps.hca_cur[MLX5_CAP_QOS], cap) + MLX5_GET(qos_cap, mdev->caps.hca[MLX5_CAP_QOS]->cur, cap) #define MLX5_CAP_DEBUG(mdev, cap)\ - MLX5_GET(debug_cap, mdev->caps.hca_cur[MLX5_CAP_DEBUG], cap) + MLX5_GET(debug_cap, mdev->caps.hca[MLX5_CAP_DEBUG]->cur, cap) #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \ MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld) @@ -1387,27 +1390,27 @@ enum mlx5_qcam_feature_groups { MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap) #define MLX5_CAP_DEV_MEM(mdev, cap)\ - MLX5_GET(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap) + MLX5_GET(device_mem_cap, mdev->caps.hca[MLX5_CAP_DEV_MEM]->cur, cap) #define MLX5_CAP64_DEV_MEM(mdev, cap)\ - MLX5_GET64(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap) + MLX5_GET64(device_mem_cap, mdev->caps.hca[MLX5_CAP_DEV_MEM]->cur, cap) #define MLX5_CAP_TLS(mdev, cap) \ - MLX5_GET(tls_cap, 
(mdev)->caps.hca_cur[MLX5_CAP_TLS], cap) + MLX5_GET(tls_cap, (mdev)->caps.hca[MLX5_CAP_TLS]->cur, cap) #define MLX5_CAP_DEV_EVENT(mdev, cap)\ - MLX5_ADDR_OF(device_event_cap, (mdev)->caps.hca_cur[MLX5_CAP_DEV_EVENT], cap) + MLX5_ADDR_OF(device_event_cap, (mdev)->caps.hca[MLX5_CAP_DEV_EVENT]->cur, cap) #define MLX5_CAP_DEV_VDPA_EMULATION(mdev, cap)\ MLX5_GET(virtio_emulation_cap, \ - (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap) + (mdev)->caps.hca[MLX5_CAP_VDPA_EMULATION]->cur, cap) #define MLX5_CAP64_DEV_VDPA_EMULATION(mdev, cap)\ MLX5_GET64(virtio_emulation_cap, \ - (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap) + (mdev)->caps.hca[MLX5_CAP_VDPA_EMULATION]->cur, cap) #define MLX5_CAP_IPSEC(mdev, cap)\ - MLX5_GET(ipsec_cap, (mdev)->caps.hca_cur[MLX5_CAP_IPSEC], cap) + MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap) enum { MLX5_CMD_STAT_OK = 0x0, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1efe37466969..e23417424373 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -581,7 +581,7 @@ struct mlx5_priv { /* end: qp staff */ /* start: alloc staff */ - /* protect buffer alocation according to numa node */ + /* protect buffer allocation according to numa node */ struct mutex alloc_mutex; int numa_node; @@ -623,8 +623,7 @@ struct mlx5_priv { }; enum mlx5_device_state { - MLX5_DEVICE_STATE_UNINITIALIZED, - MLX5_DEVICE_STATE_UP, + MLX5_DEVICE_STATE_UP = 1, MLX5_DEVICE_STATE_INTERNAL_ERROR, }; @@ -730,6 +729,11 @@ struct mlx5_profile { } mr_cache[MAX_MR_CACHE_ENTRIES]; }; +struct mlx5_hca_cap { + u32 cur[MLX5_UN_SZ_DW(hca_cap_union)]; + u32 max[MLX5_UN_SZ_DW(hca_cap_union)]; +}; + struct mlx5_core_dev { struct device *device; enum mlx5_coredev_type coredev_type; @@ -741,8 +745,7 @@ struct mlx5_core_dev { char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; struct { - u32 hca_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; - u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; + struct mlx5_hca_cap *hca[MLX5_CAP_NUM]; u32 pcam[MLX5_ST_SZ_DW(pcam_reg)]; u32 mcam[MLX5_MCAM_REGS_NUM][MLX5_ST_SZ_DW(mcam_reg)]; u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; @@ -1044,8 +1047,7 @@ void mlx5_unregister_debugfs(void); void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn); +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); @@ -1111,7 +1113,7 @@ static inline u8 mlx5_mkey_variant(u32 mkey) } /* Async-atomic event notifier used by mlx5 core to forward FW - * evetns recived from event queue to mlx5 consumers. + * evetns received from event queue to mlx5 consumers. * Optimise event queue dipatching. 
*/ int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); @@ -1138,6 +1140,8 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev); bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); +bool mlx5_lag_is_master(struct mlx5_core_dev *dev); +bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, struct net_device *slave); @@ -1145,6 +1149,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, u64 *values, int num_counters, size_t *offsets); +struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index bc7db2e059eb..4ab5c1fc1270 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -29,11 +29,20 @@ enum { REP_LOADED, }; +enum mlx5_switchdev_event { + MLX5_SWITCHDEV_EVENT_PAIR, + MLX5_SWITCHDEV_EVENT_UNPAIR, +}; + struct mlx5_eswitch_rep; struct mlx5_eswitch_rep_ops { int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep); void (*unload)(struct mlx5_eswitch_rep *rep); void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); + int (*event)(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + enum mlx5_switchdev_event event, + void *data); }; struct mlx5_eswitch_rep_data { @@ -63,6 +72,7 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type); struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, + struct mlx5_eswitch *from_esw, struct mlx5_eswitch_rep *rep, u32 sqn); #ifdef CONFIG_MLX5_ESWITCH @@ -128,6 +138,7 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); +struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw); #else /* CONFIG_MLX5_ESWITCH */ @@ -171,6 +182,11 @@ static inline u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) return 0; } +static inline struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw) +{ + return NULL; +} + #endif /* CONFIG_MLX5_ESWITCH */ static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6bbae0c3bc0b..f3638d09ba77 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -865,7 +865,8 @@ struct mlx5_ifc_qos_cap_bits { u8 nic_bw_share[0x1]; u8 nic_rate_limit[0x1]; u8 packet_pacing_uid[0x1]; - u8 reserved_at_c[0x14]; + u8 log_esw_max_sched_depth[0x4]; + u8 reserved_at_10[0x10]; u8 reserved_at_20[0xb]; u8 log_max_qos_nic_queue_group[0x5]; @@ -1652,7 +1653,13 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_geneve_tlv_option_data_len[0x5]; u8 reserved_at_570[0x10]; - u8 reserved_at_580[0x33]; + u8 reserved_at_580[0xb]; + u8 log_max_dci_stream_channels[0x5]; + u8 reserved_at_590[0x3]; + u8 log_max_dci_errored_streams[0x5]; + u8 reserved_at_598[0x8]; + + u8 reserved_at_5a0[0x13]; u8 log_max_dek[0x5]; u8 
reserved_at_5b8[0x4]; u8 mini_cqe_resp_stride_index[0x1]; @@ -3021,10 +3028,12 @@ struct mlx5_ifc_qpc_bits { u8 reserved_at_3c0[0x8]; u8 next_send_psn[0x18]; - u8 reserved_at_3e0[0x8]; + u8 reserved_at_3e0[0x3]; + u8 log_num_dci_stream_channels[0x5]; u8 cqn_snd[0x18]; - u8 reserved_at_400[0x8]; + u8 reserved_at_400[0x3]; + u8 log_num_dci_errored_streams[0x5]; u8 deth_sqpn[0x18]; u8 reserved_at_420[0x20]; @@ -3912,7 +3921,7 @@ struct mlx5_ifc_cqc_bits { u8 status[0x4]; u8 reserved_at_4[0x2]; u8 dbr_umem_valid[0x1]; - u8 apu_thread_cq[0x1]; + u8 apu_cq[0x1]; u8 cqe_sz[0x3]; u8 cc[0x1]; u8 reserved_at_c[0x1]; @@ -3938,8 +3947,7 @@ struct mlx5_ifc_cqc_bits { u8 cq_period[0xc]; u8 cq_max_count[0x10]; - u8 reserved_at_a0[0x18]; - u8 c_eqn[0x8]; + u8 c_eqn_or_apu_element[0x20]; u8 reserved_at_c0[0x3]; u8 log_page_size[0x5]; diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index 98b56b75c625..1a9c9d94cb59 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -11,13 +11,15 @@ enum { }; enum { - MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = 0x1, // do I check this caps? - MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = 0x2, + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0, + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1, }; enum { - MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0, - MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1, + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = + BIT(MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT), + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = + BIT(MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED), }; struct mlx5_ifc_virtio_q_bits { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 52bbd2b7cb46..7f8ee09c711f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -103,11 +103,19 @@ struct page { unsigned long pp_magic; struct page_pool *pp; unsigned long _pp_mapping_pad; - /** - * @dma_addr: might require a 64-bit value on - * 32-bit architectures. - */ - unsigned long dma_addr[2]; + unsigned long dma_addr; + union { + /** + * dma_addr_upper: might require a 64-bit + * value on 32-bit architectures. + */ + unsigned long dma_addr_upper; + /** + * For frag page support, not supported in + * 32-bit architectures with 64-bit DMA. + */ + atomic_long_t pp_frag_count; + }; }; struct { /* slab, slob and slub */ union { diff --git a/include/linux/msi.h b/include/linux/msi.h index 6aff469e511d..e8bdcb83172b 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -233,7 +233,7 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag); -u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag); +void __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag); void pci_msi_mask_irq(struct irq_data *data); void pci_msi_unmask_irq(struct irq_data *data); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 135c943699d0..2f03cd9e371a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1318,6 +1318,9 @@ struct netdev_net_notifier { * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. 
+ * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev, + * struct xdp_buff *xdp); + * Get the xmit slave of master device based on the xdp_buff. * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); * This function is used to wake up the softirq, ksoftirqd or kthread * responsible for sending and/or receiving packets on a specific @@ -1545,6 +1548,8 @@ struct net_device_ops { int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, u32 flags); + struct net_device * (*ndo_xdp_get_xmit_slave)(struct net_device *dev, + struct xdp_buff *xdp); int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); @@ -3951,7 +3956,7 @@ void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason); /* * It is not allowed to call kfree_skb() or consume_skb() from hardware * interrupt context or with hardware interrupts being disabled. - * (in_irq() || irqs_disabled()) + * (in_hardirq() || irqs_disabled()) * * We provide four helpers that can be used in following contexts : * @@ -4076,6 +4081,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +u8 dev_xdp_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 10279c4830ac..ada1296c87d5 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -196,6 +196,9 @@ struct ip_set_region { u32 elements; /* Number of elements vs timeout */ }; +/* Max range where every element is added/deleted in one step */ +#define IPSET_MAX_RANGE (1<<20) + /* The max revision number supported by any set type + 1 */ #define IPSET_REVISION_MAX 9 diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 28d7027cd460..5897f3dbaf7c 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -238,9 +238,6 @@ struct xt_table { u_int8_t af; /* address/protocol family */ int priority; /* hook order */ - /* called when table is needed in the given netns */ - int (*table_init)(struct net *net); - /* A unique name... 
*/ const char name[XT_TABLE_MAXNAMELEN]; }; @@ -452,6 +449,9 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); +int xt_register_template(const struct xt_table *t, int(*table_init)(struct net *net)); +void xt_unregister_template(const struct xt_table *t); + #ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include <net/compat.h> diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index a8178253ce53..10a01978bc0d 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -127,4 +127,6 @@ static inline bool ebt_invalid_target(int target) return (target < -NUM_STANDARD_TARGETS || target >= 0); } +int ebt_register_template(const struct ebt_table *t, int(*table_init)(struct net *net)); +void ebt_unregister_template(const struct ebt_table *t); #endif diff --git a/include/linux/once.h b/include/linux/once.h index 9225ee6d96c7..ae6f4eb41cbe 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -7,7 +7,7 @@ bool __do_once_start(bool *done, unsigned long *flags); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags); + unsigned long *flags, struct module *mod); /* Call a function exactly once. The idea of DO_ONCE() is to perform * a function call such as initialization of random seeds, etc, only @@ -46,7 +46,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key, if (unlikely(___ret)) { \ func(__VA_ARGS__); \ __do_once_done(&___done, &___once_key, \ - &___flags); \ + &___flags, THIS_MODULE); \ } \ } \ ___ret; \ diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 5d2705f1d01c..fc5642431b92 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -48,6 +48,7 @@ struct pipe_buffer { * @files: number of struct file referring this pipe (protected by ->i_lock) * @r_counter: reader counter * @w_counter: writer counter + * @poll_usage: is this pipe used for epoll, which has crazy wakeups? 
* @fasync_readers: reader side fasync * @fasync_writers: writer side fasync * @bufs: the circular array of pipe buffers @@ -70,6 +71,7 @@ struct pipe_inode_info { unsigned int files; unsigned int r_counter; unsigned int w_counter; + unsigned int poll_usage; struct page *tmp_page; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 71fac9237725..2e5565067355 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -215,7 +215,7 @@ static inline long scaled_ppm_to_ppb(long ppm) return (long)ppb; } -#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) /** * ptp_clock_register() - register a PTP hardware clock driver @@ -307,6 +307,33 @@ int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay); */ void ptp_cancel_worker_sync(struct ptp_clock *ptp); +#else +static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, + struct device *parent) +{ return NULL; } +static inline int ptp_clock_unregister(struct ptp_clock *ptp) +{ return 0; } +static inline void ptp_clock_event(struct ptp_clock *ptp, + struct ptp_clock_event *event) +{ } +static inline int ptp_clock_index(struct ptp_clock *ptp) +{ return -1; } +static inline int ptp_find_pin(struct ptp_clock *ptp, + enum ptp_pin_function func, unsigned int chan) +{ return -1; } +static inline int ptp_schedule_worker(struct ptp_clock *ptp, + unsigned long delay) +{ return -EOPNOTSUPP; } +static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp) +{ } +#endif + +#if IS_BUILTIN(CONFIG_PTP_1588_CLOCK) +/* + * These are called by the network core, and don't work if PTP is in + * a loadable module. + */ + /** * ptp_get_vclocks_index() - get all vclocks index on pclock, and * caller is responsible to free memory @@ -327,26 +354,7 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); */ void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, int vclock_index); - #else -static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, - struct device *parent) -{ return NULL; } -static inline int ptp_clock_unregister(struct ptp_clock *ptp) -{ return 0; } -static inline void ptp_clock_event(struct ptp_clock *ptp, - struct ptp_clock_event *event) -{ } -static inline int ptp_clock_index(struct ptp_clock *ptp) -{ return -1; } -static inline int ptp_find_pin(struct ptp_clock *ptp, - enum ptp_pin_function func, unsigned int chan) -{ return -1; } -static inline int ptp_schedule_worker(struct ptp_clock *ptp, - unsigned long delay) -{ return -EOPNOTSUPP; } -static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp) -{ } static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) { return 0; } static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, diff --git a/include/linux/security.h b/include/linux/security.h index 24eda04221e9..5b7288521300 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -120,10 +120,11 @@ enum lockdown_reason { LOCKDOWN_MMIOTRACE, LOCKDOWN_DEBUGFS, LOCKDOWN_XMON_WR, + LOCKDOWN_BPF_WRITE_USER, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, - LOCKDOWN_BPF_READ, + LOCKDOWN_BPF_READ_KERNEL, LOCKDOWN_PERF, LOCKDOWN_TRACEFS, LOCKDOWN_XMON_RW, diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 52d7fb92a69d..c58cc142d23f 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -518,6 
+518,25 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) if (sysrq_ch) handle_sysrq(sysrq_ch); } + +static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, + unsigned long flags) +{ + int sysrq_ch; + + if (!port->has_sysrq) { + spin_unlock_irqrestore(&port->lock, flags); + return; + } + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, flags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +} #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { @@ -531,6 +550,11 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { spin_unlock(&port->lock); } +static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, + unsigned long flags) +{ + spin_unlock_irqrestore(&port->lock, flags); +} #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 783cc2368bb1..6bdb0db3e825 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4712,11 +4712,9 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) } #ifdef CONFIG_PAGE_POOL -static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page, - struct page_pool *pp) +static inline void skb_mark_for_recycle(struct sk_buff *skb) { skb->pp_recycle = 1; - page_pool_store_mem_info(page, pp); } #endif diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 54269e47ac9a..3ebfea0781f1 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -27,6 +27,7 @@ #define TEE_SHM_USER_MAPPED BIT(4) /* Memory mapped in user space */ #define TEE_SHM_POOL BIT(5) /* Memory allocated from pool */ #define TEE_SHM_KERNEL_MAPPED BIT(6) /* Memory mapped in kernel space */ +#define TEE_SHM_PRIV BIT(7) /* Memory private to TEE driver */ struct device; struct tee_device; @@ -332,6 +333,7 @@ void *tee_get_drvdata(struct tee_device *teedev); * @returns a pointer to 'struct tee_shm' */ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); +struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); /** * tee_shm_register() - Register shared memory buffer diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 3aee78dda16d..784659d4dc99 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -196,6 +196,7 @@ struct otg_fsm { struct mutex lock; u8 *host_req_flag; struct delayed_work hnp_polling_work; + bool hnp_work_inited; bool state_changed; }; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 3357ac98878d..8cfe49d201dd 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -277,6 +277,17 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, size_t size, const char *name); +/** + * vdpa_alloc_device - allocate and initilaize a vDPA device + * + * @dev_struct: the type of the parent structure + * @member: the name of struct vdpa_device within the @dev_struct + * @parent: the parent device + * @config: the bus operations that is supported by this device + * @name: name of the vdpa device + * + * Return allocated data structure or ERR_PTR upon error + */ #define vdpa_alloc_device(dev_struct, member, parent, config, name) \ container_of(__vdpa_alloc_device( \ parent, config, \ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index b1894e0323fa..41edbc01ffa4 100644 --- a/include/linux/virtio.h +++ 
b/include/linux/virtio.h @@ -110,6 +110,7 @@ struct virtio_device { bool config_enabled; bool config_change_pending; spinlock_t config_lock; + spinlock_t vqs_list_lock; /* Protects VQs list access */ struct device dev; struct virtio_device_id id; const struct virtio_config_ops *config; diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 84db7b8f912f..212892cf9822 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -14,6 +14,7 @@ #include <linux/virtio_byteorder.h> #include <linux/uio.h> #include <linux/slab.h> +#include <linux/spinlock.h> #if IS_REACHABLE(CONFIG_VHOST_IOTLB) #include <linux/dma-direction.h> #include <linux/vhost_iotlb.h> diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a53e94459ecd..a7360c8c72f8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -221,6 +221,7 @@ struct oob_data { struct adv_info { struct list_head list; + bool enabled; bool pending; __u8 instance; __u32 flags; @@ -628,6 +629,7 @@ struct hci_conn { __u8 init_addr_type; bdaddr_t resp_addr; __u8 resp_addr_type; + __u8 adv_instance; __u16 handle; __u16 state; __u8 mode; @@ -1223,13 +1225,25 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data) dev_set_drvdata(&hdev->dev, data); } +static inline void *hci_get_priv(struct hci_dev *hdev) +{ + return (char *)hdev + sizeof(*hdev); +} + struct hci_dev *hci_dev_get(int index); struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type); -struct hci_dev *hci_alloc_dev(void); +struct hci_dev *hci_alloc_dev_priv(int sizeof_priv); + +static inline struct hci_dev *hci_alloc_dev(void) +{ + return hci_alloc_dev_priv(0); +} + void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); +void hci_release_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); @@ -1411,6 +1425,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn); !hci_dev_test_flag(dev, HCI_AUTO_OFF)) #define bredr_sc_enabled(dev) (lmp_sc_capable(dev) && \ hci_dev_test_flag(dev, HCI_SC_ENABLED)) +#define rpa_valid(dev) (bacmp(&dev->rpa, BDADDR_ANY) && \ + !hci_dev_test_flag(dev, HCI_RPA_EXPIRED)) +#define adv_rpa_valid(adv) (bacmp(&adv->random_addr, BDADDR_ANY) && \ + !adv->rpa_expired) #define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M)) diff --git a/include/net/bonding.h b/include/net/bonding.h index 46df47004803..15e083e18f75 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -150,11 +150,6 @@ struct bond_params { u8 ad_actor_system[ETH_ALEN + 2]; }; -struct bond_parm_tbl { - char *modename; - int mode; -}; - struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ @@ -259,6 +254,7 @@ struct bonding { /* protecting ipsec_list */ spinlock_t ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ + struct bpf_prog *xdp_prog; }; #define bond_slave_get_rcu(dev) \ @@ -754,13 +750,6 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) /* exported from bond_main.c */ extern unsigned int bond_net_id; -extern const struct bond_parm_tbl bond_lacp_tbl[]; -extern const struct bond_parm_tbl xmit_hashtype_tbl[]; -extern const struct bond_parm_tbl arp_validate_tbl[]; -extern const struct bond_parm_tbl arp_all_targets_tbl[]; -extern const struct bond_parm_tbl fail_over_mac_tbl[]; 
-extern const struct bond_parm_tbl pri_reselect_tbl[]; -extern struct bond_parm_tbl ad_select_tbl[]; /* exported from bond_netlink.c */ extern struct rtnl_link_ops bond_link_ops; diff --git a/include/net/devlink.h b/include/net/devlink.h index 08f4c6191e72..154cf0dbca37 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -32,7 +32,7 @@ struct devlink_dev_stats { struct devlink_ops; struct devlink { - struct list_head list; + u32 index; struct list_head port_list; struct list_head rate_list; struct list_head sb_list; @@ -56,6 +56,8 @@ struct devlink { */ u8 reload_failed:1, reload_enabled:1; + refcount_t refcount; + struct completion comp; char priv[0] __aligned(NETDEV_ALIGN); }; @@ -519,6 +521,9 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE, DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -559,6 +564,15 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME "enable_remote_dev_reset" #define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME "enable_eth" +#define DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE DEVLINK_PARAM_TYPE_BOOL + +#define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME "enable_rdma" +#define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE DEVLINK_PARAM_TYPE_BOOL + +#define DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME "enable_vnet" +#define DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@ -1396,8 +1410,8 @@ struct devlink_ops { * * Note: @extack can be NULL when port notifier queries the port function. */ - int (*port_function_hw_addr_get)(struct devlink *devlink, struct devlink_port *port, - u8 *hw_addr, int *hw_addr_len, + int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr, + int *hw_addr_len, struct netlink_ext_ack *extack); /** * @port_function_hw_addr_set: Port function's hardware address set function. @@ -1406,7 +1420,7 @@ struct devlink_ops { * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port * function handling for a particular port. */ - int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port, + int (*port_function_hw_addr_set)(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); /** @@ -1462,8 +1476,7 @@ struct devlink_ops { * * Return: 0 on success, negative value otherwise. */ - int (*port_fn_state_get)(struct devlink *devlink, - struct devlink_port *port, + int (*port_fn_state_get)(struct devlink_port *port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate *opstate, struct netlink_ext_ack *extack); @@ -1478,8 +1491,7 @@ struct devlink_ops { * * Return: 0 on success, negative value otherwise. */ - int (*port_fn_state_set)(struct devlink *devlink, - struct devlink_port *port, + int (*port_fn_state_set)(struct devlink_port *port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack); @@ -1546,13 +1558,15 @@ struct net *devlink_net(const struct devlink *devlink); * Drivers that operate on real HW must use devlink_alloc() instead. 
*/ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, - size_t priv_size, struct net *net); + size_t priv_size, struct net *net, + struct device *dev); static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, - size_t priv_size) + size_t priv_size, + struct device *dev) { - return devlink_alloc_ns(ops, priv_size, &init_net); + return devlink_alloc_ns(ops, priv_size, &init_net, dev); } -int devlink_register(struct devlink *devlink, struct device *dev); +int devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_reload_enable(struct devlink *devlink); void devlink_reload_disable(struct devlink *devlink); @@ -1633,8 +1647,16 @@ int devlink_params_register(struct devlink *devlink, void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count); +int devlink_param_register(struct devlink *devlink, + const struct devlink_param *param); +void devlink_param_unregister(struct devlink *devlink, + const struct devlink_param *param); void devlink_params_publish(struct devlink *devlink); void devlink_params_unpublish(struct devlink *devlink); +void devlink_param_publish(struct devlink *devlink, + const struct devlink_param *param); +void devlink_param_unpublish(struct devlink *devlink, + const struct devlink_param *param); int devlink_port_params_register(struct devlink_port *devlink_port, const struct devlink_param *params, size_t params_count); diff --git a/include/net/dsa.h b/include/net/dsa.h index 7cc9507282d3..0c2cba45fa79 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -254,6 +254,8 @@ struct dsa_port { struct device_node *dn; unsigned int ageing_time; bool vlan_filtering; + /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ + bool learning; u8 stp_state; struct net_device *bridge_dev; int bridge_num; @@ -714,8 +716,6 @@ struct dsa_switch_ops { int (*port_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); - int (*port_set_mrouter)(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack); /* * VLAN support diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 69c9eabf8325..3961461d9c8b 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -293,7 +293,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action) } /** - * flow_action_has_one_action() - check if exactly one action is present + * flow_offload_has_one_action() - check if exactly one action is present * @action: tc filter flow offload action * * Returns true if exactly one action is present. 
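Referring back to the devlink.h hunk above: the parent struct device moves from devlink_register() to allocation time, so devlink_alloc()/devlink_alloc_ns() now take a struct device * and devlink_register() takes only the devlink instance. A minimal sketch of the new calling convention, assuming a hypothetical driver with ops my_devlink_ops, private data struct my_priv, and a parent device pdev (none of these names are from the patch):

	struct devlink *dl;

	/* Parent device is bound at allocation time now. */
	dl = devlink_alloc(&my_devlink_ops, sizeof(struct my_priv), &pdev->dev);
	if (!dl)
		return -ENOMEM;
	/* ... set up devlink_priv(dl), ports, params ... */
	devlink_register(dl);	/* no longer takes a struct device argument */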
@@ -319,14 +319,12 @@ flow_action_mixed_hw_stats_check(const struct flow_action *action, if (flow_offload_has_one_action(action)) return true; - if (action) { - flow_action_for_each(i, action_entry, action) { - if (i && action_entry->hw_stats != last_hw_stats) { - NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported"); - return false; - } - last_hw_stats = action_entry->hw_stats; + flow_action_for_each(i, action_entry, action) { + if (i && action_entry->hw_stats != last_hw_stats) { + NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported"); + return false; } + last_hw_stats = action_entry->hw_stats; } return true; } @@ -453,6 +451,7 @@ struct flow_block_offload { struct list_head *driver_block_list; struct netlink_ext_ack *extack; struct Qdisc *sch; + struct list_head *cb_list_head; }; enum tc_setup_type; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 820eae3ea95f..5efd0b71dc67 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -265,7 +265,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { - int mtu; + unsigned int mtu; struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; diff --git a/include/net/ipx.h b/include/net/ipx.h deleted file mode 100644 index 9d1342807b59..000000000000 --- a/include/net/ipx.h +++ /dev/null @@ -1,171 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_INET_IPX_H_ -#define _NET_INET_IPX_H_ -/* - * The following information is in its entirety obtained from: - * - * Novell 'IPX Router Specification' Version 1.10 - * Part No. 107-000029-001 - * - * Which is available from ftp.novell.com - */ - -#include <linux/netdevice.h> -#include <net/datalink.h> -#include <linux/ipx.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <linux/refcount.h> - -struct ipx_address { - __be32 net; - __u8 node[IPX_NODE_LEN]; - __be16 sock; -}; - -#define ipx_broadcast_node "\377\377\377\377\377\377" -#define ipx_this_node "\0\0\0\0\0\0" - -#define IPX_MAX_PPROP_HOPS 8 - -struct ipxhdr { - __be16 ipx_checksum __packed; -#define IPX_NO_CHECKSUM cpu_to_be16(0xFFFF) - __be16 ipx_pktsize __packed; - __u8 ipx_tctrl; - __u8 ipx_type; -#define IPX_TYPE_UNKNOWN 0x00 -#define IPX_TYPE_RIP 0x01 /* may also be 0 */ -#define IPX_TYPE_SAP 0x04 /* may also be 0 */ -#define IPX_TYPE_SPX 0x05 /* SPX protocol */ -#define IPX_TYPE_NCP 0x11 /* $lots for docs on this (SPIT) */ -#define IPX_TYPE_PPROP 0x14 /* complicated flood fill brdcast */ - struct ipx_address ipx_dest __packed; - struct ipx_address ipx_source __packed; -}; - -/* From af_ipx.c */ -extern int sysctl_ipx_pprop_broadcasting; - -struct ipx_interface { - /* IPX address */ - __be32 if_netnum; - unsigned char if_node[IPX_NODE_LEN]; - refcount_t refcnt; - - /* physical device info */ - struct net_device *if_dev; - struct datalink_proto *if_dlink; - __be16 if_dlink_type; - - /* socket support */ - unsigned short if_sknum; - struct hlist_head if_sklist; - spinlock_t if_sklist_lock; - - /* administrative overhead */ - int if_ipx_offset; - unsigned char if_internal; - unsigned char if_primary; - - struct list_head node; /* node in ipx_interfaces list */ -}; - -struct ipx_route { - __be32 ir_net; - struct ipx_interface *ir_intrfc; - unsigned char ir_routed; - unsigned char ir_router_node[IPX_NODE_LEN]; - struct list_head node; /* node in ipx_routes list */ - refcount_t refcnt; -}; - -struct ipx_cb { - u8 ipx_tctrl; - 
__be32 ipx_dest_net; - __be32 ipx_source_net; - struct { - __be32 netnum; - int index; - } last_hop; -}; - -#include <net/sock.h> - -struct ipx_sock { - /* struct sock has to be the first member of ipx_sock */ - struct sock sk; - struct ipx_address dest_addr; - struct ipx_interface *intrfc; - __be16 port; -#ifdef CONFIG_IPX_INTERN - unsigned char node[IPX_NODE_LEN]; -#endif - unsigned short type; - /* - * To handle special ncp connection-handling sockets for mars_nwe, - * the connection number must be stored in the socket. - */ - unsigned short ipx_ncp_conn; -}; - -static inline struct ipx_sock *ipx_sk(struct sock *sk) -{ - return (struct ipx_sock *)sk; -} - -#define IPX_SKB_CB(__skb) ((struct ipx_cb *)&((__skb)->cb[0])) - -#define IPX_MIN_EPHEMERAL_SOCKET 0x4000 -#define IPX_MAX_EPHEMERAL_SOCKET 0x7fff - -extern struct list_head ipx_routes; -extern rwlock_t ipx_routes_lock; - -extern struct list_head ipx_interfaces; -struct ipx_interface *ipx_interfaces_head(void); -extern spinlock_t ipx_interfaces_lock; - -extern struct ipx_interface *ipx_primary_net; - -int ipx_proc_init(void); -void ipx_proc_exit(void); - -const char *ipx_frame_name(__be16); -const char *ipx_device_name(struct ipx_interface *intrfc); - -static __inline__ void ipxitf_hold(struct ipx_interface *intrfc) -{ - refcount_inc(&intrfc->refcnt); -} - -void ipxitf_down(struct ipx_interface *intrfc); -struct ipx_interface *ipxitf_find_using_net(__be32 net); -int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node); -__be16 ipx_cksum(struct ipxhdr *packet, int length); -int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc, - unsigned char *node); -void ipxrtr_del_routes(struct ipx_interface *intrfc); -int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, - struct msghdr *msg, size_t len, int noblock); -int ipxrtr_route_skb(struct sk_buff *skb); -struct ipx_route *ipxrtr_lookup(__be32 net); -int ipxrtr_ioctl(unsigned int cmd, void __user *arg); - -static __inline__ void ipxitf_put(struct ipx_interface *intrfc) -{ - if (refcount_dec_and_test(&intrfc->refcnt)) - ipxitf_down(intrfc); -} - -static __inline__ void ipxrtr_hold(struct ipx_route *rt) -{ - refcount_inc(&rt->refcnt); -} - -static __inline__ void ipxrtr_put(struct ipx_route *rt) -{ - if (refcount_dec_and_test(&rt->refcnt)) - kfree(rt); -} -#endif /* _NET_INET_IPX_H_ */ diff --git a/include/net/mctp.h b/include/net/mctp.h index 54bbe042c973..a824d47c3c6d 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -173,6 +173,7 @@ struct mctp_route { struct mctp_dev *dev; unsigned int mtu; + unsigned char type; int (*output)(struct mctp_route *route, struct sk_buff *skb); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index cc54750dd3db..bb5fa5914032 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -23,7 +23,6 @@ #include <net/netns/ieee802154_6lowpan.h> #include <net/netns/sctp.h> #include <net/netns/netfilter.h> -#include <net/netns/x_tables.h> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include <net/netns/conntrack.h> #endif @@ -133,7 +132,6 @@ struct net { #endif #ifdef CONFIG_NETFILTER struct netns_nf nf; - struct netns_xt xt; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index e770bba00066..9eed51e920e8 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -33,8 +33,8 @@ struct 
nf_queue_handler { void (*nf_hook_drop)(struct net *net); }; -void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); -void nf_unregister_queue_handler(struct net *net); +void nf_register_queue_handler(const struct nf_queue_handler *qh); +void nf_unregister_queue_handler(void); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); void nf_queue_entry_get_refs(struct nf_queue_entry *entry); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 37e5300c7e5a..fefd38db95b3 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -30,7 +30,6 @@ struct nf_tcp_net { u8 tcp_ignore_invalid_rst; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) unsigned int offload_timeout; - unsigned int offload_pickup; #endif }; @@ -44,7 +43,6 @@ struct nf_udp_net { unsigned int timeouts[UDP_CT_MAX]; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) unsigned int offload_timeout; - unsigned int offload_pickup; #endif }; diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 15e2b13fb0c0..986a2a9cfdfa 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -12,7 +12,6 @@ struct netns_nf { #if defined CONFIG_PROC_FS struct proc_dir_entry *proc_netfilter; #endif - const struct nf_queue_handler __rcu *queue_handler; const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h deleted file mode 100644 index d02316ec2906..000000000000 --- a/include/net/netns/x_tables.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NETNS_X_TABLES_H -#define __NETNS_X_TABLES_H - -#include <linux/list.h> -#include <linux/netfilter_defs.h> - -struct netns_xt { - bool notrack_deprecated_warning; - bool clusterip_deprecated_warning; -}; -#endif diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index e946366e8ba5..1f4e1816fd36 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -75,6 +75,7 @@ struct netns_xfrm { #endif spinlock_t xfrm_state_lock; seqcount_spinlock_t xfrm_state_hash_generation; + seqcount_spinlock_t xfrm_policy_hash_generation; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 3dd62dd73027..a4082406a003 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -45,7 +45,10 @@ * Please note DMA-sync-for-CPU is still * device driver responsibility */ -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV) +#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ + PP_FLAG_DMA_SYNC_DEV |\ + PP_FLAG_PAGE_FRAG) /* * Fast allocation side cache array/stack @@ -88,6 +91,9 @@ struct page_pool { unsigned long defer_warn; u32 pages_state_hold_cnt; + unsigned int frag_offset; + struct page *frag_page; + long frag_users; /* * Data structure for allocation side @@ -137,6 +143,18 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) return page_pool_alloc_pages(pool, gfp); } +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, + unsigned int size, gfp_t gfp); + +static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, + unsigned int *offset, + unsigned int size) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc_frag(pool, offset, size, gfp); +} + /* get the stored dma direction. 
A driver might decide to treat this locally and * avoid the extra cache line from page_pool to determine the direction */ @@ -198,19 +216,48 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ + (sizeof(dma_addr_t) > sizeof(unsigned long)) + static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { - dma_addr_t ret = page->dma_addr[0]; - if (sizeof(dma_addr_t) > sizeof(unsigned long)) - ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16; + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; + return ret; } static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) { - page->dma_addr[0] = addr; - if (sizeof(dma_addr_t) > sizeof(unsigned long)) - page->dma_addr[1] = upper_32_bits(addr); + page->dma_addr = addr; + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + page->dma_addr_upper = upper_32_bits(addr); +} + +static inline void page_pool_set_frag_count(struct page *page, long nr) +{ + atomic_long_set(&page->pp_frag_count, nr); +} + +static inline long page_pool_atomic_sub_frag_count_return(struct page *page, + long nr) +{ + long ret; + + /* As suggested by Alexander, atomic_long_read() may cover up the + * reference count errors, so avoid calling atomic_long_read() in + * the cases of freeing or draining the page_frags, where we would + * not expect it to match or that are slowpath anyway. + */ + if (__builtin_constant_p(nr) && + atomic_long_read(&page->pp_frag_count) == nr) + return 0; + + ret = atomic_long_sub_return(nr, &page->pp_frag_count); + WARN_ON(ret < 0); + return ret; } static inline bool is_page_pool_compiled_in(void) @@ -253,11 +300,4 @@ static inline void page_pool_ring_unlock(struct page_pool *pool) spin_unlock_bh(&pool->ring.producer_lock); } -/* Store mem_info on struct page and use it while recycling skb frags */ -static inline -void page_pool_store_mem_info(struct page *page, struct page_pool *pp) -{ - page->pp = pp; -} - #endif /* _NET_PAGE_POOL_H */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 64de26b7ad39..8fb47fc61097 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -329,6 +329,9 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); /** * struct tcf_pkt_info - packet information + * + * @ptr: start of the pkt data + * @nexthdr: offset of the next header */ struct tcf_pkt_info { unsigned char * ptr; @@ -347,6 +350,7 @@ struct tcf_ematch_ops; * @ops: the operations lookup table of the corresponding ematch module * @datalen: length of the ematch specific configuration data * @data: ematch specific data + * @net: the network namespace */ struct tcf_ematch { struct tcf_ematch_ops * ops; diff --git a/include/net/psample.h b/include/net/psample.h index e328c5127757..0509d2d6be67 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -31,6 +31,8 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num); void psample_group_take(struct psample_group *group); void psample_group_put(struct psample_group *group); +struct sk_buff; + #if IS_ENABLED(CONFIG_PSAMPLE) void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, diff --git a/include/net/sock.h b/include/net/sock.h index 6e761451c927..95b25777b53e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2400,6 +2400,11 @@ static inline gfp_t gfp_any(void) return in_softirq() ? 
GFP_ATOMIC : GFP_KERNEL; } +static inline gfp_t gfp_memcg_charge(void) +{ + return in_softirq() ? GFP_NOWAIT : GFP_KERNEL; +} + static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) { return noblock ? 0 : sk->sk_rcvtimeo; diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 2f5ce4d4fdbf..fb5681f7e61b 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -589,6 +589,9 @@ enum ocelot_sb_pool { OCELOT_SB_POOL_NUM, }; +#define OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION BIT(0) +#define OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP BIT(1) + struct ocelot_port { struct ocelot *ocelot; @@ -798,18 +801,12 @@ void ocelot_init_port(struct ocelot *ocelot, int port); void ocelot_deinit_port(struct ocelot *ocelot, int port); /* DSA callbacks */ -void ocelot_port_enable(struct ocelot *ocelot, int port, - struct phy_device *phy); -void ocelot_port_disable(struct ocelot *ocelot, int port); void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data); void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data); int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset); int ocelot_get_ts_info(struct ocelot *ocelot, int port, struct ethtool_ts_info *info); void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs); -int ocelot_port_flush(struct ocelot *ocelot, int port); -void ocelot_adjust_link(struct ocelot *ocelot, int port, - struct phy_device *phydev); int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled); void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state); void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot); @@ -894,6 +891,18 @@ int ocelot_sb_occ_tc_port_bind_get(struct ocelot *ocelot, int port, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); +void ocelot_phylink_mac_link_down(struct ocelot *ocelot, int port, + unsigned int link_an_mode, + phy_interface_t interface, + unsigned long quirks); +void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port, + struct phy_device *phydev, + unsigned int link_an_mode, + phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause, + unsigned long quirks); + #if IS_ENABLED(CONFIG_BRIDGE_MRP) int ocelot_mrp_add(struct ocelot *ocelot, int port, const struct switchdev_obj_mrp *mrp); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 67aa7134b301..b6db6590baf0 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -639,6 +639,8 @@ enum ethtool_link_ext_substate_link_logical_mismatch { enum ethtool_link_ext_substate_bad_signal_integrity { ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS, }; /* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. 
*/ diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5aca85874447..f71a81fdbbc6 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -549,6 +549,21 @@ enum { BRIDGE_VLANDB_GOPTS_ID, BRIDGE_VLANDB_GOPTS_RANGE, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, + BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION, + BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, + BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT, + BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT, + BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL, + BRIDGE_VLANDB_GOPTS_PAD, + BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, + BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, + BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) @@ -756,4 +771,17 @@ struct br_boolopt_multi { __u32 optval; __u32 optmask; }; + +enum { + BRIDGE_QUERIER_UNSPEC, + BRIDGE_QUERIER_IP_ADDRESS, + BRIDGE_QUERIER_IP_PORT, + BRIDGE_QUERIER_IP_OTHER_TIMER, + BRIDGE_QUERIER_PAD, + BRIDGE_QUERIER_IPV6_ADDRESS, + BRIDGE_QUERIER_IPV6_PORT, + BRIDGE_QUERIER_IPV6_OTHER_TIMER, + __BRIDGE_QUERIER_MAX +}; +#define BRIDGE_QUERIER_MAX (__BRIDGE_QUERIER_MAX - 1) #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5310003523ce..8aad65b69054 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -479,6 +479,7 @@ enum { IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, + IFLA_BR_MCAST_QUERIER_STATE, __IFLA_BR_MAX, }; diff --git a/include/uapi/linux/ipx.h b/include/uapi/linux/ipx.h deleted file mode 100644 index 3168137adae8..000000000000 --- a/include/uapi/linux/ipx.h +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _IPX_H_ -#define _IPX_H_ -#include <linux/libc-compat.h> /* for compatibility with glibc netipx/ipx.h */ -#include <linux/types.h> -#include <linux/sockios.h> -#include <linux/socket.h> -#define IPX_NODE_LEN 6 -#define IPX_MTU 576 - -#if __UAPI_DEF_SOCKADDR_IPX -struct sockaddr_ipx { - __kernel_sa_family_t sipx_family; - __be16 sipx_port; - __be32 sipx_network; - unsigned char sipx_node[IPX_NODE_LEN]; - __u8 sipx_type; - unsigned char sipx_zero; /* 16 byte fill */ -}; -#endif /* __UAPI_DEF_SOCKADDR_IPX */ - -/* - * So we can fit the extra info for SIOCSIFADDR into the address nicely - */ -#define sipx_special sipx_port -#define sipx_action sipx_zero -#define IPX_DLTITF 0 -#define IPX_CRTITF 1 - -#if __UAPI_DEF_IPX_ROUTE_DEFINITION -struct ipx_route_definition { - __be32 ipx_network; - __be32 ipx_router_network; - unsigned char ipx_router_node[IPX_NODE_LEN]; -}; -#endif /* __UAPI_DEF_IPX_ROUTE_DEFINITION */ - -#if __UAPI_DEF_IPX_INTERFACE_DEFINITION -struct ipx_interface_definition { - __be32 ipx_network; - unsigned char ipx_device[16]; - unsigned char ipx_dlink_type; -#define IPX_FRAME_NONE 0 -#define IPX_FRAME_SNAP 1 -#define IPX_FRAME_8022 2 -#define IPX_FRAME_ETHERII 3 -#define IPX_FRAME_8023 4 -#define IPX_FRAME_TR_8022 5 /* obsolete */ - unsigned char ipx_special; -#define IPX_SPECIAL_NONE 0 -#define IPX_PRIMARY 1 -#define IPX_INTERNAL 2 - unsigned char ipx_node[IPX_NODE_LEN]; -}; -#endif /* __UAPI_DEF_IPX_INTERFACE_DEFINITION */ - -#if 
__UAPI_DEF_IPX_CONFIG_DATA -struct ipx_config_data { - unsigned char ipxcfg_auto_select_primary; - unsigned char ipxcfg_auto_create_interfaces; -}; -#endif /* __UAPI_DEF_IPX_CONFIG_DATA */ - -/* - * OLD Route Definition for backward compatibility. - */ - -#if __UAPI_DEF_IPX_ROUTE_DEF -struct ipx_route_def { - __be32 ipx_network; - __be32 ipx_router_network; -#define IPX_ROUTE_NO_ROUTER 0 - unsigned char ipx_router_node[IPX_NODE_LEN]; - unsigned char ipx_device[16]; - unsigned short ipx_flags; -#define IPX_RT_SNAP 8 -#define IPX_RT_8022 4 -#define IPX_RT_BLUEBOOK 2 -#define IPX_RT_ROUTED 1 -}; -#endif /* __UAPI_DEF_IPX_ROUTE_DEF */ - -#define SIOCAIPXITFCRT (SIOCPROTOPRIVATE) -#define SIOCAIPXPRISLT (SIOCPROTOPRIVATE + 1) -#define SIOCIPXCFGDATA (SIOCPROTOPRIVATE + 2) -#define SIOCIPXNCPCONN (SIOCPROTOPRIVATE + 3) -#endif /* _IPX_H_ */ diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 7b05f7102321..f66038b9551f 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -73,6 +73,7 @@ enum { #define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0) #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) #define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2) +#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3) enum { MPTCP_PM_CMD_UNSPEC, diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index dc8b72201f6c..00a60695fa53 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -66,8 +66,11 @@ enum { #define NUD_NONE 0x00 /* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change - and make no address resolution or NUD. - NUD_PERMANENT also cannot be deleted by garbage collectors. + * and make no address resolution or NUD. + * NUD_PERMANENT also cannot be deleted by garbage collectors. + * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry + * states don't make sense and thus are ignored. Such entries don't age and + * can roam. */ struct nda_cacheinfo { diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index d8484be72fdc..c6e6d7d7d538 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -56,6 +56,7 @@ enum ctattr_type { CTA_LABELS_MASK, CTA_SYNPROXY, CTA_FILTER, + CTA_STATUS_MASK, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) diff --git a/include/uapi/linux/netfilter/nfnetlink_hook.h b/include/uapi/linux/netfilter/nfnetlink_hook.h index 912ec60b26b0..bbcd285b22e1 100644 --- a/include/uapi/linux/netfilter/nfnetlink_hook.h +++ b/include/uapi/linux/netfilter/nfnetlink_hook.h @@ -43,6 +43,15 @@ enum nfnl_hook_chain_info_attributes { }; #define NFNLA_HOOK_INFO_MAX (__NFNLA_HOOK_INFO_MAX - 1) +enum nfnl_hook_chain_desc_attributes { + NFNLA_CHAIN_UNSPEC, + NFNLA_CHAIN_TABLE, + NFNLA_CHAIN_FAMILY, + NFNLA_CHAIN_NAME, + __NFNLA_CHAIN_MAX, +}; +#define NFNLA_CHAIN_MAX (__NFNLA_CHAIN_MAX - 1) + /** * enum nfnl_hook_chaintype - chain type * diff --git a/init/main.c b/init/main.c index 11cbbec309fa..daad6979f782 100644 --- a/init/main.c +++ b/init/main.c @@ -397,6 +397,12 @@ static int __init bootconfig_params(char *param, char *val, return 0; } +static int __init warn_bootconfig(char *str) +{ + /* The 'bootconfig' has been handled by bootconfig_params(). 
*/ + return 0; +} + static void __init setup_boot_config(void) { static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata; @@ -475,9 +481,8 @@ static int __init warn_bootconfig(char *str) pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n"); return 0; } -early_param("bootconfig", warn_bootconfig); - #endif +early_param("bootconfig", warn_bootconfig); /* Change NUL term back to "=", to make "param" the whole string. */ static void __init repair_env_string(char *param, char *val) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b1a5fc04492b..82af6279992d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1362,11 +1362,13 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) } /** - * __bpf_prog_run - run eBPF program on a given context + * ___bpf_prog_run - run eBPF program on a given context * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers * @insn: is the array of eBPF instructions * * Decode and execute eBPF instructions. + * + * Return: whatever value is in %BPF_R0 at program exit */ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) { @@ -1562,7 +1564,7 @@ select_insn: if (unlikely(index >= array->map.max_entries)) goto out; - if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) + if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT)) goto out; tail_call_cnt++; @@ -1878,6 +1880,9 @@ static void bpf_prog_select_func(struct bpf_prog *fp) * * Try to JIT eBPF program, if JIT is not available, use interpreter. * The BPF program will be executed via BPF_PROG_RUN() macro. + * + * Return: the &fp argument along with &err set to 0 for success or + * a negative errno code on failure */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 542e94fa30b4..f02d04540c0c 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -534,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog); } -static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp, - int exclude_ifindex) +static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp) { - if (!obj || obj->dev->ifindex == exclude_ifindex || + if (!obj || !obj->dev->netdev_ops->ndo_xdp_xmit) return false; @@ -562,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj, return 0; } +static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex) +{ + while (num_excluded--) { + if (ifindex == excluded[num_excluded]) + return true; + } + return false; +} + +/* Get ifindex of each upper device. 'indexes' must be able to hold at + * least MAX_NEST_DEV elements. + * Returns the number of ifindexes added. + */ +static int get_upper_ifindexes(struct net_device *dev, int *indexes) +{ + struct net_device *upper; + struct list_head *iter; + int n = 0; + + netdev_for_each_upper_dev_rcu(dev, upper, iter) { + indexes[n++] = upper->ifindex; + } + return n; +} + int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int exclude_ifindex = exclude_ingress ? 
dev_rx->ifindex : 0; struct bpf_dtab_netdev *dst, *last_dst = NULL; + int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; struct xdp_frame *xdpf; + int num_excluded = 0; unsigned int i; int err; + if (exclude_ingress) { + num_excluded = get_upper_ifindexes(dev_rx, excluded_devices); + excluded_devices[num_excluded++] = dev_rx->ifindex; + } + xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) return -EOVERFLOW; @@ -581,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); - if (!is_valid_dst(dst, xdp, exclude_ifindex)) + if (!is_valid_dst(dst, xdp)) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -601,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, head = dev_map_index_hash(dtab, i); hlist_for_each_entry_rcu(dst, head, index_hlist, lockdep_is_held(&dtab->index_lock)) { - if (!is_valid_dst(dst, xdp, exclude_ifindex)) + if (!is_valid_dst(dst, xdp)) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, + dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -675,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int exclude_ifindex = exclude_ingress ? dev->ifindex : 0; struct bpf_dtab_netdev *dst, *last_dst = NULL; + int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; struct hlist_node *next; + int num_excluded = 0; unsigned int i; int err; + if (exclude_ingress) { + num_excluded = get_upper_ifindexes(dev, excluded_devices); + excluded_devices[num_excluded++] = dev->ifindex; + } + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); - if (!dst || dst->dev->ifindex == exclude_ifindex) + if (!dst) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -700,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, return err; last_dst = dst; + } } else { /* BPF_MAP_TYPE_DEVMAP_HASH */ for (i = 0; i < dtab->n_buckets; i++) { head = dev_map_index_hash(dtab, i); hlist_for_each_entry_safe(dst, next, head, index_hlist) { - if (!dst || dst->dev->ifindex == exclude_ifindex) + if (!dst) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, + dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 6dc3fae46a56..32471ba02708 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1644,8 +1644,8 @@ alloc: /* We cannot do copy_from_user or copy_to_user inside * the rcu_read_lock. Allocate enough space here. 
*/ - keys = kvmalloc(key_size * bucket_size, GFP_USER | __GFP_NOWARN); - values = kvmalloc(value_size * bucket_size, GFP_USER | __GFP_NOWARN); + keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN); + values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN); if (!keys || !values) { ret = -ENOMEM; goto after_loop; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 15746f779fe1..04010a5e2ddb 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -363,9 +363,15 @@ const struct bpf_func_proto bpf_jiffies64_proto = { #ifdef CONFIG_CGROUPS BPF_CALL_0(bpf_get_current_cgroup_id) { - struct cgroup *cgrp = task_dfl_cgroup(current); + struct cgroup *cgrp; + u64 cgrp_id; - return cgroup_id(cgrp); + rcu_read_lock(); + cgrp = task_dfl_cgroup(current); + cgrp_id = cgroup_id(cgrp); + rcu_read_unlock(); + + return cgrp_id; } const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { @@ -376,13 +382,17 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level) { - struct cgroup *cgrp = task_dfl_cgroup(current); + struct cgroup *cgrp; struct cgroup *ancestor; + u64 cgrp_id; + rcu_read_lock(); + cgrp = task_dfl_cgroup(current); ancestor = cgroup_ancestor(cgrp, ancestor_level); - if (!ancestor) - return 0; - return cgroup_id(ancestor); + cgrp_id = ancestor ? cgroup_id(ancestor) : 0; + rcu_read_unlock(); + + return cgrp_id; } const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { @@ -1396,12 +1406,12 @@ bpf_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_probe_read_user: return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_kernel_proto; case BPF_FUNC_probe_read_user_str: return &bpf_probe_read_user_str_proto; case BPF_FUNC_probe_read_kernel_str: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? 
NULL : &bpf_probe_read_kernel_str_proto; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5ea2238a6656..e5f2b23bb7c9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11955,6 +11955,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env) if (aux_data[i].seen) continue; memcpy(insn + i, &trap, sizeof(trap)); + aux_data[i].zext_dst = false; } } diff --git a/kernel/cfi.c b/kernel/cfi.c index e17a56639766..9594cfd1cf2c 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -248,9 +248,9 @@ static inline cfi_check_fn find_shadow_check_fn(unsigned long ptr) { cfi_check_fn fn; - rcu_read_lock_sched(); + rcu_read_lock_sched_notrace(); fn = ptr_to_check_fn(rcu_dereference_sched(cfi_shadow), ptr); - rcu_read_unlock_sched(); + rcu_read_unlock_sched_notrace(); return fn; } @@ -269,11 +269,11 @@ static inline cfi_check_fn find_module_check_fn(unsigned long ptr) cfi_check_fn fn = NULL; struct module *mod; - rcu_read_lock_sched(); + rcu_read_lock_sched_notrace(); mod = __module_address(ptr); if (mod) fn = mod->cfi_check; - rcu_read_unlock_sched(); + rcu_read_unlock_sched_notrace(); return fn; } diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 7f0e58917432..b264ab5652ba 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -347,19 +347,20 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu) } static struct cgroup_rstat_cpu * -cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp) +cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags) { struct cgroup_rstat_cpu *rstatc; rstatc = get_cpu_ptr(cgrp->rstat_cpu); - u64_stats_update_begin(&rstatc->bsync); + *flags = u64_stats_update_begin_irqsave(&rstatc->bsync); return rstatc; } static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, - struct cgroup_rstat_cpu *rstatc) + struct cgroup_rstat_cpu *rstatc, + unsigned long flags) { - u64_stats_update_end(&rstatc->bsync); + u64_stats_update_end_irqrestore(&rstatc->bsync, flags); cgroup_rstat_updated(cgrp, smp_processor_id()); put_cpu_ptr(rstatc); } @@ -367,18 +368,20 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec) { struct cgroup_rstat_cpu *rstatc; + unsigned long flags; - rstatc = cgroup_base_stat_cputime_account_begin(cgrp); + rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); rstatc->bstat.cputime.sum_exec_runtime += delta_exec; - cgroup_base_stat_cputime_account_end(cgrp, rstatc); + cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags); } void __cgroup_account_cputime_field(struct cgroup *cgrp, enum cpu_usage_stat index, u64 delta_exec) { struct cgroup_rstat_cpu *rstatc; + unsigned long flags; - rstatc = cgroup_base_stat_cputime_account_begin(cgrp); + rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); switch (index) { case CPUTIME_USER: @@ -394,7 +397,7 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp, break; } - cgroup_base_stat_cputime_account_end(cgrp, rstatc); + cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags); } /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 464917096e73..1cb1f9b8392e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11917,6 +11917,37 @@ again: return gctx; } +static bool +perf_check_permission(struct perf_event_attr *attr, struct task_struct *task) +{ + unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS; + bool is_capable = 
perfmon_capable(); + + if (attr->sigtrap) { + /* + * perf_event_attr::sigtrap sends signals to the other task. + * Require the current task to also have CAP_KILL. + */ + rcu_read_lock(); + is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL); + rcu_read_unlock(); + + /* + * If the required capabilities aren't available, checks for + * ptrace permissions: upgrade to ATTACH, since sending signals + * can effectively change the target task. + */ + ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS; + } + + /* + * Preserve ptrace permission check for backwards compatibility. The + * ptrace check also includes checks that the current task and other + * task have matching uids, and is therefore not done here explicitly. + */ + return is_capable || ptrace_may_access(task, ptrace_mode); +} + /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * @@ -12163,15 +12194,13 @@ SYSCALL_DEFINE5(perf_event_open, goto err_file; /* - * Preserve ptrace permission check for backwards compatibility. - * * We must hold exec_update_lock across this and any potential * perf_install_in_context() call for this new event to * serialize against exec() altering our credentials (and the * perf_event_exit_task() that could imply). */ err = -EACCES; - if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) + if (!perf_check_permission(&attr, task)) goto err_cred; } diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 7f04c7d8296e..a98bcfc4be7b 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -265,8 +265,11 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force) } else { switch (__irq_startup_managed(desc, aff, force)) { case IRQ_STARTUP_NORMAL: + if (d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP) + irq_setup_affinity(desc); ret = __irq_startup(desc); - irq_setup_affinity(desc); + if (!(d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP)) + irq_setup_affinity(desc); break; case IRQ_STARTUP_MANAGED: irq_do_set_affinity(d, aff, false); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index c41965e348b5..85df3ca03efe 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -476,11 +476,6 @@ skip_activate: return 0; cleanup: - for_each_msi_vector(desc, i, dev) { - irq_data = irq_domain_get_irq_data(domain, i); - if (irqd_is_activated(irq_data)) - irq_domain_deactivate_irq(irq_data); - } msi_domain_free_irqs(domain, dev); return ret; } @@ -505,7 +500,15 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) { + struct irq_data *irq_data; struct msi_desc *desc; + int i; + + for_each_msi_vector(desc, i, dev) { + irq_data = irq_domain_get_irq_data(domain, i); + if (irqd_is_activated(irq_data)) + irq_domain_deactivate_irq(irq_data); + } for_each_msi_entry(desc, dev) { /* diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index d309d6fbf5bd..4d2a702d7aa9 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -453,6 +453,11 @@ static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs, */ index = irq_timings_interval_index(interval); + if (index > PREDICTION_BUFFER_SIZE - 1) { + irqs->count = 0; + return; + } + /* * Store the index as an element of the pattern in another * circular array. 
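The kernel/events/core.c hunk above tightens perf_event_open() access to other tasks when perf_event_attr::sigtrap is set: besides CAP_PERFMON, the caller now needs CAP_KILL in the target's user namespace, or PTRACE_MODE_ATTACH-level ptrace access, where a PTRACE_MODE_READ check used to be enough. A minimal userspace sketch of a call that is subject to the new check follows; it is an illustration only, assumes uapi headers new enough to carry the sigtrap and remove_on_exec bits, and the target pid is a placeholder.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <string.h>
#include <unistd.h>

/* Open a SIGTRAP-delivering counter on another task. With the hunk above,
 * this fails with EACCES unless the caller has CAP_KILL in the target's
 * user namespace (plus CAP_PERFMON) or PTRACE_MODE_ATTACH ptrace rights. */
static long open_sigtrap_counter(pid_t target)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.sample_period = 1000000;
	attr.sigtrap = 1;		/* deliver SIGTRAP to the target task */
	attr.remove_on_exec = 1;	/* sigtrap requires remove_on_exec */

	return syscall(SYS_perf_event_open, &attr, target, -1, -1, 0);
}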
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index b5d9bb5202c6..ad0db322ed3b 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -343,7 +343,7 @@ static __always_inline bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, enum rtmutex_chainwalk chwalk) { - if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEX)) + if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES)) return waiter != NULL; return chwalk == RT_MUTEX_FULL_CHAINWALK; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d9ff40f4661..20ffcc044134 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1981,12 +1981,18 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) dequeue_task(rq, p, flags); } -/* - * __normal_prio - return the priority that is based on the static prio - */ -static inline int __normal_prio(struct task_struct *p) +static inline int __normal_prio(int policy, int rt_prio, int nice) { - return p->static_prio; + int prio; + + if (dl_policy(policy)) + prio = MAX_DL_PRIO - 1; + else if (rt_policy(policy)) + prio = MAX_RT_PRIO - 1 - rt_prio; + else + prio = NICE_TO_PRIO(nice); + + return prio; } /* @@ -1998,15 +2004,7 @@ static inline int __normal_prio(struct task_struct *p) */ static inline int normal_prio(struct task_struct *p) { - int prio; - - if (task_has_dl_policy(p)) - prio = MAX_DL_PRIO-1; - else if (task_has_rt_policy(p)) - prio = MAX_RT_PRIO-1 - p->rt_priority; - else - prio = __normal_prio(p); - return prio; + return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); } /* @@ -4099,7 +4097,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) } else if (PRIO_TO_NICE(p->static_prio) < 0) p->static_prio = NICE_TO_PRIO(0); - p->prio = p->normal_prio = __normal_prio(p); + p->prio = p->normal_prio = p->static_prio; set_load_weight(p, false); /* @@ -6341,6 +6339,18 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag } EXPORT_SYMBOL(default_wake_function); +static void __setscheduler_prio(struct task_struct *p, int prio) +{ + if (dl_prio(prio)) + p->sched_class = &dl_sched_class; + else if (rt_prio(prio)) + p->sched_class = &rt_sched_class; + else + p->sched_class = &fair_sched_class; + + p->prio = prio; +} + #ifdef CONFIG_RT_MUTEXES static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) @@ -6456,22 +6466,19 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) } else { p->dl.pi_se = &p->dl; } - p->sched_class = &dl_sched_class; } else if (rt_prio(prio)) { if (dl_prio(oldprio)) p->dl.pi_se = &p->dl; if (oldprio < prio) queue_flag |= ENQUEUE_HEAD; - p->sched_class = &rt_sched_class; } else { if (dl_prio(oldprio)) p->dl.pi_se = &p->dl; if (rt_prio(oldprio)) p->rt.timeout = 0; - p->sched_class = &fair_sched_class; } - p->prio = prio; + __setscheduler_prio(p, prio); if (queued) enqueue_task(rq, p, queue_flag); @@ -6824,35 +6831,6 @@ static void __setscheduler_params(struct task_struct *p, set_load_weight(p, true); } -/* Actually do priority change: must hold pi & rq lock. */ -static void __setscheduler(struct rq *rq, struct task_struct *p, - const struct sched_attr *attr, bool keep_boost) -{ - /* - * If params can't change scheduling class changes aren't allowed - * either. - */ - if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) - return; - - __setscheduler_params(p, attr); - - /* - * Keep a potential priority boosting if called from - * sched_setscheduler(). 
- */ - p->prio = normal_prio(p); - if (keep_boost) - p->prio = rt_effective_prio(p, p->prio); - - if (dl_prio(p->prio)) - p->sched_class = &dl_sched_class; - else if (rt_prio(p->prio)) - p->sched_class = &rt_sched_class; - else - p->sched_class = &fair_sched_class; -} - /* * Check the target process has a UID that matches the current process's: */ @@ -6873,10 +6851,8 @@ static int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi) { - int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : - MAX_RT_PRIO - 1 - attr->sched_priority; - int retval, oldprio, oldpolicy = -1, queued, running; - int new_effective_prio, policy = attr->sched_policy; + int oldpolicy = -1, policy = attr->sched_policy; + int retval, oldprio, newprio, queued, running; const struct sched_class *prev_class; struct callback_head *head; struct rq_flags rf; @@ -7074,6 +7050,7 @@ change: p->sched_reset_on_fork = reset_on_fork; oldprio = p->prio; + newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); if (pi) { /* * Take priority boosted tasks into account. If the new @@ -7082,8 +7059,8 @@ change: * the runqueue. This will be done when the task deboost * itself. */ - new_effective_prio = rt_effective_prio(p, newprio); - if (new_effective_prio == oldprio) + newprio = rt_effective_prio(p, newprio); + if (newprio == oldprio) queue_flags &= ~DEQUEUE_MOVE; } @@ -7096,7 +7073,10 @@ change: prev_class = p->sched_class; - __setscheduler(rq, p, attr, pi); + if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { + __setscheduler_params(p, attr); + __setscheduler_prio(p, newprio); + } __setscheduler_uclamp(p, attr); if (queued) { diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 057e17f3215d..6469eca8078c 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -602,7 +602,7 @@ static inline void seccomp_sync_threads(unsigned long flags) smp_store_release(&thread->seccomp.filter, caller->seccomp.filter); atomic_set(&thread->seccomp.filter_count, - atomic_read(&thread->seccomp.filter_count)); + atomic_read(&caller->seccomp.filter_count)); /* * Don't let an unprivileged task work around diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 9eb11c2209e5..e3d2c23c413d 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1265,8 +1265,10 @@ static inline void timer_base_unlock_expiry(struct timer_base *base) static void timer_sync_wait_running(struct timer_base *base) { if (atomic_read(&base->timer_waiters)) { + raw_spin_unlock_irq(&base->lock); spin_unlock(&base->expiry_lock); spin_lock(&base->expiry_lock); + raw_spin_lock_irq(&base->lock); } } @@ -1457,14 +1459,14 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) if (timer->flags & TIMER_IRQSAFE) { raw_spin_unlock(&base->lock); call_timer_fn(timer, fn, baseclk); - base->running_timer = NULL; raw_spin_lock(&base->lock); + base->running_timer = NULL; } else { raw_spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, baseclk); + raw_spin_lock_irq(&base->lock); base->running_timer = NULL; timer_sync_wait_running(base); - raw_spin_lock_irq(&base->lock); } } } diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d567b1717c4c..3ee23f4d437f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -219,6 +219,11 @@ config DYNAMIC_FTRACE_WITH_DIRECT_CALLS depends on DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +config DYNAMIC_FTRACE_WITH_ARGS + def_bool y + depends on DYNAMIC_FTRACE + depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS + config 
FUNCTION_PROFILER bool "Kernel function profiler" depends on FUNCTION_TRACER diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index c5e0b6a64091..0da94e1d6af9 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1017,28 +1017,29 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_numa_node_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; - case BPF_FUNC_probe_write_user: - return bpf_get_probe_write_proto(); case BPF_FUNC_current_task_under_cgroup: return &bpf_current_task_under_cgroup_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; + case BPF_FUNC_probe_write_user: + return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ? + NULL : bpf_get_probe_write_proto(); case BPF_FUNC_probe_read_user: return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_kernel_proto; case BPF_FUNC_probe_read_user_str: return &bpf_probe_read_user_str_proto; case BPF_FUNC_probe_read_kernel_str: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_kernel_str_proto; #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE case BPF_FUNC_probe_read: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_compat_proto; case BPF_FUNC_probe_read_str: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_compat_str_proto; #endif #ifdef CONFIG_CGROUPS diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c59dd35a6da5..a1adb29ef5c1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2897,14 +2897,26 @@ int tracepoint_printk_sysctl(struct ctl_table *table, int write, void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) { + enum event_trigger_type tt = ETT_NONE; + struct trace_event_file *file = fbuffer->trace_file; + + if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, + fbuffer->entry, &tt)) + goto discard; + if (static_key_false(&tracepoint_printk_key.key)) output_printk(fbuffer); if (static_branch_unlikely(&trace_event_exports_enabled)) ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); - event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, - fbuffer->event, fbuffer->entry, - fbuffer->trace_ctx, fbuffer->regs); + + trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, + fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); + +discard: + if (tt) + event_triggers_post_call(file, tt); + } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); @@ -9135,8 +9147,10 @@ static int trace_array_create_dir(struct trace_array *tr) return -EINVAL; ret = event_trace_add_tracer(tr->dir, tr); - if (ret) + if (ret) { tracefs_remove(tr->dir); + return ret; + } init_tracer_tracefs(tr, tr->dir); __update_tracer_options(tr); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a180abf76d4e..4a0e693000c6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1389,38 +1389,6 @@ event_trigger_unlock_commit(struct trace_event_file *file, event_triggers_post_call(file, tt); } -/** - * event_trigger_unlock_commit_regs - handle triggers and finish event commit - * @file: The file pointer associated with the event - * @buffer: The 
ring buffer that the event is being written to - * @event: The event meta data in the ring buffer - * @entry: The event itself - * @trace_ctx: The tracing context flags. - * - * This is a helper function to handle triggers that require data - * from the event itself. It also tests the event against filters and - * if the event is soft disabled and should be discarded. - * - * Same as event_trigger_unlock_commit() but calls - * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). - */ -static inline void -event_trigger_unlock_commit_regs(struct trace_event_file *file, - struct trace_buffer *buffer, - struct ring_buffer_event *event, - void *entry, unsigned int trace_ctx, - struct pt_regs *regs) -{ - enum event_trigger_type tt = ETT_NONE; - - if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit_regs(file->tr, buffer, event, - trace_ctx, regs); - - if (tt) - event_triggers_post_call(file, tt); -} - #define FILTER_PRED_INVALID ((unsigned short)-1) #define FILTER_PRED_IS_RIGHT (1 << 15) #define FILTER_PRED_FOLD (1 << 15) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 34325f41ebc0..a48aa2a2875b 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -65,7 +65,8 @@ C(INVALID_SORT_MODIFIER,"Invalid sort modifier"), \ C(EMPTY_SORT_FIELD, "Empty sort field"), \ C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \ - C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), + C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \ + C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), #undef C #define C(a, b) HIST_ERR_##a @@ -2156,6 +2157,13 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, ret = PTR_ERR(operand1); goto free; } + if (operand1->flags & HIST_FIELD_FL_STRING) { + /* String type can not be the operand of unary operator. */ + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); + destroy_hist_field(operand1, 0); + ret = -EINVAL; + goto free; + } expr->flags |= operand1->flags & (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); @@ -2257,6 +2265,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, operand1 = NULL; goto free; } + if (operand1->flags & HIST_FIELD_FL_STRING) { + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str)); + ret = -EINVAL; + goto free; + } /* rest of string could be another expression e.g. b+c in a+b+c */ operand_flags = 0; @@ -2266,6 +2279,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, operand2 = NULL; goto free; } + if (operand2->flags & HIST_FIELD_FL_STRING) { + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); + ret = -EINVAL; + goto free; + } ret = check_expr_operands(file->tr, operand1, operand2); if (ret) @@ -2287,6 +2305,10 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, expr->operands[0] = operand1; expr->operands[1] = operand2; + + /* The operand sizes should be the same, so just pick one */ + expr->size = operand1->size; + expr->operator = field_op; expr->name = expr_str(expr, 0); expr->type = kstrdup(operand1->type, GFP_KERNEL); @@ -3408,6 +3430,8 @@ trace_action_create_field_var(struct hist_trigger_data *hist_data, event = data->match_data.event; } + if (!event) + goto free; /* * At this point, we're looking at a field on another * event. 
Because we can't modify a hist trigger on diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index a6c0cdaf4b87..14f46aae1981 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -327,7 +327,7 @@ static void move_to_next_cpu(void) get_online_cpus(); cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask); - next_cpu = cpumask_next(smp_processor_id(), current_mask); + next_cpu = cpumask_next(raw_smp_processor_id(), current_mask); put_online_cpus(); if (next_cpu >= nr_cpu_ids) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index a7e3c24dee13..b61eefe5ccf5 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -253,6 +253,7 @@ static struct osnoise_data { */ static bool osnoise_busy; +#ifdef CONFIG_PREEMPT_RT /* * Print the osnoise header info. */ @@ -261,6 +262,35 @@ static void print_osnoise_headers(struct seq_file *s) if (osnoise_data.tainted) seq_puts(s, "# osnoise is tainted!\n"); + seq_puts(s, "# _-------=> irqs-off\n"); + seq_puts(s, "# / _------=> need-resched\n"); + seq_puts(s, "# | / _-----=> need-resched-lazy\n"); + seq_puts(s, "# || / _----=> hardirq/softirq\n"); + seq_puts(s, "# ||| / _---=> preempt-depth\n"); + seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); + seq_puts(s, "# ||||| / _-=> migrate-disable\n"); + + seq_puts(s, "# |||||| / "); + seq_puts(s, " MAX\n"); + + seq_puts(s, "# ||||| / "); + seq_puts(s, " SINGLE Interference counters:\n"); + + seq_puts(s, "# ||||||| RUNTIME "); + seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); + + seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US "); + seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); + + seq_puts(s, "# | | | ||||||| | | "); + seq_puts(s, " | | | | | | | |\n"); +} +#else /* CONFIG_PREEMPT_RT */ +static void print_osnoise_headers(struct seq_file *s) +{ + if (osnoise_data.tainted) + seq_puts(s, "# osnoise is tainted!\n"); + seq_puts(s, "# _-----=> irqs-off\n"); seq_puts(s, "# / _----=> need-resched\n"); seq_puts(s, "# | / _---=> hardirq/softirq\n"); @@ -279,6 +309,7 @@ static void print_osnoise_headers(struct seq_file *s) seq_puts(s, "# | | | |||| | | "); seq_puts(s, " | | | | | | | |\n"); } +#endif /* CONFIG_PREEMPT_RT */ /* * osnoise_taint - report an osnoise error. @@ -323,6 +354,24 @@ static void trace_osnoise_sample(struct osnoise_sample *sample) /* * Print the timerlat header info. */ +#ifdef CONFIG_PREEMPT_RT +static void print_timerlat_headers(struct seq_file *s) +{ + seq_puts(s, "# _-------=> irqs-off\n"); + seq_puts(s, "# / _------=> need-resched\n"); + seq_puts(s, "# | / _-----=> need-resched-lazy\n"); + seq_puts(s, "# || / _----=> hardirq/softirq\n"); + seq_puts(s, "# ||| / _---=> preempt-depth\n"); + seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); + seq_puts(s, "# ||||| / _-=> migrate-disable\n"); + seq_puts(s, "# |||||| /\n"); + seq_puts(s, "# ||||||| ACTIVATION\n"); + seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID "); + seq_puts(s, " CONTEXT LATENCY\n"); + seq_puts(s, "# | | | ||||||| | | "); + seq_puts(s, " | |\n"); +} +#else /* CONFIG_PREEMPT_RT */ static void print_timerlat_headers(struct seq_file *s) { seq_puts(s, "# _-----=> irqs-off\n"); @@ -336,6 +385,7 @@ static void print_timerlat_headers(struct seq_file *s) seq_puts(s, "# | | | |||| | | "); seq_puts(s, " | |\n"); } +#endif /* CONFIG_PREEMPT_RT */ /* * Record an timerlat_sample into the tracer buffer. 
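The kernel/trace/trace_events_hist.c hunks above make the histogram expression parser reject string-typed fields as operands (new HIST_ERR_INVALID_STR_OPERAND error) instead of building a meaningless expression. A hedged illustration of a trigger that would now be refused follows; the tracefs path and the event and field names are examples only (next_comm is a string field of sched_switch), not taken from the patch.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Write a hist trigger whose expression uses a string field as an operand.
 * With the checks above, the kernel rejects it and reports
 * "String type can not be an operand in expression" via the hist error log. */
static int write_bad_hist_trigger(void)
{
	const char *path =
		"/sys/kernel/tracing/events/sched/sched_switch/trigger";
	const char *trig = "hist:keys=next_pid:bad=prev_prio-next_comm";
	ssize_t ret;
	int fd;

	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	ret = write(fd, trig, strlen(trig));
	close(fd);
	return ret < 0 ? -1 : 0;
}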
@@ -1025,9 +1075,13 @@ diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample * /* * osnoise_stop_tracing - Stop tracing and the tracer. */ -static void osnoise_stop_tracing(void) +static __always_inline void osnoise_stop_tracing(void) { struct trace_array *tr = osnoise_trace; + + trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, + "stop tracing hit on cpu %d\n", smp_processor_id()); + tracer_tracing_off(tr); } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index fc32821f8240..efd14c79fab4 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -15,12 +15,57 @@ #include <linux/sched/task.h> #include <linux/static_key.h> +enum tp_func_state { + TP_FUNC_0, + TP_FUNC_1, + TP_FUNC_2, + TP_FUNC_N, +}; + extern tracepoint_ptr_t __start___tracepoints_ptrs[]; extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; DEFINE_SRCU(tracepoint_srcu); EXPORT_SYMBOL_GPL(tracepoint_srcu); +enum tp_transition_sync { + TP_TRANSITION_SYNC_1_0_1, + TP_TRANSITION_SYNC_N_2_1, + + _NR_TP_TRANSITION_SYNC, +}; + +struct tp_transition_snapshot { + unsigned long rcu; + unsigned long srcu; + bool ongoing; +}; + +/* Protected by tracepoints_mutex */ +static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC]; + +static void tp_rcu_get_state(enum tp_transition_sync sync) +{ + struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync]; + + /* Keep the latest get_state snapshot. */ + snapshot->rcu = get_state_synchronize_rcu(); + snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu); + snapshot->ongoing = true; +} + +static void tp_rcu_cond_sync(enum tp_transition_sync sync) +{ + struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync]; + + if (!snapshot->ongoing) + return; + cond_synchronize_rcu(snapshot->rcu); + if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu)) + synchronize_srcu(&tracepoint_srcu); + snapshot->ongoing = false; +} + /* Set to 1 to enable tracepoint debug output */ static const int tracepoint_debug; @@ -246,26 +291,29 @@ static void *func_remove(struct tracepoint_func **funcs, return old; } -static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs, bool sync) +/* + * Count the number of functions (enum tp_func_state) in a tp_funcs array. + */ +static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs) +{ + if (!tp_funcs) + return TP_FUNC_0; + if (!tp_funcs[1].func) + return TP_FUNC_1; + if (!tp_funcs[2].func) + return TP_FUNC_2; + return TP_FUNC_N; /* 3 or more */ +} + +static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs) { void *func = tp->iterator; /* Synthetic events do not have static call sites */ if (!tp->static_call_key) return; - - if (!tp_funcs[1].func) { + if (nr_func_state(tp_funcs) == TP_FUNC_1) func = tp_funcs[0].func; - /* - * If going from the iterator back to a single caller, - * we need to synchronize with __DO_TRACE to make sure - * that the data passed to the callback is the one that - * belongs to that callback. - */ - if (sync) - tracepoint_synchronize_unregister(); - } - __static_call_update(tp->static_call_key, tp->static_call_tramp, func); } @@ -299,9 +347,41 @@ static int tracepoint_add_func(struct tracepoint *tp, * a pointer to it. This array is referenced by __DO_TRACE from * include/linux/tracepoint.h using rcu_dereference_sched(). 
*/ - tracepoint_update_call(tp, tp_funcs, false); - rcu_assign_pointer(tp->funcs, tp_funcs); - static_key_enable(&tp->key); + switch (nr_func_state(tp_funcs)) { + case TP_FUNC_1: /* 0->1 */ + /* + * Make sure new static func never uses old data after a + * 1->0->1 transition sequence. + */ + tp_rcu_cond_sync(TP_TRANSITION_SYNC_1_0_1); + /* Set static call to first function */ + tracepoint_update_call(tp, tp_funcs); + /* Both iterator and static call handle NULL tp->funcs */ + rcu_assign_pointer(tp->funcs, tp_funcs); + static_key_enable(&tp->key); + break; + case TP_FUNC_2: /* 1->2 */ + /* Set iterator static call */ + tracepoint_update_call(tp, tp_funcs); + /* + * Iterator callback installed before updating tp->funcs. + * Requires ordering between RCU assign/dereference and + * static call update/call. + */ + fallthrough; + case TP_FUNC_N: /* N->N+1 (N>1) */ + rcu_assign_pointer(tp->funcs, tp_funcs); + /* + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>1) transition sequence. + */ + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); + break; + default: + WARN_ON_ONCE(1); + break; + } release_probes(old); return 0; @@ -328,17 +408,52 @@ static int tracepoint_remove_func(struct tracepoint *tp, /* Failed allocating new tp_funcs, replaced func with stub */ return 0; - if (!tp_funcs) { + switch (nr_func_state(tp_funcs)) { + case TP_FUNC_0: /* 1->0 */ /* Removed last function */ if (tp->unregfunc && static_key_enabled(&tp->key)) tp->unregfunc(); static_key_disable(&tp->key); + /* Set iterator static call */ + tracepoint_update_call(tp, tp_funcs); + /* Both iterator and static call handle NULL tp->funcs */ + rcu_assign_pointer(tp->funcs, NULL); + /* + * Make sure new static func never uses old data after a + * 1->0->1 transition sequence. + */ + tp_rcu_get_state(TP_TRANSITION_SYNC_1_0_1); + break; + case TP_FUNC_1: /* 2->1 */ rcu_assign_pointer(tp->funcs, tp_funcs); - } else { + /* + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>2) transition sequence. If the first + * element's data has changed, then force the synchronization + * to prevent current readers that have loaded the old data + * from calling the new function. + */ + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); + tp_rcu_cond_sync(TP_TRANSITION_SYNC_N_2_1); + /* Set static call to first function */ + tracepoint_update_call(tp, tp_funcs); + break; + case TP_FUNC_2: /* N->N-1 (N>2) */ + fallthrough; + case TP_FUNC_N: rcu_assign_pointer(tp->funcs, tp_funcs); - tracepoint_update_call(tp, tp_funcs, - tp_funcs[0].func != old[0].func); + /* + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>2) transition sequence. 
+ */ + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); + break; + default: + WARN_ON_ONCE(1); + break; } release_probes(old); return 0; diff --git a/kernel/ucount.c b/kernel/ucount.c index 87799e2379bd..bb51849e6375 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -58,14 +58,17 @@ static struct ctl_table_root set_root = { .permissions = set_permissions, }; -#define UCOUNT_ENTRY(name) \ - { \ - .procname = name, \ - .maxlen = sizeof(int), \ - .mode = 0644, \ - .proc_handler = proc_dointvec_minmax, \ - .extra1 = SYSCTL_ZERO, \ - .extra2 = SYSCTL_INT_MAX, \ +static long ue_zero = 0; +static long ue_int_max = INT_MAX; + +#define UCOUNT_ENTRY(name) \ + { \ + .procname = name, \ + .maxlen = sizeof(long), \ + .mode = 0644, \ + .proc_handler = proc_doulongvec_minmax, \ + .extra1 = &ue_zero, \ + .extra2 = &ue_int_max, \ } static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_user_namespaces"), @@ -160,6 +163,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; + long overflow; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -184,8 +188,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) return new; } } + overflow = atomic_add_negative(1, &ucounts->count); spin_unlock_irq(&ucounts_lock); - ucounts = get_ucounts(ucounts); + if (overflow) { + put_ucounts(ucounts); + return NULL; + } return ucounts; } @@ -193,8 +201,7 @@ void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_test(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); + if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { hlist_del_init(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); kfree(ucounts); diff --git a/lib/devmem_is_allowed.c b/lib/devmem_is_allowed.c index c0d67c541849..60be9e24bd57 100644 --- a/lib/devmem_is_allowed.c +++ b/lib/devmem_is_allowed.c @@ -19,7 +19,7 @@ */ int devmem_is_allowed(unsigned long pfn) { - if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + if (iomem_is_exclusive(PFN_PHYS(pfn))) return 0; if (!page_is_ram(pfn)) return 1; diff --git a/lib/once.c b/lib/once.c index 8b7d6235217e..59149bf3bfb4 100644 --- a/lib/once.c +++ b/lib/once.c @@ -3,10 +3,12 @@ #include <linux/spinlock.h> #include <linux/once.h> #include <linux/random.h> +#include <linux/module.h> struct once_work { struct work_struct work; struct static_key_true *key; + struct module *module; }; static void once_deferred(struct work_struct *w) @@ -16,10 +18,11 @@ static void once_deferred(struct work_struct *w) work = container_of(w, struct once_work, work); BUG_ON(!static_key_enabled(work->key)); static_branch_disable(work->key); + module_put(work->module); kfree(work); } -static void once_disable_jump(struct static_key_true *key) +static void once_disable_jump(struct static_key_true *key, struct module *mod) { struct once_work *w; @@ -29,6 +32,8 @@ static void once_disable_jump(struct static_key_true *key) INIT_WORK(&w->work, once_deferred); w->key = key; + w->module = mod; + __module_get(mod); schedule_work(&w->work); } @@ -53,11 +58,11 @@ bool __do_once_start(bool *done, unsigned long *flags) EXPORT_SYMBOL(__do_once_start); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags) + unsigned long *flags, struct module *mod) __releases(once_lock) { *done = true; spin_unlock_irqrestore(&once_lock, *flags); - once_disable_jump(once_key); + 
once_disable_jump(once_key, mod); } EXPORT_SYMBOL(__do_once_done); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index f6d5d30d01bf..44d8197bbffb 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -461,6 +461,41 @@ static int bpf_fill_stxdw(struct bpf_test *self) return __bpf_fill_stxdw(self, BPF_DW); } +static int bpf_fill_long_jmp(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct bpf_insn *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[0] = BPF_ALU64_IMM(BPF_MOV, R0, 1); + insn[1] = BPF_JMP_IMM(BPF_JEQ, R0, 1, len - 2 - 1); + + /* + * Fill with a complex 64-bit operation that expands to a lot of + * instructions on 32-bit JITs. The large jump offset can then + * overflow the conditional branch field size, triggering a branch + * conversion mechanism in some JITs. + * + * Note: BPF_MAXINSNS of ALU64 MUL is enough to trigger such branch + * conversion on the 32-bit MIPS JIT. For other JITs, the instruction + * count and/or operation may need to be modified to trigger the + * branch conversion. + */ + for (i = 2; i < len - 1; i++) + insn[i] = BPF_ALU64_IMM(BPF_MUL, R0, (i << 16) + i); + + insn[len - 1] = BPF_EXIT_INSN(); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + static struct bpf_test tests[] = { { "TAX", @@ -1917,6 +1952,163 @@ static struct bpf_test tests[] = { { { 0, -1 } } }, { + /* + * Register (non-)clobbering test, in the case where a 32-bit + * JIT implements complex ALU64 operations via function calls. + * If so, the function call must be invisible in the eBPF + * registers. The JIT must then save and restore relevant + * registers during the call. The following tests check that + * the eBPF registers retain their values after such a call. 
+ */ + "INT: Register clobbering, R1 updated", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 123456789), + BPF_ALU32_IMM(BPF_MOV, R2, 2), + BPF_ALU32_IMM(BPF_MOV, R3, 3), + BPF_ALU32_IMM(BPF_MOV, R4, 4), + BPF_ALU32_IMM(BPF_MOV, R5, 5), + BPF_ALU32_IMM(BPF_MOV, R6, 6), + BPF_ALU32_IMM(BPF_MOV, R7, 7), + BPF_ALU32_IMM(BPF_MOV, R8, 8), + BPF_ALU32_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_DIV, R1, 123456789), + BPF_JMP_IMM(BPF_JNE, R0, 0, 10), + BPF_JMP_IMM(BPF_JNE, R1, 1, 9), + BPF_JMP_IMM(BPF_JNE, R2, 2, 8), + BPF_JMP_IMM(BPF_JNE, R3, 3, 7), + BPF_JMP_IMM(BPF_JNE, R4, 4, 6), + BPF_JMP_IMM(BPF_JNE, R5, 5, 5), + BPF_JMP_IMM(BPF_JNE, R6, 6, 4), + BPF_JMP_IMM(BPF_JNE, R7, 7, 3), + BPF_JMP_IMM(BPF_JNE, R8, 8, 2), + BPF_JMP_IMM(BPF_JNE, R9, 9, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "INT: Register clobbering, R2 updated", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R2, 2 * 123456789), + BPF_ALU32_IMM(BPF_MOV, R3, 3), + BPF_ALU32_IMM(BPF_MOV, R4, 4), + BPF_ALU32_IMM(BPF_MOV, R5, 5), + BPF_ALU32_IMM(BPF_MOV, R6, 6), + BPF_ALU32_IMM(BPF_MOV, R7, 7), + BPF_ALU32_IMM(BPF_MOV, R8, 8), + BPF_ALU32_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_DIV, R2, 123456789), + BPF_JMP_IMM(BPF_JNE, R0, 0, 10), + BPF_JMP_IMM(BPF_JNE, R1, 1, 9), + BPF_JMP_IMM(BPF_JNE, R2, 2, 8), + BPF_JMP_IMM(BPF_JNE, R3, 3, 7), + BPF_JMP_IMM(BPF_JNE, R4, 4, 6), + BPF_JMP_IMM(BPF_JNE, R5, 5, 5), + BPF_JMP_IMM(BPF_JNE, R6, 6, 4), + BPF_JMP_IMM(BPF_JNE, R7, 7, 3), + BPF_JMP_IMM(BPF_JNE, R8, 8, 2), + BPF_JMP_IMM(BPF_JNE, R9, 9, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + /* + * Test 32-bit JITs that implement complex ALU64 operations as + * function calls R0 = f(R1, R2), and must re-arrange operands. 
+ */ +#define NUMER 0xfedcba9876543210ULL +#define DENOM 0x0123456789abcdefULL + "ALU64_DIV X: Operand register permutations", + .u.insns_int = { + /* R0 / R2 */ + BPF_LD_IMM64(R0, NUMER), + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R0, R2), + BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R1 / R0 */ + BPF_LD_IMM64(R1, NUMER), + BPF_LD_IMM64(R0, DENOM), + BPF_ALU64_REG(BPF_DIV, R1, R0), + BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R0 / R1 */ + BPF_LD_IMM64(R0, NUMER), + BPF_LD_IMM64(R1, DENOM), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R2 / R0 */ + BPF_LD_IMM64(R2, NUMER), + BPF_LD_IMM64(R0, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R0), + BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R2 / R1 */ + BPF_LD_IMM64(R2, NUMER), + BPF_LD_IMM64(R1, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R1), + BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R1 / R2 */ + BPF_LD_IMM64(R1, NUMER), + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R1, R2), + BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R1 / R1 */ + BPF_LD_IMM64(R1, NUMER), + BPF_ALU64_REG(BPF_DIV, R1, R1), + BPF_JMP_IMM(BPF_JEQ, R1, 1, 1), + BPF_EXIT_INSN(), + /* R2 / R2 */ + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R2), + BPF_JMP_IMM(BPF_JEQ, R2, 1, 1), + BPF_EXIT_INSN(), + /* R3 / R4 */ + BPF_LD_IMM64(R3, NUMER), + BPF_LD_IMM64(R4, DENOM), + BPF_ALU64_REG(BPF_DIV, R3, R4), + BPF_JMP_IMM(BPF_JEQ, R3, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* Successful return */ + BPF_LD_IMM64(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, +#undef NUMER +#undef DENOM + }, +#ifdef CONFIG_32BIT + { + "INT: 32-bit context pointer word order and zero-extension", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_JMP32_IMM(BPF_JEQ, R1, 0, 3), + BPF_ALU64_IMM(BPF_RSH, R1, 32), + BPF_JMP32_IMM(BPF_JNE, R1, 0, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, +#endif + { "check: missing ret", .u.insns = { BPF_STMT(BPF_LD | BPF_IMM, 1), @@ -2361,6 +2553,48 @@ static struct bpf_test tests[] = { { { 0, 0x1 } }, }, { + "ALU_MOV_K: small negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "ALU_MOV_K: small negative zero extension", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU_MOV_K: large negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123456789), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123456789 } } + }, + { + "ALU_MOV_K: large negative zero extension", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123456789), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { "ALU64_MOV_K: dst = 2", .u.insns_int = { BPF_ALU64_IMM(BPF_MOV, R0, 2), @@ -2412,6 +2646,48 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_MOV_K: small negative", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -123), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "ALU64_MOV_K: small negative sign extension", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -123), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } } + }, + { + "ALU64_MOV_K: large negative", + .u.insns_int = { + 
BPF_ALU64_IMM(BPF_MOV, R0, -123456789), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123456789 } } + }, + { + "ALU64_MOV_K: large negative sign extension", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -123456789), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } } + }, /* BPF_ALU | BPF_ADD | BPF_X */ { "ALU_ADD_X: 1 + 2 = 3", @@ -2967,6 +3243,31 @@ static struct bpf_test tests[] = { { }, { { 0, 2147483647 } }, }, + { + "ALU64_MUL_X: 64x64 multiply, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0fedcba987654321LL), + BPF_LD_IMM64(R1, 0x123456789abcdef0LL), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xe5618cf0 } } + }, + { + "ALU64_MUL_X: 64x64 multiply, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0fedcba987654321LL), + BPF_LD_IMM64(R1, 0x123456789abcdef0LL), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x2236d88f } } + }, /* BPF_ALU | BPF_MUL | BPF_K */ { "ALU_MUL_K: 2 * 3 = 6", @@ -3077,6 +3378,29 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_MUL_K: 64x32 multiply, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xe242d208 } } + }, + { + "ALU64_MUL_K: 64x32 multiply, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xc28f5c28 } } + }, /* BPF_ALU | BPF_DIV | BPF_X */ { "ALU_DIV_X: 6 / 2 = 3", @@ -3431,6 +3755,44 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, }, { + "ALU_AND_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_AND, R0, 15), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4 } } + }, + { + "ALU_AND_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4), + BPF_ALU32_IMM(BPF_AND, R0, 0xafbfcfdf), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xa1b2c3d4 } } + }, + { + "ALU_AND_K: Zero extension", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x0000000080a0c0e0LL), + BPF_ALU32_IMM(BPF_AND, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { "ALU64_AND_K: 3 & 2 = 2", .u.insns_int = { BPF_LD_IMM64(R0, 3), @@ -3453,7 +3815,7 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, }, { - "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000", + "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000000000000000", .u.insns_int = { BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), BPF_LD_IMM64(R3, 0x0000000000000000LL), @@ -3469,7 +3831,7 @@ static struct bpf_test tests[] = { { { 0, 0x1 } }, }, { - "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff", + "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffff0000", .u.insns_int = { BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), BPF_LD_IMM64(R3, 0x0000ffffffff0000LL), @@ -3500,6 +3862,38 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_AND_K: Sign extension 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x00000000090b0d0fLL), + BPF_ALU64_IMM(BPF_AND, R0, 0x0f0f0f0f), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + 
BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "ALU64_AND_K: Sign extension 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x0123456780a0c0e0LL), + BPF_ALU64_IMM(BPF_AND, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, /* BPF_ALU | BPF_OR | BPF_X */ { "ALU_OR_X: 1 | 2 = 3", @@ -3573,6 +3967,44 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, }, { + "ALU_OR_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_OR, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x01020305 } } + }, + { + "ALU_OR_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_OR, R0, 0xa0b0c0d0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xa1b2c3d4 } } + }, + { + "ALU_OR_K: Zero extension", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x00000000f9fbfdffLL), + BPF_ALU32_IMM(BPF_OR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { "ALU64_OR_K: 1 | 2 = 3", .u.insns_int = { BPF_LD_IMM64(R0, 1), @@ -3595,7 +4027,7 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, }, { - "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000", + "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffffffff0000", .u.insns_int = { BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), BPF_LD_IMM64(R3, 0x0000ffffffff0000LL), @@ -3642,6 +4074,38 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_OR_K: Sign extension 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x012345678fafcfefLL), + BPF_ALU64_IMM(BPF_OR, R0, 0x0f0f0f0f), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "ALU64_OR_K: Sign extension 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0xfffffffff9fbfdffLL), + BPF_ALU64_IMM(BPF_OR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, /* BPF_ALU | BPF_XOR | BPF_X */ { "ALU_XOR_X: 5 ^ 6 = 3", @@ -3715,6 +4179,44 @@ static struct bpf_test tests[] = { { { 0, 0xfffffffe } }, }, { + "ALU_XOR_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_XOR, R0, 15), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x0102030b } } + }, + { + "ALU_XOR_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4), + BPF_ALU32_IMM(BPF_XOR, R0, 0xafbfcfdf), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x5e4d3c2b } } + }, + { + "ALU_XOR_K: Zero extension", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x00000000795b3d1fLL), + BPF_ALU32_IMM(BPF_XOR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { "ALU64_XOR_K: 5 ^ 6 = 3", .u.insns_int = { BPF_LD_IMM64(R0, 5), @@ -3726,7 +4228,7 @@ static struct bpf_test tests[] = { { { 0, 3 } }, }, { - "ALU64_XOR_K: 1 
& 0xffffffff = 0xfffffffe", + "ALU64_XOR_K: 1 ^ 0xffffffff = 0xfffffffe", .u.insns_int = { BPF_LD_IMM64(R0, 1), BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff), @@ -3784,6 +4286,38 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_XOR_K: Sign extension 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x0123456786a4c2e0LL), + BPF_ALU64_IMM(BPF_XOR, R0, 0x0f0f0f0f), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "ALU64_XOR_K: Sign extension 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0xfedcba98795b3d1fLL), + BPF_ALU64_IMM(BPF_XOR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, /* BPF_ALU | BPF_LSH | BPF_X */ { "ALU_LSH_X: 1 << 1 = 2", @@ -3810,6 +4344,18 @@ static struct bpf_test tests[] = { { { 0, 0x80000000 } }, }, { + "ALU_LSH_X: 0x12345678 << 12 = 0x45678000", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU32_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x45678000 } } + }, + { "ALU64_LSH_X: 1 << 1 = 2", .u.insns_int = { BPF_LD_IMM64(R0, 1), @@ -3833,6 +4379,106 @@ static struct bpf_test tests[] = { { }, { { 0, 0x80000000 } }, }, + { + "ALU64_LSH_X: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xbcdef000 } } + }, + { + "ALU64_LSH_X: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x3456789a } } + }, + { + "ALU64_LSH_X: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_X: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x9abcdef0 } } + }, + { + "ALU64_LSH_X: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_X: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_LSH_X: Zero shift, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_LSH_X: Zero shift, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x01234567 } } + 
}, /* BPF_ALU | BPF_LSH | BPF_K */ { "ALU_LSH_K: 1 << 1 = 2", @@ -3857,6 +4503,28 @@ static struct bpf_test tests[] = { { { 0, 0x80000000 } }, }, { + "ALU_LSH_K: 0x12345678 << 12 = 0x45678000", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_LSH, R0, 12), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x45678000 } } + }, + { + "ALU_LSH_K: 0x12345678 << 0 = 0x12345678", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_LSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x12345678 } } + }, + { "ALU64_LSH_K: 1 << 1 = 2", .u.insns_int = { BPF_LD_IMM64(R0, 1), @@ -3878,6 +4546,86 @@ static struct bpf_test tests[] = { { }, { { 0, 0x80000000 } }, }, + { + "ALU64_LSH_K: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 12), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xbcdef000 } } + }, + { + "ALU64_LSH_K: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 12), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x3456789a } } + }, + { + "ALU64_LSH_K: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 36), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_K: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 36), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x9abcdef0 } } + }, + { + "ALU64_LSH_K: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_K: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 32), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_LSH_K: Zero shift", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, /* BPF_ALU | BPF_RSH | BPF_X */ { "ALU_RSH_X: 2 >> 1 = 1", @@ -3904,6 +4652,18 @@ static struct bpf_test tests[] = { { { 0, 1 } }, }, { + "ALU_RSH_X: 0x12345678 >> 20 = 0x123", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_MOV, R1, 20), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x123 } } + }, + { "ALU64_RSH_X: 2 >> 1 = 1", .u.insns_int = { BPF_LD_IMM64(R0, 2), @@ -3927,6 +4687,106 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "ALU64_RSH_X: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x56789abc } } + }, + { + "ALU64_RSH_X: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x00081234 } } + }, + { + "ALU64_RSH_X: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 
0x08123456 } } + }, + { + "ALU64_RSH_X: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_X: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, + { + "ALU64_RSH_X: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_X: Zero shift, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_RSH_X: Zero shift, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, /* BPF_ALU | BPF_RSH | BPF_K */ { "ALU_RSH_K: 2 >> 1 = 1", @@ -3951,6 +4811,28 @@ static struct bpf_test tests[] = { { { 0, 1 } }, }, { + "ALU_RSH_K: 0x12345678 >> 20 = 0x123", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_RSH, R0, 20), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x123 } } + }, + { + "ALU_RSH_K: 0x12345678 >> 0 = 0x12345678", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_RSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x12345678 } } + }, + { "ALU64_RSH_K: 2 >> 1 = 1", .u.insns_int = { BPF_LD_IMM64(R0, 2), @@ -3972,9 +4854,101 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "ALU64_RSH_K: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 12), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x56789abc } } + }, + { + "ALU64_RSH_K: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 12), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x00081234 } } + }, + { + "ALU64_RSH_K: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 36), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x08123456 } } + }, + { + "ALU64_RSH_K: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 36), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_K: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, + { + "ALU64_RSH_K: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_K: Zero shift", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { 
{ 0, 0x89abcdef } } + }, /* BPF_ALU | BPF_ARSH | BPF_X */ { - "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + "ALU32_ARSH_X: -1234 >> 7 = -10", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1234), + BPF_ALU32_IMM(BPF_MOV, R1, 7), + BPF_ALU32_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -10 } } + }, + { + "ALU64_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", .u.insns_int = { BPF_LD_IMM64(R0, 0xff00ff0000000000LL), BPF_ALU32_IMM(BPF_MOV, R1, 40), @@ -3985,9 +4959,131 @@ static struct bpf_test tests[] = { { }, { { 0, 0xffff00ff } }, }, + { + "ALU64_ARSH_X: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x56789abc } } + }, + { + "ALU64_ARSH_X: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfff81234 } } + }, + { + "ALU64_ARSH_X: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xf8123456 } } + }, + { + "ALU64_ARSH_X: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "ALU64_ARSH_X: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, + { + "ALU64_ARSH_X: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "ALU64_ARSH_X: Zero shift, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_ARSH_X: Zero shift, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, /* BPF_ALU | BPF_ARSH | BPF_K */ { - "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + "ALU32_ARSH_K: -1234 >> 7 = -10", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1234), + BPF_ALU32_IMM(BPF_ARSH, R0, 7), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -10 } } + }, + { + "ALU32_ARSH_K: -1234 >> 0 = -1234", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1234), + BPF_ALU32_IMM(BPF_ARSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1234 } } + }, + { + "ALU64_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", .u.insns_int = { BPF_LD_IMM64(R0, 0xff00ff0000000000LL), BPF_ALU64_IMM(BPF_ARSH, R0, 40), @@ -3997,6 +5093,86 @@ static struct bpf_test tests[] = { { }, { { 0, 0xffff00ff } }, }, + { + "ALU64_ARSH_K: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, 
R0, 12),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x56789abc } }
+ },
+ {
+ "ALU64_ARSH_K: Shift < 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 12),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfff81234 } }
+ },
+ {
+ "ALU64_ARSH_K: Shift > 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xf8123456 } }
+ },
+ {
+ "ALU64_ARSH_K: Shift > 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0xf123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } }
+ },
+ {
+ "ALU64_ARSH_K: Shift == 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
+ {
+ "ALU64_ARSH_K: Shift == 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } }
+ },
+ {
+ "ALU64_ARSH_K: Zero shift",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
 /* BPF_ALU | BPF_NEG */
 {
 "ALU_NEG: -(3) = -3",
@@ -4295,80 +5471,346 @@ static struct bpf_test tests[] = {
 { { 0, 0xffffffff } },
 .stack_depth = 40,
 },
+ {
+ "STX_MEM_DW: Store double word: first word in memory",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0),
+ BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+#ifdef __BIG_ENDIAN
+ { { 0, 0x01234567 } },
+#else
+ { { 0, 0x89abcdef } },
+#endif
+ .stack_depth = 40,
+ },
+ {
+ "STX_MEM_DW: Store double word: second word in memory",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0),
+ BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_LDX_MEM(BPF_W, R0, R10, -36),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+#ifdef __BIG_ENDIAN
+ { { 0, 0x89abcdef } },
+#else
+ { { 0, 0x01234567 } },
+#endif
+ .stack_depth = 40,
+ },
 /* BPF_STX | BPF_ATOMIC | BPF_W/DW */
 {
- "STX_XADD_W: Test: 0x12 + 0x10 = 0x22",
+ "STX_XADD_W: X + 1 + 1 + 1 + ...",
+ { },
+ INTERNAL,
+ { },
+ { { 0, 4134 } },
+ .fill_helper = bpf_fill_stxw,
+ },
+ {
+ "STX_XADD_DW: X + 1 + 1 + 1 + ...",
+ { },
+ INTERNAL,
+ { },
+ { { 0, 4134 } },
+ .fill_helper = bpf_fill_stxdw,
+ },
+ /*
+ * Exhaustive tests of atomic operation variants.
+ * Individual tests are expanded from template macros for all
+ * combinations of ALU operation, word size and fetching.
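+ * Each expansion checks the value left in memory, verifies that the
+ * address register and surrounding registers are not clobbered, and
+ * checks the value returned in the source register when BPF_FETCH
+ * is set.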
+ */ +#define BPF_ATOMIC_OP_TEST1(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU32_IMM(BPF_MOV, R5, update), \ + BPF_ST_MEM(width, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R5, -40), \ + BPF_LDX_MEM(width, R0, R10, -40), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, result } }, \ + .stack_depth = 40, \ +} +#define BPF_ATOMIC_OP_TEST2(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test side effects, r10: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU64_REG(BPF_MOV, R1, R10), \ + BPF_ALU32_IMM(BPF_MOV, R0, update), \ + BPF_ST_MEM(BPF_W, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R0, -40), \ + BPF_ALU64_REG(BPF_MOV, R0, R10), \ + BPF_ALU64_REG(BPF_SUB, R0, R1), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, 0 } }, \ + .stack_depth = 40, \ +} +#define BPF_ATOMIC_OP_TEST3(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test side effects, r0: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU64_REG(BPF_MOV, R0, R10), \ + BPF_ALU32_IMM(BPF_MOV, R1, update), \ + BPF_ST_MEM(width, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R1, -40), \ + BPF_ALU64_REG(BPF_SUB, R0, R10), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, 0 } }, \ + .stack_depth = 40, \ +} +#define BPF_ATOMIC_OP_TEST4(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test fetch: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU32_IMM(BPF_MOV, R3, update), \ + BPF_ST_MEM(width, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R3, -40), \ + BPF_ALU64_REG(BPF_MOV, R0, R3), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, (op) & BPF_FETCH ? 
old : update } }, \ + .stack_depth = 40, \ +} + /* BPF_ATOMIC | BPF_W: BPF_ADD */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_W: BPF_ADD | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_DW: BPF_ADD */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_DW: BPF_ADD | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_W: BPF_AND */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_W: BPF_AND | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_DW: BPF_AND */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_DW: BPF_AND | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_W: BPF_OR */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_W: BPF_OR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_DW: BPF_OR */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_DW: BPF_OR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR | 
BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_W: BPF_XOR */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9), + /* BPF_ATOMIC | BPF_W: BPF_XOR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + /* BPF_ATOMIC | BPF_DW: BPF_XOR */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + /* BPF_ATOMIC | BPF_DW: BPF_XOR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + /* BPF_ATOMIC | BPF_W: BPF_XCHG */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + /* BPF_ATOMIC | BPF_DW: BPF_XCHG */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), +#undef BPF_ATOMIC_OP_TEST1 +#undef BPF_ATOMIC_OP_TEST2 +#undef BPF_ATOMIC_OP_TEST3 +#undef BPF_ATOMIC_OP_TEST4 + /* BPF_ATOMIC | BPF_W, BPF_CMPXCHG */ + { + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful return", + .u.insns_int = { + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x01234567 } }, + .stack_depth = 40, + }, + { + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful store", .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), BPF_LDX_MEM(BPF_W, R0, R10, -40), BPF_EXIT_INSN(), }, INTERNAL, { }, - { { 0, 0x22 } }, + { { 0, 0x89abcdef } }, .stack_depth = 40, }, { - "STX_XADD_W: Test side-effects, r10: 0x12 + 0x10 = 0x22", + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure return", .u.insns_int = { - BPF_ALU64_REG(BPF_MOV, R1, R10), - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), - BPF_ALU64_REG(BPF_MOV, R0, R10), - BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, 
R3, -40), BPF_EXIT_INSN(), }, INTERNAL, { }, - { { 0, 0 } }, + { { 0, 0x01234567 } }, .stack_depth = 40, }, { - "STX_XADD_W: Test side-effects, r0: 0x12 + 0x10 = 0x22", + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure store", .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), BPF_EXIT_INSN(), }, INTERNAL, { }, - { { 0, 0x12 } }, + { { 0, 0x01234567 } }, .stack_depth = 40, }, { - "STX_XADD_W: X + 1 + 1 + 1 + ...", + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test side effects", + .u.insns_int = { + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_ALU32_REG(BPF_MOV, R0, R3), + BPF_EXIT_INSN(), + }, + INTERNAL, { }, + { { 0, 0x89abcdef } }, + .stack_depth = 40, + }, + /* BPF_ATOMIC | BPF_DW, BPF_CMPXCHG */ + { + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful return", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_JMP_REG(BPF_JNE, R0, R1, 1), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, INTERNAL, { }, - { { 0, 4134 } }, - .fill_helper = bpf_fill_stxw, + { { 0, 0 } }, + .stack_depth = 40, }, { - "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22", + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful store", .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_STX_MEM(BPF_DW, R10, R0, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_REG(BPF_SUB, R0, R2), BPF_EXIT_INSN(), }, INTERNAL, { }, - { { 0, 0x22 } }, + { { 0, 0 } }, .stack_depth = 40, }, { - "STX_XADD_DW: Test side-effects, r10: 0x12 + 0x10 = 0x22", + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure return", .u.insns_int = { - BPF_ALU64_REG(BPF_MOV, R1, R10), - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), - BPF_ALU64_REG(BPF_MOV, R0, R10), + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_ALU64_IMM(BPF_ADD, R0, 1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_JMP_REG(BPF_JNE, R0, R1, 1), BPF_ALU64_REG(BPF_SUB, R0, R1), BPF_EXIT_INSN(), }, @@ -4378,25 +5820,552 @@ static struct bpf_test tests[] = { .stack_depth = 40, }, { - "STX_XADD_DW: Test side-effects, r0: 0x12 + 0x10 = 0x22", + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure store", .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_ALU64_IMM(BPF_ADD, R0, 1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, 
BPF_CMPXCHG, R10, R2, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_JMP_REG(BPF_JNE, R0, R1, 1), + BPF_ALU64_REG(BPF_SUB, R0, R1), BPF_EXIT_INSN(), }, INTERNAL, { }, - { { 0, 0x12 } }, + { { 0, 0 } }, .stack_depth = 40, }, { - "STX_XADD_DW: X + 1 + 1 + 1 + ...", + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test side effects", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_LD_IMM64(R0, 0xfecdba9876543210ULL), + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_REG(BPF_SUB, R0, R2), + BPF_EXIT_INSN(), + }, + INTERNAL, { }, + { { 0, 0 } }, + .stack_depth = 40, + }, + /* BPF_JMP32 | BPF_JEQ | BPF_K */ + { + "JMP32_JEQ_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JEQ, R0, 321, 1), + BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, INTERNAL, { }, - { { 0, 4134 } }, - .fill_helper = bpf_fill_stxdw, + { { 0, 123 } } + }, + { + "JMP32_JEQ_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 12345678), + BPF_JMP32_IMM(BPF_JEQ, R0, 12345678 & 0xffff, 1), + BPF_JMP32_IMM(BPF_JEQ, R0, 12345678, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 12345678 } } + }, + { + "JMP32_JEQ_K: negative immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1), + BPF_JMP32_IMM(BPF_JEQ, R0, -123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + /* BPF_JMP32 | BPF_JEQ | BPF_X */ + { + "JMP32_JEQ_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1234), + BPF_ALU32_IMM(BPF_MOV, R1, 4321), + BPF_JMP32_REG(BPF_JEQ, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 1234), + BPF_JMP32_REG(BPF_JEQ, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1234 } } + }, + /* BPF_JMP32 | BPF_JNE | BPF_K */ + { + "JMP32_JNE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JNE, R0, 123, 1), + BPF_JMP32_IMM(BPF_JNE, R0, 321, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JNE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 12345678), + BPF_JMP32_IMM(BPF_JNE, R0, 12345678, 1), + BPF_JMP32_IMM(BPF_JNE, R0, 12345678 & 0xffff, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 12345678 } } + }, + { + "JMP32_JNE_K: negative immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JNE, R0, -123, 1), + BPF_JMP32_IMM(BPF_JNE, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + /* BPF_JMP32 | BPF_JNE | BPF_X */ + { + "JMP32_JNE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1234), + BPF_ALU32_IMM(BPF_MOV, R1, 1234), + BPF_JMP32_REG(BPF_JNE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 4321), + BPF_JMP32_REG(BPF_JNE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1234 } } + }, + /* BPF_JMP32 | BPF_JSET | BPF_K */ + { + "JMP32_JSET_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP32_IMM(BPF_JSET, R0, 2, 1), + BPF_JMP32_IMM(BPF_JSET, R0, 3, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + 
"JMP32_JSET_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x40000000), + BPF_JMP32_IMM(BPF_JSET, R0, 0x3fffffff, 1), + BPF_JMP32_IMM(BPF_JSET, R0, 0x60000000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x40000000 } } + }, + { + "JMP32_JSET_K: negative immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSET, R0, -1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + /* BPF_JMP32 | BPF_JSET | BPF_X */ + { + "JMP32_JSET_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 8), + BPF_ALU32_IMM(BPF_MOV, R1, 7), + BPF_JMP32_REG(BPF_JSET, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 8 | 2), + BPF_JMP32_REG(BPF_JNE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 8 } } + }, + /* BPF_JMP32 | BPF_JGT | BPF_K */ + { + "JMP32_JGT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JGT, R0, 123, 1), + BPF_JMP32_IMM(BPF_JGT, R0, 122, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JGT_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JGT, R0, 0xffffffff, 1), + BPF_JMP32_IMM(BPF_JGT, R0, 0xfffffffd, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JGT | BPF_X */ + { + "JMP32_JGT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_JMP32_REG(BPF_JGT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd), + BPF_JMP32_REG(BPF_JGT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JGE | BPF_K */ + { + "JMP32_JGE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JGE, R0, 124, 1), + BPF_JMP32_IMM(BPF_JGE, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JGE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JGE, R0, 0xffffffff, 1), + BPF_JMP32_IMM(BPF_JGE, R0, 0xfffffffe, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JGE | BPF_X */ + { + "JMP32_JGE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_JMP32_REG(BPF_JGE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe), + BPF_JMP32_REG(BPF_JGE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLT | BPF_K */ + { + "JMP32_JLT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JLT, R0, 123, 1), + BPF_JMP32_IMM(BPF_JLT, R0, 124, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JLT_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JLT, R0, 0xfffffffd, 1), + BPF_JMP32_IMM(BPF_JLT, R0, 0xffffffff, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLT | BPF_X */ + { + "JMP32_JLT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + 
BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd), + BPF_JMP32_REG(BPF_JLT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_JMP32_REG(BPF_JLT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLE | BPF_K */ + { + "JMP32_JLE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JLE, R0, 122, 1), + BPF_JMP32_IMM(BPF_JLE, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JLE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffd, 1), + BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffe, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLE | BPF_X */ + { + "JMP32_JLE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd), + BPF_JMP32_REG(BPF_JLE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe), + BPF_JMP32_REG(BPF_JLE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JSGT | BPF_K */ + { + "JMP32_JSGT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSGT, R0, -123, 1), + BPF_JMP32_IMM(BPF_JSGT, R0, -124, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSGT_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSGT, R0, -12345678, 1), + BPF_JMP32_IMM(BPF_JSGT, R0, -12345679, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSGT | BPF_X */ + { + "JMP32_JSGT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSGT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345679), + BPF_JMP32_REG(BPF_JSGT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSGE | BPF_K */ + { + "JMP32_JSGE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSGE, R0, -122, 1), + BPF_JMP32_IMM(BPF_JSGE, R0, -123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSGE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSGE, R0, -12345677, 1), + BPF_JMP32_IMM(BPF_JSGE, R0, -12345678, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSGE | BPF_X */ + { + "JMP32_JSGE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345677), + BPF_JMP32_REG(BPF_JSGE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSGE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLT | BPF_K */ + { + "JMP32_JSLT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSLT, R0, -123, 1), + BPF_JMP32_IMM(BPF_JSLT, R0, -122, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSLT_K: Large immediate", 
+ .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSLT, R0, -12345678, 1), + BPF_JMP32_IMM(BPF_JSLT, R0, -12345677, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLT | BPF_X */ + { + "JMP32_JSLT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSLT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345677), + BPF_JMP32_REG(BPF_JSLT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLE | BPF_K */ + { + "JMP32_JSLE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSLE, R0, -124, 1), + BPF_JMP32_IMM(BPF_JSLE, R0, -123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSLE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSLE, R0, -12345679, 1), + BPF_JMP32_IMM(BPF_JSLE, R0, -12345678, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLE | BPF_K */ + { + "JMP32_JSLE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345679), + BPF_JMP32_REG(BPF_JSLE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSLE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } }, /* BPF_JMP | BPF_EXIT */ { @@ -5223,6 +7192,14 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Very long conditional jump", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_long_jmp, + }, { "JMP_JA: Jump, gap, jump, ...", { }, @@ -7012,8 +8989,248 @@ static __init int test_bpf(void) return err_cnt ? -EINVAL : 0; } +struct tail_call_test { + const char *descr; + struct bpf_insn insns[MAX_INSNS]; + int result; + int stack_depth; +}; + +/* + * Magic marker used in test snippets for tail calls below. + * BPF_LD/MOV to R2 and R2 with this immediate value is replaced + * with the proper values by the test runner. + */ +#define TAIL_CALL_MARKER 0x7a11ca11 + +/* Special offset to indicate a NULL call target */ +#define TAIL_CALL_NULL 0x7fff + +/* Special offset to indicate an out-of-range index */ +#define TAIL_CALL_INVALID 0x7ffe + +#define TAIL_CALL(offset) \ + BPF_LD_IMM64(R2, TAIL_CALL_MARKER), \ + BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_K, R3, 0, \ + offset, TAIL_CALL_MARKER), \ + BPF_JMP_IMM(BPF_TAIL_CALL, 0, 0, 0) + +/* + * Tail call tests. Each test case may call any other test in the table, + * including itself, specified as a relative index offset from the calling + * test. The index TAIL_CALL_NULL can be used to specify a NULL target + * function to test the JIT error path. Similarly, the index TAIL_CALL_INVALID + * results in a target index that is out of range. 
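+ * The relative offsets are resolved to absolute table indices when the
+ * test programs are prepared, and a test that repeatedly calls itself
+ * exercises the MAX_TAIL_CALL_CNT limit.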
+ */ +static struct tail_call_test tail_call_tests[] = { + { + "Tail call leaf", + .insns = { + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_ALU64_IMM(BPF_ADD, R0, 1), + BPF_EXIT_INSN(), + }, + .result = 1, + }, + { + "Tail call 2", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 2), + TAIL_CALL(-1), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + .result = 3, + }, + { + "Tail call 3", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 3), + TAIL_CALL(-1), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + .result = 6, + }, + { + "Tail call 4", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 4), + TAIL_CALL(-1), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + .result = 10, + }, + { + "Tail call error path, max count reached", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_ALU64_REG(BPF_MOV, R0, R1), + TAIL_CALL(0), + BPF_EXIT_INSN(), + }, + .result = MAX_TAIL_CALL_CNT + 1, + }, + { + "Tail call error path, NULL target", + .insns = { + BPF_ALU64_IMM(BPF_MOV, R0, -1), + TAIL_CALL(TAIL_CALL_NULL), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + .result = 1, + }, + { + "Tail call error path, index out of range", + .insns = { + BPF_ALU64_IMM(BPF_MOV, R0, -1), + TAIL_CALL(TAIL_CALL_INVALID), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + .result = 1, + }, +}; + +static void __init destroy_tail_call_tests(struct bpf_array *progs) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) + if (progs->ptrs[i]) + bpf_prog_free(progs->ptrs[i]); + kfree(progs); +} + +static __init int prepare_tail_call_tests(struct bpf_array **pprogs) +{ + int ntests = ARRAY_SIZE(tail_call_tests); + struct bpf_array *progs; + int which, err; + + /* Allocate the table of programs to be used for tall calls */ + progs = kzalloc(sizeof(*progs) + (ntests + 1) * sizeof(progs->ptrs[0]), + GFP_KERNEL); + if (!progs) + goto out_nomem; + + /* Create all eBPF programs and populate the table */ + for (which = 0; which < ntests; which++) { + struct tail_call_test *test = &tail_call_tests[which]; + struct bpf_prog *fp; + int len, i; + + /* Compute the number of program instructions */ + for (len = 0; len < MAX_INSNS; len++) { + struct bpf_insn *insn = &test->insns[len]; + + if (len < MAX_INSNS - 1 && + insn->code == (BPF_LD | BPF_DW | BPF_IMM)) + len++; + if (insn->code == 0) + break; + } + + /* Allocate and initialize the program */ + fp = bpf_prog_alloc(bpf_prog_size(len), 0); + if (!fp) + goto out_nomem; + + fp->len = len; + fp->type = BPF_PROG_TYPE_SOCKET_FILTER; + fp->aux->stack_depth = test->stack_depth; + memcpy(fp->insnsi, test->insns, len * sizeof(struct bpf_insn)); + + /* Relocate runtime tail call offsets and addresses */ + for (i = 0; i < len; i++) { + struct bpf_insn *insn = &fp->insnsi[i]; + + if (insn->imm != TAIL_CALL_MARKER) + continue; + + switch (insn->code) { + case BPF_LD | BPF_DW | BPF_IMM: + insn[0].imm = (u32)(long)progs; + insn[1].imm = ((u64)(long)progs) >> 32; + break; + + case BPF_ALU | BPF_MOV | BPF_K: + if (insn->off == TAIL_CALL_NULL) + insn->imm = ntests; + else if (insn->off == TAIL_CALL_INVALID) + insn->imm = ntests + 1; + else + insn->imm = which + insn->off; + insn->off = 0; + } + } + + fp = bpf_prog_select_runtime(fp, &err); + if (err) + goto out_err; + + progs->ptrs[which] = fp; + } + + /* The last entry contains a NULL program pointer */ + progs->map.max_entries = ntests + 1; + *pprogs = progs; + return 0; + +out_nomem: + err = -ENOMEM; + +out_err: + if (progs) + destroy_tail_call_tests(progs); + return err; +} + +static __init int 
test_tail_calls(struct bpf_array *progs) +{ + int i, err_cnt = 0, pass_cnt = 0; + int jit_cnt = 0, run_cnt = 0; + + for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) { + struct tail_call_test *test = &tail_call_tests[i]; + struct bpf_prog *fp = progs->ptrs[i]; + u64 duration; + int ret; + + cond_resched(); + + pr_info("#%d %s ", i, test->descr); + if (!fp) { + err_cnt++; + continue; + } + pr_cont("jited:%u ", fp->jited); + + run_cnt++; + if (fp->jited) + jit_cnt++; + + ret = __run_one(fp, NULL, MAX_TESTRUNS, &duration); + if (ret == test->result) { + pr_cont("%lld PASS", duration); + pass_cnt++; + } else { + pr_cont("ret %d != %d FAIL", ret, test->result); + err_cnt++; + } + } + + pr_info("%s: Summary: %d PASSED, %d FAILED, [%d/%d JIT'ed]\n", + __func__, pass_cnt, err_cnt, jit_cnt, run_cnt); + + return err_cnt ? -EINVAL : 0; +} + static int __init test_bpf_init(void) { + struct bpf_array *progs = NULL; int ret; ret = prepare_bpf_tests(); @@ -7025,6 +9242,14 @@ static int __init test_bpf_init(void) if (ret) return ret; + ret = prepare_tail_call_tests(&progs); + if (ret) + return ret; + ret = test_tail_calls(progs); + destroy_tail_call_tests(progs); + if (ret) + return ret; + return test_skb_segment(); } @@ -1558,9 +1558,12 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start, gup_flags |= FOLL_WRITE; /* - * See check_vma_flags(): Will return -EFAULT on incompatible mappings - * or with insufficient permissions. + * We want to report -EINVAL instead of -EFAULT for any permission + * problems or incompatible mappings. */ + if (check_vma_flags(vma, gup_flags)) + return -EINVAL; + return __get_user_pages(mm, start, nr_pages, gup_flags, NULL, NULL, locked); } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 228a2fbe0657..73d46d16d575 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -290,7 +290,7 @@ static void hex_dump_object(struct seq_file *seq, warn_or_seq_printf(seq, " hex dump (first %zu bytes):\n", len); kasan_disable_current(); warn_or_seq_hex_dump(seq, DUMP_PREFIX_NONE, HEX_ROW_SIZE, - HEX_GROUP_SIZE, ptr, len, HEX_ASCII); + HEX_GROUP_SIZE, kasan_reset_tag((void *)ptr), len, HEX_ASCII); kasan_enable_current(); } @@ -1171,7 +1171,7 @@ static bool update_checksum(struct kmemleak_object *object) kasan_disable_current(); kcsan_disable_current(); - object->checksum = crc32(0, (void *)object->pointer, object->size); + object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size); kasan_enable_current(); kcsan_enable_current(); @@ -1246,7 +1246,7 @@ static void scan_block(void *_start, void *_end, break; kasan_disable_current(); - pointer = *ptr; + pointer = *(unsigned long *)kasan_reset_tag((void *)ptr); kasan_enable_current(); untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer); diff --git a/mm/madvise.c b/mm/madvise.c index 6d3d348b17f4..5c065bc8b5f6 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -862,10 +862,12 @@ static long madvise_populate(struct vm_area_struct *vma, switch (pages) { case -EINTR: return -EINTR; - case -EFAULT: /* Incompatible mappings / permissions. */ + case -EINVAL: /* Incompatible mappings / permissions. 
*/ return -EINVAL; case -EHWPOISON: return -EHWPOISON; + case -EFAULT: /* VM_FAULT_SIGBUS or VM_FAULT_SIGSEGV */ + return -EFAULT; default: pr_warn_once("%s: unhandled return value: %ld\n", __func__, pages); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8ef06f9e0db1..389b5766e74f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3106,13 +3106,15 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, stock->cached_pgdat = pgdat; } else if (stock->cached_pgdat != pgdat) { /* Flush the existing cached vmstat data */ + struct pglist_data *oldpg = stock->cached_pgdat; + if (stock->nr_slab_reclaimable_b) { - mod_objcg_mlstate(objcg, pgdat, NR_SLAB_RECLAIMABLE_B, + mod_objcg_mlstate(objcg, oldpg, NR_SLAB_RECLAIMABLE_B, stock->nr_slab_reclaimable_b); stock->nr_slab_reclaimable_b = 0; } if (stock->nr_slab_unreclaimable_b) { - mod_objcg_mlstate(objcg, pgdat, NR_SLAB_UNRECLAIMABLE_B, + mod_objcg_mlstate(objcg, oldpg, NR_SLAB_UNRECLAIMABLE_B, stock->nr_slab_unreclaimable_b); stock->nr_slab_unreclaimable_b = 0; } @@ -7048,14 +7050,14 @@ void mem_cgroup_sk_free(struct sock *sk) * mem_cgroup_charge_skmem - charge socket memory * @memcg: memcg to charge * @nr_pages: number of pages to charge + * @gfp_mask: reclaim mode * * Charges @nr_pages to @memcg. Returns %true if the charge fit within - * @memcg's configured limit, %false if the charge had to be forced. + * @memcg's configured limit, %false if it doesn't. */ -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) +bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, + gfp_t gfp_mask) { - gfp_t gfp_mask = GFP_KERNEL; - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) { struct page_counter *fail; @@ -7063,21 +7065,19 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) memcg->tcpmem_pressure = 0; return true; } - page_counter_charge(&memcg->tcpmem, nr_pages); memcg->tcpmem_pressure = 1; + if (gfp_mask & __GFP_NOFAIL) { + page_counter_charge(&memcg->tcpmem, nr_pages); + return true; + } return false; } - /* Don't block in the packet receive path */ - if (in_softirq()) - gfp_mask = GFP_NOWAIT; - - mod_memcg_state(memcg, MEMCG_SOCK, nr_pages); - - if (try_charge(memcg, gfp_mask, nr_pages) == 0) + if (try_charge(memcg, gfp_mask, nr_pages) == 0) { + mod_memcg_state(memcg, MEMCG_SOCK, nr_pages); return true; + } - try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages); return false; } diff --git a/mm/slub.c b/mm/slub.c index af984e4990e8..f77d8cd79ef7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -576,8 +576,8 @@ static void print_section(char *level, char *text, u8 *addr, unsigned int length) { metadata_access_enable(); - print_hex_dump(level, kasan_reset_tag(text), DUMP_PREFIX_ADDRESS, - 16, 1, addr, length, 1); + print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, + 16, 1, kasan_reset_tag((void *)addr), length, 1); metadata_access_disable(); } @@ -1400,12 +1400,13 @@ check_slabs: static int __init setup_slub_debug(char *str) { slab_flags_t flags; + slab_flags_t global_flags; char *saved_str; char *slab_list; bool global_slub_debug_changed = false; bool slab_list_specified = false; - slub_debug = DEBUG_DEFAULT_FLAGS; + global_flags = DEBUG_DEFAULT_FLAGS; if (*str++ != '=' || !*str) /* * No options specified. Switch on full debugging. 
@@ -1417,7 +1418,7 @@ static int __init setup_slub_debug(char *str) str = parse_slub_debug_flags(str, &flags, &slab_list, true); if (!slab_list) { - slub_debug = flags; + global_flags = flags; global_slub_debug_changed = true; } else { slab_list_specified = true; @@ -1426,16 +1427,18 @@ static int __init setup_slub_debug(char *str) /* * For backwards compatibility, a single list of flags with list of - * slabs means debugging is only enabled for those slabs, so the global - * slub_debug should be 0. We can extended that to multiple lists as + * slabs means debugging is only changed for those slabs, so the global + * slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending + * on CONFIG_SLUB_DEBUG_ON). We can extended that to multiple lists as * long as there is no option specifying flags without a slab list. */ if (slab_list_specified) { if (!global_slub_debug_changed) - slub_debug = 0; + global_flags = slub_debug; slub_debug_string = saved_str; } out: + slub_debug = global_flags; if (slub_debug != 0 || slub_debug_string) static_branch_enable(&slub_debug_enabled); else @@ -3236,12 +3239,12 @@ struct detached_freelist { struct kmem_cache *s; }; -static inline void free_nonslab_page(struct page *page) +static inline void free_nonslab_page(struct page *page, void *object) { unsigned int order = compound_order(page); VM_BUG_ON_PAGE(!PageCompound(page), page); - kfree_hook(page_address(page)); + kfree_hook(object); mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); __free_pages(page, order); } @@ -3282,7 +3285,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, if (!s) { /* Handle kalloc'ed objects */ if (unlikely(!PageSlab(page))) { - free_nonslab_page(page); + free_nonslab_page(page, object); p[size] = NULL; /* mark object processed */ return size; } @@ -4258,7 +4261,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { - free_nonslab_page(page); + free_nonslab_page(page, object); return; } slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c index 1c140af06d52..600b9563bfc5 100644 --- a/net/6lowpan/debugfs.c +++ b/net/6lowpan/debugfs.c @@ -170,7 +170,8 @@ static void lowpan_dev_debugfs_ctx_init(struct net_device *dev, struct dentry *root; char buf[32]; - WARN_ON_ONCE(id > LOWPAN_IPHC_CTX_TABLE_SIZE); + if (WARN_ON_ONCE(id >= LOWPAN_IPHC_CTX_TABLE_SIZE)) + return; sprintf(buf, "%d", id); diff --git a/net/802/Makefile b/net/802/Makefile index 19406a87bdaa..bfed80221b8b 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_LLC) += p8022.o psnap.o obj-$(CONFIG_NET_FC) += fc.o obj-$(CONFIG_FDDI) += fddi.o obj-$(CONFIG_HIPPI) += hippi.o -obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o obj-$(CONFIG_STP) += stp.o obj-$(CONFIG_GARP) += garp.o diff --git a/net/802/p8023.c b/net/802/p8023.c deleted file mode 100644 index 19cd56990db2..000000000000 --- a/net/802/p8023.c +++ /dev/null @@ -1,60 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * NET3: 802.3 data link hooks used for IPX 802.3 - * - * 802.3 isn't really a protocol data link layer. Some old IPX stuff - * uses it however. Note that there is only one 802.3 protocol layer - * in the system. We don't currently support different protocols - * running raw 802.3 on different devices. Thankfully nobody else - * has done anything like the old IPX. 
- */ - -#include <linux/in.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/slab.h> - -#include <net/datalink.h> -#include <net/p8022.h> - -/* - * Place an 802.3 header on a packet. The driver will do the mac - * addresses, we just need to give it the buffer length. - */ -static int p8023_request(struct datalink_proto *dl, - struct sk_buff *skb, unsigned char *dest_node) -{ - struct net_device *dev = skb->dev; - - dev_hard_header(skb, dev, ETH_P_802_3, dest_node, NULL, skb->len); - return dev_queue_xmit(skb); -} - -/* - * Create an 802.3 client. Note there can be only one 802.3 client - */ -struct datalink_proto *make_8023_client(void) -{ - struct datalink_proto *proto = kmalloc(sizeof(*proto), GFP_ATOMIC); - - if (proto) { - proto->header_length = 0; - proto->request = p8023_request; - } - return proto; -} - -/* - * Destroy the 802.3 client. - */ -void destroy_8023_client(struct datalink_proto *dl) -{ - kfree(dl); -} - -EXPORT_SYMBOL(destroy_8023_client); -EXPORT_SYMBOL(make_8023_client); - -MODULE_LICENSE("GPL"); diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h index c32638dddbf9..f6b9dc4e408f 100644 --- a/net/bluetooth/cmtp/cmtp.h +++ b/net/bluetooth/cmtp/cmtp.h @@ -26,7 +26,7 @@ #include <linux/types.h> #include <net/bluetooth/bluetooth.h> -#define BTNAMSIZ 18 +#define BTNAMSIZ 21 /* CMTP ioctl defines */ #define CMTPCONNADD _IOW('C', 200, int) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2560ed2f144d..8a47a3017d61 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1343,6 +1343,12 @@ int hci_inquiry(void __user *arg) goto done; } + /* Restrict maximum inquiry length to 60 seconds */ + if (ir.length > 60) { + err = -EINVAL; + goto done; + } + hci_dev_lock(hdev); if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) { @@ -1718,6 +1724,7 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev) int hci_dev_do_close(struct hci_dev *hdev) { bool auto_off; + int err = 0; BT_DBG("%s %p", hdev->name, hdev); @@ -1727,10 +1734,18 @@ int hci_dev_do_close(struct hci_dev *hdev) hci_request_cancel_all(hdev); hci_req_sync_lock(hdev); + if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + test_bit(HCI_UP, &hdev->flags)) { + /* Execute vendor specific shutdown routine */ + if (hdev->shutdown) + err = hdev->shutdown(hdev); + } + if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { cancel_delayed_work_sync(&hdev->cmd_timer); hci_req_sync_unlock(hdev); - return 0; + return err; } hci_leds_update_powered(hdev, false); @@ -1798,14 +1813,6 @@ int hci_dev_do_close(struct hci_dev *hdev) clear_bit(HCI_INIT, &hdev->flags); } - if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && - !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && - test_bit(HCI_UP, &hdev->flags)) { - /* Execute vendor specific shutdown routine */ - if (hdev->shutdown) - hdev->shutdown(hdev); - } - /* flush cmd work */ flush_work(&hdev->cmd_work); @@ -1845,7 +1852,7 @@ int hci_dev_do_close(struct hci_dev *hdev) hci_req_sync_unlock(hdev); hci_dev_put(hdev); - return 0; + return err; } int hci_dev_close(__u16 dev) @@ -3751,11 +3758,18 @@ done: } /* Alloc HCI device */ -struct hci_dev *hci_alloc_dev(void) +struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) { struct hci_dev *hdev; + unsigned int alloc_size; - hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); + alloc_size = sizeof(*hdev); + if (sizeof_priv) { + /* Fixme: 
May need ALIGN-ment? */ + alloc_size += sizeof_priv; + } + + hdev = kzalloc(alloc_size, GFP_KERNEL); if (!hdev) return NULL; @@ -3869,7 +3883,7 @@ struct hci_dev *hci_alloc_dev(void) return hdev; } -EXPORT_SYMBOL(hci_alloc_dev); +EXPORT_SYMBOL(hci_alloc_dev_priv); /* Free HCI device */ void hci_free_dev(struct hci_dev *hdev) @@ -3996,14 +4010,10 @@ EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ void hci_unregister_dev(struct hci_dev *hdev) { - int id; - BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); hci_dev_set_flag(hdev, HCI_UNREGISTER); - id = hdev->id; - write_lock(&hci_dev_list_lock); list_del(&hdev->list); write_unlock(&hci_dev_list_lock); @@ -4038,7 +4048,14 @@ void hci_unregister_dev(struct hci_dev *hdev) } device_del(&hdev->dev); + /* Actual cleanup is deferred until hci_release_dev(). */ + hci_dev_put(hdev); +} +EXPORT_SYMBOL(hci_unregister_dev); +/* Release HCI device */ +void hci_release_dev(struct hci_dev *hdev) +{ debugfs_remove_recursive(hdev->debugfs); kfree_const(hdev->hw_info); kfree_const(hdev->fw_info); @@ -4063,11 +4080,10 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_blocked_keys_clear(hdev); hci_dev_unlock(hdev); - hci_dev_put(hdev); - - ida_simple_remove(&hci_index_ida, id); + ida_simple_remove(&hci_index_ida, hdev->id); + kfree(hdev); } -EXPORT_SYMBOL(hci_unregister_dev); +EXPORT_SYMBOL(hci_release_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1c3018202564..0bca035bf2dc 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -40,6 +40,8 @@ #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ "\x00\x00\x00\x00\x00\x00\x00\x00" +#define secs_to_jiffies(_secs) msecs_to_jiffies((_secs) * 1000) + /* Handle HCI Event packets */ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb, @@ -1171,6 +1173,12 @@ static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb) bacpy(&hdev->random_addr, sent); + if (!bacmp(&hdev->rpa, sent)) { + hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED); + queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, + secs_to_jiffies(hdev->rpa_timeout)); + } + hci_dev_unlock(hdev); } @@ -1201,24 +1209,30 @@ static void hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev, { __u8 status = *((__u8 *) skb->data); struct hci_cp_le_set_adv_set_rand_addr *cp; - struct adv_info *adv_instance; + struct adv_info *adv; if (status) return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_SET_RAND_ADDR); - if (!cp) + /* Update only in case the adv instance since handle 0x00 shall be using + * HCI_OP_LE_SET_RANDOM_ADDR since that allows both extended and + * non-extended adverting. 
+ */ + if (!cp || !cp->handle) return; hci_dev_lock(hdev); - if (!cp->handle) { - /* Store in hdev for instance 0 (Set adv and Directed advs) */ - bacpy(&hdev->random_addr, &cp->bdaddr); - } else { - adv_instance = hci_find_adv_instance(hdev, cp->handle); - if (adv_instance) - bacpy(&adv_instance->random_addr, &cp->bdaddr); + adv = hci_find_adv_instance(hdev, cp->handle); + if (adv) { + bacpy(&adv->random_addr, &cp->bdaddr); + if (!bacmp(&hdev->rpa, &cp->bdaddr)) { + adv->rpa_expired = false; + queue_delayed_work(hdev->workqueue, + &adv->rpa_expired_cb, + secs_to_jiffies(hdev->rpa_timeout)); + } } hci_dev_unlock(hdev); @@ -1277,7 +1291,9 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_cp_le_set_ext_adv_enable *cp; + struct hci_cp_ext_adv_set *set; __u8 status = *((__u8 *) skb->data); + struct adv_info *adv = NULL, *n; BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -1288,22 +1304,48 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, if (!cp) return; + set = (void *)cp->data; + hci_dev_lock(hdev); + if (cp->num_of_sets) + adv = hci_find_adv_instance(hdev, set->handle); + if (cp->enable) { struct hci_conn *conn; hci_dev_set_flag(hdev, HCI_LE_ADV); + if (adv) + adv->enabled = true; + conn = hci_lookup_le_connect(hdev); if (conn) queue_delayed_work(hdev->workqueue, &conn->le_conn_timeout, conn->conn_timeout); } else { + if (adv) { + adv->enabled = false; + /* If just one instance was disabled check if there are + * any other instance enabled before clearing HCI_LE_ADV + */ + list_for_each_entry_safe(adv, n, &hdev->adv_instances, + list) { + if (adv->enabled) + goto unlock; + } + } else { + /* All instances shall be considered disabled */ + list_for_each_entry_safe(adv, n, &hdev->adv_instances, + list) + adv->enabled = false; + } + hci_dev_clear_flag(hdev, HCI_LE_ADV); } +unlock: hci_dev_unlock(hdev); } @@ -2306,19 +2348,20 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); if (conn) { - u8 type = conn->type; - mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); + if (conn->type == LE_LINK) { + hdev->cur_adv_instance = conn->adv_instance; + hci_req_reenable_advertising(hdev); + } + /* If the disconnection failed for any reason, the upper layer * does not retry to disconnect in current implementation. * Hence, we need to do some basic cleanup here and re-enable * advertising if necessary. */ hci_conn_del(conn); - if (type == LE_LINK) - hci_req_reenable_advertising(hdev); } hci_dev_unlock(hdev); @@ -2844,7 +2887,6 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) struct hci_conn_params *params; struct hci_conn *conn; bool mgmt_connected; - u8 type; BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); @@ -2899,10 +2941,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } } - type = conn->type; - hci_disconn_cfm(conn, ev->reason); - hci_conn_del(conn); /* The suspend notifier is waiting for all devices to disconnect so * clear the bit from pending tasks and inform the wait queue. @@ -2922,8 +2961,12 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) * or until a connection is created or until the Advertising * is timed out due to Directed Advertising." 
*/ - if (type == LE_LINK) + if (conn->type == LE_LINK) { + hdev->cur_adv_instance = conn->adv_instance; hci_req_reenable_advertising(hdev); + } + + hci_conn_del(conn); unlock: hci_dev_unlock(hdev); @@ -3268,11 +3311,9 @@ unlock: hci_dev_unlock(hdev); } -static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, - u16 opcode, u8 ncmd) +static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, u8 ncmd) { - if (opcode != HCI_OP_NOP) - cancel_delayed_work(&hdev->cmd_timer); + cancel_delayed_work(&hdev->cmd_timer); if (!test_bit(HCI_RESET, &hdev->flags)) { if (ncmd) { @@ -3647,7 +3688,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, break; } - handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd); + handle_cmd_cnt_and_timer(hdev, ev->ncmd); hci_req_cmd_complete(hdev, *opcode, *status, req_complete, req_complete_skb); @@ -3748,7 +3789,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, break; } - handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd); + handle_cmd_cnt_and_timer(hdev, ev->ncmd); /* Indicate request completion if the command failed. Also, if * we're not waiting for a special event and we get a success @@ -4382,6 +4423,21 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, switch (ev->status) { case 0x00: + /* The synchronous connection complete event should only be + * sent once per new connection. Receiving a successful + * complete event when the connection status is already + * BT_CONNECTED means that the device is misbehaving and sent + * multiple complete event packets for the same new connection. + * + * Registering the device more than once can corrupt kernel + * memory, hence upon detecting this invalid event, we report + * an error and ignore the packet. + */ + if (conn->state == BT_CONNECTED) { + bt_dev_err(hdev, "Ignoring connect complete event for existing connection"); + goto unlock; + } + conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; conn->type = ev->link_type; @@ -5104,9 +5160,64 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, } #endif +static void le_conn_update_addr(struct hci_conn *conn, bdaddr_t *bdaddr, + u8 bdaddr_type, bdaddr_t *local_rpa) +{ + if (conn->out) { + conn->dst_type = bdaddr_type; + conn->resp_addr_type = bdaddr_type; + bacpy(&conn->resp_addr, bdaddr); + + /* Check if the controller has set a Local RPA then it must be + * used instead or hdev->rpa. + */ + if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) { + conn->init_addr_type = ADDR_LE_DEV_RANDOM; + bacpy(&conn->init_addr, local_rpa); + } else if (hci_dev_test_flag(conn->hdev, HCI_PRIVACY)) { + conn->init_addr_type = ADDR_LE_DEV_RANDOM; + bacpy(&conn->init_addr, &conn->hdev->rpa); + } else { + hci_copy_identity_address(conn->hdev, &conn->init_addr, + &conn->init_addr_type); + } + } else { + conn->resp_addr_type = conn->hdev->adv_addr_type; + /* Check if the controller has set a Local RPA then it must be + * used instead or hdev->rpa. + */ + if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) { + conn->resp_addr_type = ADDR_LE_DEV_RANDOM; + bacpy(&conn->resp_addr, local_rpa); + } else if (conn->hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) { + /* In case of ext adv, resp_addr will be updated in + * Adv Terminated event. 
+ */ + if (!ext_adv_capable(conn->hdev)) + bacpy(&conn->resp_addr, + &conn->hdev->random_addr); + } else { + bacpy(&conn->resp_addr, &conn->hdev->bdaddr); + } + + conn->init_addr_type = bdaddr_type; + bacpy(&conn->init_addr, bdaddr); + + /* For incoming connections, set the default minimum + * and maximum connection interval. They will be used + * to check if the parameters are in range and if not + * trigger the connection update procedure. + */ + conn->le_conn_min_interval = conn->hdev->le_conn_min_interval; + conn->le_conn_max_interval = conn->hdev->le_conn_max_interval; + } +} + static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, - bdaddr_t *bdaddr, u8 bdaddr_type, u8 role, u16 handle, - u16 interval, u16 latency, u16 supervision_timeout) + bdaddr_t *bdaddr, u8 bdaddr_type, + bdaddr_t *local_rpa, u8 role, u16 handle, + u16 interval, u16 latency, + u16 supervision_timeout) { struct hci_conn_params *params; struct hci_conn *conn; @@ -5154,32 +5265,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, cancel_delayed_work(&conn->le_conn_timeout); } - if (!conn->out) { - /* Set the responder (our side) address type based on - * the advertising address type. - */ - conn->resp_addr_type = hdev->adv_addr_type; - if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) { - /* In case of ext adv, resp_addr will be updated in - * Adv Terminated event. - */ - if (!ext_adv_capable(hdev)) - bacpy(&conn->resp_addr, &hdev->random_addr); - } else { - bacpy(&conn->resp_addr, &hdev->bdaddr); - } - - conn->init_addr_type = bdaddr_type; - bacpy(&conn->init_addr, bdaddr); - - /* For incoming connections, set the default minimum - * and maximum connection interval. They will be used - * to check if the parameters are in range and if not - * trigger the connection update procedure. - */ - conn->le_conn_min_interval = hdev->le_conn_min_interval; - conn->le_conn_max_interval = hdev->le_conn_max_interval; - } + le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa); /* Lookup the identity address from the stored connection * address and address type. @@ -5236,6 +5322,13 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn->handle = handle; conn->state = BT_CONFIG; + /* Store current advertising instance as connection advertising instance + * when sotfware rotation is in use so it can be re-enabled when + * disconnected. 
+ */ + if (!ext_adv_capable(hdev)) + conn->adv_instance = hdev->cur_adv_instance; + conn->le_conn_interval = interval; conn->le_conn_latency = latency; conn->le_supv_timeout = supervision_timeout; @@ -5290,7 +5383,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type, - ev->role, le16_to_cpu(ev->handle), + NULL, ev->role, le16_to_cpu(ev->handle), le16_to_cpu(ev->interval), le16_to_cpu(ev->latency), le16_to_cpu(ev->supervision_timeout)); @@ -5304,7 +5397,7 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type, - ev->role, le16_to_cpu(ev->handle), + &ev->local_rpa, ev->role, le16_to_cpu(ev->handle), le16_to_cpu(ev->interval), le16_to_cpu(ev->latency), le16_to_cpu(ev->supervision_timeout)); @@ -5319,13 +5412,13 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_evt_le_ext_adv_set_term *ev = (void *) skb->data; struct hci_conn *conn; + struct adv_info *adv; BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); - if (ev->status) { - struct adv_info *adv; + adv = hci_find_adv_instance(hdev, ev->handle); - adv = hci_find_adv_instance(hdev, ev->handle); + if (ev->status) { if (!adv) return; @@ -5336,11 +5429,18 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } + if (adv) + adv->enabled = false; + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->conn_handle)); if (conn) { - struct adv_info *adv_instance; + /* Store handle in the connection so the correct advertising + * instance can be re-enabled when disconnected. + */ + conn->adv_instance = ev->handle; - if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM) + if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM || + bacmp(&conn->resp_addr, BDADDR_ANY)) return; if (!ev->handle) { @@ -5348,9 +5448,8 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } - adv_instance = hci_find_adv_instance(hdev, ev->handle); - if (adv_instance) - bacpy(&conn->resp_addr, &adv_instance->random_addr); + if (adv) + bacpy(&conn->resp_addr, &adv->random_addr); } } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 1d14adc023e9..f15626607b2d 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2072,8 +2072,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, * current RPA has expired then generate a new one. 
*/ if (use_rpa) { - int to; - /* If Controller supports LL Privacy use own address type is * 0x03 */ @@ -2084,14 +2082,10 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, *own_addr_type = ADDR_LE_DEV_RANDOM; if (adv_instance) { - if (!adv_instance->rpa_expired && - !bacmp(&adv_instance->random_addr, &hdev->rpa)) + if (adv_rpa_valid(adv_instance)) return 0; - - adv_instance->rpa_expired = false; } else { - if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) && - !bacmp(&hdev->random_addr, &hdev->rpa)) + if (rpa_valid(hdev)) return 0; } @@ -2103,14 +2097,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, bacpy(rand_addr, &hdev->rpa); - to = msecs_to_jiffies(hdev->rpa_timeout * 1000); - if (adv_instance) - queue_delayed_work(hdev->workqueue, - &adv_instance->rpa_expired_cb, to); - else - queue_delayed_work(hdev->workqueue, - &hdev->rpa_expired, to); - return 0; } @@ -2153,6 +2139,30 @@ void __hci_req_clear_ext_adv_sets(struct hci_request *req) hci_req_add(req, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL); } +static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) +{ + struct hci_dev *hdev = req->hdev; + + /* If we're advertising or initiating an LE connection we can't + * go ahead and change the random address at this time. This is + * because the eventual initiator address used for the + * subsequently created connection will be undefined (some + * controllers use the new address and others the one we had + * when the operation started). + * + * In this kind of scenario skip the update and let the random + * address be updated at the next cycle. + */ + if (hci_dev_test_flag(hdev, HCI_LE_ADV) || + hci_lookup_le_connect(hdev)) { + bt_dev_dbg(hdev, "Deferring random address update"); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); + return; + } + + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); +} + int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) { struct hci_cp_le_set_ext_adv_params cp; @@ -2255,6 +2265,13 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) } else { if (!bacmp(&random_addr, &hdev->random_addr)) return 0; + /* Instance 0x00 doesn't have an adv_info, instead it + * uses hdev->random_addr to track its address so + * whenever it needs to be updated this also set the + * random address since hdev->random_addr is shared with + * scan state machine. + */ + set_random_addr(req, &random_addr); } memset(&cp, 0, sizeof(cp)); @@ -2512,30 +2529,6 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, false); } -static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) -{ - struct hci_dev *hdev = req->hdev; - - /* If we're advertising or initiating an LE connection we can't - * go ahead and change the random address at this time. This is - * because the eventual initiator address used for the - * subsequently created connection will be undefined (some - * controllers use the new address and others the one we had - * when the operation started). - * - * In this kind of scenario skip the update and let the random - * address be updated at the next cycle. 
- */ - if (hci_dev_test_flag(hdev, HCI_LE_ADV) || - hci_lookup_le_connect(hdev)) { - bt_dev_dbg(hdev, "Deferring random address update"); - hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); - return; - } - - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); -} - int hci_update_random_address(struct hci_request *req, bool require_privacy, bool use_rpa, u8 *own_addr_type) { @@ -2547,8 +2540,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * the current RPA in use, then generate a new one. */ if (use_rpa) { - int to; - /* If Controller supports LL Privacy use own address type is * 0x03 */ @@ -2558,8 +2549,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, else *own_addr_type = ADDR_LE_DEV_RANDOM; - if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) && - !bacmp(&hdev->random_addr, &hdev->rpa)) + if (rpa_valid(hdev)) return 0; err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); @@ -2570,9 +2560,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, set_random_addr(req, &hdev->rpa); - to = msecs_to_jiffies(hdev->rpa_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); - return 0; } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b04a5a02ecf3..f1128c2134f0 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -59,6 +59,17 @@ struct hci_pinfo { char comm[TASK_COMM_LEN]; }; +static struct hci_dev *hci_hdev_from_sock(struct sock *sk) +{ + struct hci_dev *hdev = hci_pi(sk)->hdev; + + if (!hdev) + return ERR_PTR(-EBADFD); + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return ERR_PTR(-EPIPE); + return hdev; +} + void hci_sock_set_flag(struct sock *sk, int nr) { set_bit(nr, &hci_pi(sk)->flags); @@ -759,19 +770,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) if (event == HCI_DEV_UNREG) { struct sock *sk; - /* Detach sockets from device */ + /* Wake up sockets using this dead device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { - lock_sock(sk); if (hci_pi(sk)->hdev == hdev) { - hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; - sk->sk_state = BT_OPEN; sk->sk_state_change(sk); - - hci_dev_put(hdev); } - release_sock(sk); } read_unlock(&hci_sk_list.lock); } @@ -930,10 +935,10 @@ static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg) static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { - struct hci_dev *hdev = hci_pi(sk)->hdev; + struct hci_dev *hdev = hci_hdev_from_sock(sk); - if (!hdev) - return -EBADFD; + if (IS_ERR(hdev)) + return PTR_ERR(hdev); if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; @@ -1103,6 +1108,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, lock_sock(sk); + /* Allow detaching from dead device and attaching to alive device, if + * the caller wants to re-bind (instead of close) this socket in + * response to hci_sock_dev_event(HCI_DEV_UNREG) notification. 
+ */ + hdev = hci_pi(sk)->hdev; + if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + hci_pi(sk)->hdev = NULL; + sk->sk_state = BT_OPEN; + hci_dev_put(hdev); + } + hdev = NULL; + if (sk->sk_state == BT_BOUND) { err = -EALREADY; goto done; @@ -1379,9 +1396,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, lock_sock(sk); - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } @@ -1743,9 +1760,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto done; } - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 9874844a95a9..7827639ecf5c 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -83,7 +83,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) static void bt_host_release(struct device *dev) { struct hci_dev *hdev = to_hci_dev(dev); - kfree(hdev); + + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + hci_release_dev(hdev); module_put(THIS_MODULE); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3663f880df11..cea01e275f1e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7204,7 +7204,7 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status, if (!mgmt_rp) goto done; - if (status) + if (eir_len == 0) goto send_rsp; eir_len = eir_append_data(mgmt_rp->eir, 0, EIR_CLASS_OF_DEV, @@ -7725,7 +7725,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, * advertising. */ if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) - return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_NOT_SUPPORTED); if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ae6f80730561..2c95bb58f901 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -70,7 +70,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) BT_DBG("dlc %p state %ld err %d", d, d->state, err); - spin_lock_bh(&sk->sk_lock.slock); + lock_sock(sk); if (err) sk->sk_err = err; @@ -91,7 +91,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) sk->sk_state_change(sk); } - spin_unlock_bh(&sk->sk_lock.slock); + release_sock(sk); if (parent && sock_flag(sk, SOCK_ZAPPED)) { /* We have to drop DLC lock here, otherwise @@ -974,7 +974,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * if (!parent) return 0; - bh_lock_sock(parent); + lock_sock(parent); /* Check for backlog size */ if (sk_acceptq_is_full(parent)) { @@ -1001,7 +1001,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * result = 1; done: - bh_unlock_sock(parent); + release_sock(parent); if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags)) parent->sk_state_change(parent); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d9a4e88dacbb..98a881586512 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -48,6 +48,8 @@ struct sco_conn { spinlock_t lock; struct sock *sk; + struct delayed_work timeout_work; + unsigned int mtu; }; @@ -74,31 +76,47 @@ struct sco_pinfo { #define SCO_CONN_TIMEOUT (HZ * 40) #define SCO_DISCONN_TIMEOUT (HZ * 2) -static void sco_sock_timeout(struct timer_list *t) 
+static void sco_sock_timeout(struct work_struct *work) { - struct sock *sk = from_timer(sk, t, sk_timer); + struct sco_conn *conn = container_of(work, struct sco_conn, + timeout_work.work); + struct sock *sk; + + sco_conn_lock(conn); + sk = conn->sk; + if (sk) + sock_hold(sk); + sco_conn_unlock(conn); + + if (!sk) + return; BT_DBG("sock %p state %d", sk, sk->sk_state); - bh_lock_sock(sk); + lock_sock(sk); sk->sk_err = ETIMEDOUT; sk->sk_state_change(sk); - bh_unlock_sock(sk); - - sco_sock_kill(sk); + release_sock(sk); sock_put(sk); } static void sco_sock_set_timer(struct sock *sk, long timeout) { + if (!sco_pi(sk)->conn) + return; + BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout); - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); + cancel_delayed_work(&sco_pi(sk)->conn->timeout_work); + schedule_delayed_work(&sco_pi(sk)->conn->timeout_work, timeout); } static void sco_sock_clear_timer(struct sock *sk) { + if (!sco_pi(sk)->conn) + return; + BT_DBG("sock %p state %d", sk, sk->sk_state); - sk_stop_timer(sk, &sk->sk_timer); + cancel_delayed_work(&sco_pi(sk)->conn->timeout_work); } /* ---- SCO connections ---- */ @@ -173,12 +191,14 @@ static void sco_conn_del(struct hci_conn *hcon, int err) if (sk) { sock_hold(sk); - bh_lock_sock(sk); + lock_sock(sk); sco_sock_clear_timer(sk); sco_chan_del(sk, err); - bh_unlock_sock(sk); - sco_sock_kill(sk); + release_sock(sk); sock_put(sk); + + /* Ensure no more work items will run before freeing conn. */ + cancel_delayed_work_sync(&conn->timeout_work); } hcon->sco_data = NULL; @@ -193,6 +213,8 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, sco_pi(sk)->conn = conn; conn->sk = sk; + INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); + if (parent) bt_accept_enqueue(parent, sk, true); } @@ -212,44 +234,32 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk, return err; } -static int sco_connect(struct sock *sk) +static int sco_connect(struct hci_dev *hdev, struct sock *sk) { struct sco_conn *conn; struct hci_conn *hcon; - struct hci_dev *hdev; int err, type; BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst); - hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR); - if (!hdev) - return -EHOSTUNREACH; - - hci_dev_lock(hdev); - if (lmp_esco_capable(hdev) && !disable_esco) type = ESCO_LINK; else type = SCO_LINK; if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT && - (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) { - err = -EOPNOTSUPP; - goto done; - } + (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) + return -EOPNOTSUPP; hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, sco_pi(sk)->setting); - if (IS_ERR(hcon)) { - err = PTR_ERR(hcon); - goto done; - } + if (IS_ERR(hcon)) + return PTR_ERR(hcon); conn = sco_conn_add(hcon); if (!conn) { hci_conn_drop(hcon); - err = -ENOMEM; - goto done; + return -ENOMEM; } /* Update source addr of the socket */ @@ -257,7 +267,7 @@ static int sco_connect(struct sock *sk) err = sco_chan_add(conn, sk, NULL); if (err) - goto done; + return err; if (hcon->state == BT_CONNECTED) { sco_sock_clear_timer(sk); @@ -267,9 +277,6 @@ static int sco_connect(struct sock *sk) sco_sock_set_timer(sk, sk->sk_sndtimeo); } -done: - hci_dev_unlock(hdev); - hci_dev_put(hdev); return err; } @@ -394,8 +401,7 @@ static void sco_sock_cleanup_listen(struct sock *parent) */ static void sco_sock_kill(struct sock *sk) { - if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket || - sock_flag(sk, SOCK_DEAD)) + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) 
return; BT_DBG("sk %p state %d", sk, sk->sk_state); @@ -443,11 +449,10 @@ static void __sco_sock_close(struct sock *sk) /* Must be called on unlocked socket. */ static void sco_sock_close(struct sock *sk) { - sco_sock_clear_timer(sk); lock_sock(sk); + sco_sock_clear_timer(sk); __sco_sock_close(sk); release_sock(sk); - sco_sock_kill(sk); } static void sco_skb_put_cmsg(struct sk_buff *skb, struct msghdr *msg, @@ -500,8 +505,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT; - timer_setup(&sk->sk_timer, sco_sock_timeout, 0); - bt_sock_link(&sco_sk_list, sk); return sk; } @@ -566,6 +569,7 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; + struct hci_dev *hdev; int err; BT_DBG("sk %p", sk); @@ -580,12 +584,19 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen if (sk->sk_type != SOCK_SEQPACKET) return -EINVAL; + hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR); + if (!hdev) + return -EHOSTUNREACH; + hci_dev_lock(hdev); + lock_sock(sk); /* Set destination address and psm */ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr); - err = sco_connect(sk); + err = sco_connect(hdev, sk); + hci_dev_unlock(hdev); + hci_dev_put(hdev); if (err) goto done; @@ -773,6 +784,11 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting) cp.max_latency = cpu_to_le16(0xffff); cp.retrans_effort = 0xff; break; + default: + /* use CVSD settings as fallback */ + cp.max_latency = cpu_to_le16(0xffff); + cp.retrans_effort = 0xff; + break; } hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, @@ -1083,11 +1099,11 @@ static void sco_conn_ready(struct sco_conn *conn) BT_DBG("conn %p", conn); if (sk) { + lock_sock(sk); sco_sock_clear_timer(sk); - bh_lock_sock(sk); sk->sk_state = BT_CONNECTED; sk->sk_state_change(sk); - bh_unlock_sock(sk); + release_sock(sk); } else { sco_conn_lock(conn); @@ -1102,12 +1118,12 @@ static void sco_conn_ready(struct sco_conn *conn) return; } - bh_lock_sock(parent); + lock_sock(parent); sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC, 0); if (!sk) { - bh_unlock_sock(parent); + release_sock(parent); sco_conn_unlock(conn); return; } @@ -1128,7 +1144,7 @@ static void sco_conn_ready(struct sco_conn *conn) /* Wake up parent */ parent->sk_data_ready(parent); - bh_unlock_sock(parent); + release_sock(parent); sco_conn_unlock(conn); } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 695449088e42..4b855af267b1 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -7,6 +7,7 @@ #include <linux/vmalloc.h> #include <linux/etherdevice.h> #include <linux/filter.h> +#include <linux/rcupdate_trace.h> #include <linux/sched/signal.h> #include <net/bpf_sk_storage.h> #include <net/sock.h> @@ -763,8 +764,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, if (prog->expected_attach_type == BPF_XDP_DEVMAP || prog->expected_attach_type == BPF_XDP_CPUMAP) return -EINVAL; - if (kattr->test.ctx_in || kattr->test.ctx_out) - return -EINVAL; + ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md)); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1045,7 +1045,10 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog, goto out; } } + + rcu_read_lock_trace(); retval = bpf_prog_run_pin_on_cpu(prog, ctx); + rcu_read_unlock_trace(); if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) { err = -EFAULT; diff --git a/net/bridge/br_fdb.c 
b/net/bridge/br_fdb.c index af31cebfda94..46812b659710 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1011,7 +1011,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, - u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[]) + u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[], + struct netlink_ext_ack *extack) { int err = 0; @@ -1030,6 +1031,11 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, rcu_read_unlock(); local_bh_enable(); } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { + if (!p && !(ndm->ndm_state & NUD_PERMANENT)) { + NL_SET_ERR_MSG_MOD(extack, + "FDB entry towards bridge must be permanent"); + return -EINVAL; + } err = br_fdb_external_learn_add(br, p, addr, vid, true); } else { spin_lock_bh(&br->hash_lock); @@ -1102,9 +1108,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* VID was specified, so use it. */ - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb, + extack); } else { - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb, + extack); if (err || !vg || !vg->num_vlans) goto out; @@ -1116,7 +1124,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], if (!br_vlan_should_use(v)) continue; err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid, - nfea_tb); + nfea_tb, extack); if (err) goto out; } @@ -1273,6 +1281,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) flags |= BIT(BR_FDB_ADDED_BY_USER); + + if (!p) + flags |= BIT(BR_FDB_LOCAL); + fdb = fdb_create(br, p, addr, vid, flags); if (!fdb) { err = -ENOMEM; @@ -1299,6 +1311,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + if (!p) + set_bit(BR_FDB_LOCAL, &fdb->flags); + if (modified) fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 67c60240b713..4a02f8bb278a 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -614,6 +614,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, err = dev_set_allmulti(dev, 1); if (err) { + br_multicast_del_port(p); kfree(p); /* kobject not yet init'd, manually free */ goto err1; } @@ -722,6 +723,7 @@ err4: err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: + br_multicast_del_port(p); kobject_put(&p->kobj); dev_set_allmulti(dev, -1); err1: diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 73a8915b0148..0281453f7766 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -16,57 +16,89 @@ #include "br_private.h" -static bool br_rports_have_mc_router(struct net_bridge_mcast *brmctx) +static bool +br_ip4_rports_get_timer(struct net_bridge_mcast_port *pmctx, + unsigned long *timer) +{ + *timer = br_timer_value(&pmctx->ip4_mc_router_timer); + return !hlist_unhashed(&pmctx->ip4_rlist); +} + +static bool +br_ip6_rports_get_timer(struct net_bridge_mcast_port *pmctx, + unsigned long *timer) { #if IS_ENABLED(CONFIG_IPV6) - return !hlist_empty(&brmctx->ip4_mc_router_list) || - !hlist_empty(&brmctx->ip6_mc_router_list); + *timer = br_timer_value(&pmctx->ip6_mc_router_timer); + return !hlist_unhashed(&pmctx->ip6_rlist); #else - return !hlist_empty(&brmctx->ip4_mc_router_list); + *timer = 0; + return false; #endif } 
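/*
 * Illustrative sketch, not part of the patch above: the br_fdb change
 * rejects NTF_EXT_LEARNED entries pointing at the bridge itself unless
 * they are NUD_PERMANENT, and reports the reason through the extended
 * ack rather than a bare errno. A minimal, self-contained example of
 * that extack pattern could look like this; the function name is made
 * up, only the message string is taken from the change above.
 */
#include <linux/netlink.h>
#include <net/neighbour.h>

static int example_fdb_validate(u16 ndm_state, struct netlink_ext_ack *extack)
{
	/* -EINVAL alone tells userspace little; the extack string is
	 * echoed back by rtnetlink so the caller sees why it failed.
	 */
	if (!(ndm_state & NUD_PERMANENT)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "FDB entry towards bridge must be permanent");
		return -EINVAL;
	}
	return 0;
}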
-static bool -br_ip4_rports_get_timer(struct net_bridge_port *port, unsigned long *timer) +static size_t __br_rports_one_size(void) { - *timer = br_timer_value(&port->multicast_ctx.ip4_mc_router_timer); - return !hlist_unhashed(&port->multicast_ctx.ip4_rlist); + return nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PORT */ + nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_TIMER */ + nla_total_size(sizeof(u8)) + /* MDBA_ROUTER_PATTR_TYPE */ + nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET_TIMER */ + nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET6_TIMER */ + nla_total_size(sizeof(u32)); /* MDBA_ROUTER_PATTR_VID */ } -static bool -br_ip6_rports_get_timer(struct net_bridge_port *port, unsigned long *timer) +size_t br_rports_size(const struct net_bridge_mcast *brmctx) { + struct net_bridge_mcast_port *pmctx; + size_t size = nla_total_size(0); /* MDBA_ROUTER */ + + rcu_read_lock(); + hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list, + ip4_rlist) + size += __br_rports_one_size(); + #if IS_ENABLED(CONFIG_IPV6) - *timer = br_timer_value(&port->multicast_ctx.ip6_mc_router_timer); - return !hlist_unhashed(&port->multicast_ctx.ip6_rlist); -#else - *timer = 0; - return false; + hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list, + ip6_rlist) + size += __br_rports_one_size(); #endif + rcu_read_unlock(); + + return size; } -static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev) +int br_rports_fill_info(struct sk_buff *skb, + const struct net_bridge_mcast *brmctx) { - struct net_bridge *br = netdev_priv(dev); + u16 vid = brmctx->vlan ? brmctx->vlan->vid : 0; bool have_ip4_mc_rtr, have_ip6_mc_rtr; unsigned long ip4_timer, ip6_timer; struct nlattr *nest, *port_nest; struct net_bridge_port *p; - if (!br->multicast_ctx.multicast_router) - return 0; - - if (!br_rports_have_mc_router(&br->multicast_ctx)) + if (!brmctx->multicast_router || !br_rports_have_mc_router(brmctx)) return 0; nest = nla_nest_start_noflag(skb, MDBA_ROUTER); if (nest == NULL) return -EMSGSIZE; - list_for_each_entry_rcu(p, &br->port_list, list) { - have_ip4_mc_rtr = br_ip4_rports_get_timer(p, &ip4_timer); - have_ip6_mc_rtr = br_ip6_rports_get_timer(p, &ip6_timer); + list_for_each_entry_rcu(p, &brmctx->br->port_list, list) { + struct net_bridge_mcast_port *pmctx; + + if (vid) { + struct net_bridge_vlan *v; + + v = br_vlan_find(nbp_vlan_group(p), vid); + if (!v) + continue; + pmctx = &v->port_mcast_ctx; + } else { + pmctx = &p->multicast_ctx; + } + + have_ip4_mc_rtr = br_ip4_rports_get_timer(pmctx, &ip4_timer); + have_ip6_mc_rtr = br_ip6_rports_get_timer(pmctx, &ip6_timer); if (!have_ip4_mc_rtr && !have_ip6_mc_rtr) continue; @@ -85,7 +117,8 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, ip4_timer)) || (have_ip6_mc_rtr && nla_put_u32(skb, MDBA_ROUTER_PATTR_INET6_TIMER, - ip6_timer))) { + ip6_timer)) || + (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid))) { nla_nest_cancel(skb, port_nest); goto fail; } @@ -390,6 +423,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) for_each_netdev_rcu(net, dev) { if (dev->priv_flags & IFF_EBRIDGE) { + struct net_bridge *br = netdev_priv(dev); struct br_port_msg *bpm; if (idx < s_idx) @@ -406,7 +440,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) bpm->ifindex = dev->ifindex; if (br_mdb_fill_info(skb, cb, dev) < 0) goto out; - if (br_rports_fill_info(skb, cb, dev) < 0) + if (br_rports_fill_info(skb, &br->multicast_ctx) < 0) 
goto out; cb->args[1] = 0; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 470f1ec3b579..16e686f5b9e9 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -221,7 +221,7 @@ br_multicast_pg_to_port_ctx(const struct net_bridge_port_group *pg) * can safely be used on return */ rcu_read_lock(); - vlan = br_vlan_find(nbp_vlan_group(pg->key.port), pg->key.addr.vid); + vlan = br_vlan_find(nbp_vlan_group_rcu(pg->key.port), pg->key.addr.vid); if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx)) pmctx = &vlan->port_mcast_ctx; else @@ -1628,7 +1628,8 @@ static void __br_multicast_send_query(struct net_bridge_mcast *brmctx, struct sk_buff *skb; u8 igmp_type; - if (!br_multicast_ctx_should_use(brmctx, pmctx)) + if (!br_multicast_ctx_should_use(brmctx, pmctx) || + !br_multicast_ctx_matches_vlan_snooping(brmctx)) return; again_under_lmqt: @@ -1658,27 +1659,56 @@ again_under_lmqt: } } +static void br_multicast_read_querier(const struct bridge_mcast_querier *querier, + struct bridge_mcast_querier *dest) +{ + unsigned int seq; + + memset(dest, 0, sizeof(*dest)); + do { + seq = read_seqcount_begin(&querier->seq); + dest->port_ifidx = querier->port_ifidx; + memcpy(&dest->addr, &querier->addr, sizeof(struct br_ip)); + } while (read_seqcount_retry(&querier->seq, seq)); +} + +static void br_multicast_update_querier(struct net_bridge_mcast *brmctx, + struct bridge_mcast_querier *querier, + int ifindex, + struct br_ip *saddr) +{ + lockdep_assert_held_once(&brmctx->br->multicast_lock); + + write_seqcount_begin(&querier->seq); + querier->port_ifidx = ifindex; + memcpy(&querier->addr, saddr, sizeof(*saddr)); + write_seqcount_end(&querier->seq); +} + static void br_multicast_send_query(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct bridge_mcast_own_query *own_query) { struct bridge_mcast_other_query *other_query = NULL; + struct bridge_mcast_querier *querier; struct br_ip br_group; unsigned long time; if (!br_multicast_ctx_should_use(brmctx, pmctx) || !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) || - !br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER)) + !brmctx->multicast_querier) return; memset(&br_group.dst, 0, sizeof(br_group.dst)); if (pmctx ? 
(own_query == &pmctx->ip4_own_query) : (own_query == &brmctx->ip4_own_query)) { + querier = &brmctx->ip4_querier; other_query = &brmctx->ip4_other_query; br_group.proto = htons(ETH_P_IP); #if IS_ENABLED(CONFIG_IPV6) } else { + querier = &brmctx->ip6_querier; other_query = &brmctx->ip6_other_query; br_group.proto = htons(ETH_P_IPV6); #endif @@ -1687,6 +1717,13 @@ static void br_multicast_send_query(struct net_bridge_mcast *brmctx, if (!other_query || timer_pending(&other_query->timer)) return; + /* we're about to select ourselves as querier */ + if (!pmctx && querier->port_ifidx) { + struct br_ip zeroip = {}; + + br_multicast_update_querier(brmctx, querier, 0, &zeroip); + } + __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &br_group, false, 0, NULL); @@ -1747,14 +1784,16 @@ static void br_multicast_port_group_rexmit(struct timer_list *t) spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) || - !br_opt_get(br, BROPT_MULTICAST_ENABLED) || - !br_opt_get(br, BROPT_MULTICAST_QUERIER)) + !br_opt_get(br, BROPT_MULTICAST_ENABLED)) goto out; pmctx = br_multicast_pg_to_port_ctx(pg); if (!pmctx) goto out; brmctx = br_multicast_port_ctx_get_global(pmctx); + if (!brmctx->multicast_querier) + goto out; + if (pg->key.addr.proto == htons(ETH_P_IP)) other_query = &brmctx->ip4_other_query; #if IS_ENABLED(CONFIG_IPV6) @@ -1974,8 +2013,7 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_mcast *brmctx, if (ent->flags & BR_SGRP_F_SEND) { ent->flags &= ~BR_SGRP_F_SEND; if (ent->timer.expires > lmqt) { - if (br_opt_get(brmctx->br, - BROPT_MULTICAST_QUERIER) && + if (brmctx->multicast_querier && other_query && !timer_pending(&other_query->timer)) ent->src_query_rexmit_cnt = lmqc; @@ -1984,7 +2022,7 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_mcast *brmctx, } } - if (!br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER) || + if (!brmctx->multicast_querier || !other_query || timer_pending(&other_query->timer)) return; @@ -2015,7 +2053,7 @@ static void __grp_send_query_and_rexmit(struct net_bridge_mcast *brmctx, other_query = &brmctx->ip6_other_query; #endif - if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER) && + if (brmctx->multicast_querier && other_query && !timer_pending(&other_query->timer)) { lmi = now + brmctx->multicast_last_member_interval; pg->grp_query_rexmit_cnt = brmctx->multicast_last_member_count - 1; @@ -2826,58 +2864,157 @@ unlock_continue: } #endif -static bool br_ip4_multicast_select_querier(struct net_bridge_mcast *brmctx, - struct net_bridge_mcast_port *pmctx, - __be32 saddr) +static bool br_multicast_select_querier(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct br_ip *saddr) { - struct net_bridge_port *port = pmctx ? pmctx->port : NULL; + int port_ifidx = pmctx ? 
pmctx->port->dev->ifindex : 0; + struct timer_list *own_timer, *other_timer; + struct bridge_mcast_querier *querier; - if (!timer_pending(&brmctx->ip4_own_query.timer) && - !timer_pending(&brmctx->ip4_other_query.timer)) - goto update; - - if (!brmctx->ip4_querier.addr.src.ip4) - goto update; + switch (saddr->proto) { + case htons(ETH_P_IP): + querier = &brmctx->ip4_querier; + own_timer = &brmctx->ip4_own_query.timer; + other_timer = &brmctx->ip4_other_query.timer; + if (!querier->addr.src.ip4 || + ntohl(saddr->src.ip4) <= ntohl(querier->addr.src.ip4)) + goto update; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + querier = &brmctx->ip6_querier; + own_timer = &brmctx->ip6_own_query.timer; + other_timer = &brmctx->ip6_other_query.timer; + if (ipv6_addr_cmp(&saddr->src.ip6, &querier->addr.src.ip6) <= 0) + goto update; + break; +#endif + default: + return false; + } - if (ntohl(saddr) <= ntohl(brmctx->ip4_querier.addr.src.ip4)) + if (!timer_pending(own_timer) && !timer_pending(other_timer)) goto update; return false; update: - brmctx->ip4_querier.addr.src.ip4 = saddr; - - /* update protected by general multicast_lock by caller */ - rcu_assign_pointer(brmctx->ip4_querier.port, port); + br_multicast_update_querier(brmctx, querier, port_ifidx, saddr); return true; } +static struct net_bridge_port * +__br_multicast_get_querier_port(struct net_bridge *br, + const struct bridge_mcast_querier *querier) +{ + int port_ifidx = READ_ONCE(querier->port_ifidx); + struct net_bridge_port *p; + struct net_device *dev; + + if (port_ifidx == 0) + return NULL; + + dev = dev_get_by_index_rcu(dev_net(br->dev), port_ifidx); + if (!dev) + return NULL; + p = br_port_get_rtnl_rcu(dev); + if (!p || p->br != br) + return NULL; + + return p; +} + +size_t br_multicast_querier_state_size(void) +{ + return nla_total_size(0) + /* nest attribute */ + nla_total_size(sizeof(__be32)) + /* BRIDGE_QUERIER_IP_ADDRESS */ + nla_total_size(sizeof(int)) + /* BRIDGE_QUERIER_IP_PORT */ + nla_total_size_64bit(sizeof(u64)) + /* BRIDGE_QUERIER_IP_OTHER_TIMER */ #if IS_ENABLED(CONFIG_IPV6) -static bool br_ip6_multicast_select_querier(struct net_bridge_mcast *brmctx, - struct net_bridge_mcast_port *pmctx, - struct in6_addr *saddr) + nla_total_size(sizeof(struct in6_addr)) + /* BRIDGE_QUERIER_IPV6_ADDRESS */ + nla_total_size(sizeof(int)) + /* BRIDGE_QUERIER_IPV6_PORT */ + nla_total_size_64bit(sizeof(u64)) + /* BRIDGE_QUERIER_IPV6_OTHER_TIMER */ +#endif + 0; +} + +/* protected by rtnl or rcu */ +int br_multicast_dump_querier_state(struct sk_buff *skb, + const struct net_bridge_mcast *brmctx, + int nest_attr) { - struct net_bridge_port *port = pmctx ? 
pmctx->port : NULL; + struct bridge_mcast_querier querier = {}; + struct net_bridge_port *p; + struct nlattr *nest; - if (!timer_pending(&brmctx->ip6_own_query.timer) && - !timer_pending(&brmctx->ip6_other_query.timer)) - goto update; + if (!br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) || + br_multicast_ctx_vlan_global_disabled(brmctx)) + return 0; - if (ipv6_addr_cmp(saddr, &brmctx->ip6_querier.addr.src.ip6) <= 0) - goto update; + nest = nla_nest_start(skb, nest_attr); + if (!nest) + return -EMSGSIZE; - return false; + rcu_read_lock(); + if (!brmctx->multicast_querier && + !timer_pending(&brmctx->ip4_other_query.timer)) + goto out_v6; -update: - brmctx->ip6_querier.addr.src.ip6 = *saddr; + br_multicast_read_querier(&brmctx->ip4_querier, &querier); + if (nla_put_in_addr(skb, BRIDGE_QUERIER_IP_ADDRESS, + querier.addr.src.ip4)) { + rcu_read_unlock(); + goto out_err; + } - /* update protected by general multicast_lock by caller */ - rcu_assign_pointer(brmctx->ip6_querier.port, port); + p = __br_multicast_get_querier_port(brmctx->br, &querier); + if (timer_pending(&brmctx->ip4_other_query.timer) && + (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IP_OTHER_TIMER, + br_timer_value(&brmctx->ip4_other_query.timer), + BRIDGE_QUERIER_PAD) || + (p && nla_put_u32(skb, BRIDGE_QUERIER_IP_PORT, p->dev->ifindex)))) { + rcu_read_unlock(); + goto out_err; + } - return true; -} +out_v6: +#if IS_ENABLED(CONFIG_IPV6) + if (!brmctx->multicast_querier && + !timer_pending(&brmctx->ip6_other_query.timer)) + goto out; + + br_multicast_read_querier(&brmctx->ip6_querier, &querier); + if (nla_put_in6_addr(skb, BRIDGE_QUERIER_IPV6_ADDRESS, + &querier.addr.src.ip6)) { + rcu_read_unlock(); + goto out_err; + } + + p = __br_multicast_get_querier_port(brmctx->br, &querier); + if (timer_pending(&brmctx->ip6_other_query.timer) && + (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IPV6_OTHER_TIMER, + br_timer_value(&brmctx->ip6_other_query.timer), + BRIDGE_QUERIER_PAD) || + (p && nla_put_u32(skb, BRIDGE_QUERIER_IPV6_PORT, + p->dev->ifindex)))) { + rcu_read_unlock(); + goto out_err; + } +out: #endif + rcu_read_unlock(); + nla_nest_end(skb, nest); + if (!nla_len(nest)) + nla_nest_cancel(skb, nest); + + return 0; + +out_err: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} static void br_multicast_update_query_timer(struct net_bridge_mcast *brmctx, @@ -3080,7 +3217,7 @@ br_ip4_multicast_query_received(struct net_bridge_mcast *brmctx, struct br_ip *saddr, unsigned long max_delay) { - if (!br_ip4_multicast_select_querier(brmctx, pmctx, saddr->src.ip4)) + if (!br_multicast_select_querier(brmctx, pmctx, saddr)) return; br_multicast_update_query_timer(brmctx, query, max_delay); @@ -3095,7 +3232,7 @@ br_ip6_multicast_query_received(struct net_bridge_mcast *brmctx, struct br_ip *saddr, unsigned long max_delay) { - if (!br_ip6_multicast_select_querier(brmctx, pmctx, &saddr->src.ip6)) + if (!br_multicast_select_querier(brmctx, pmctx, saddr)) return; br_multicast_update_query_timer(brmctx, query, max_delay); @@ -3115,7 +3252,7 @@ static void br_ip4_multicast_query(struct net_bridge_mcast *brmctx, struct igmpv3_query *ih3; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; - struct br_ip saddr; + struct br_ip saddr = {}; unsigned long max_delay; unsigned long now = jiffies; __be32 group; @@ -3195,7 +3332,7 @@ static int br_ip6_multicast_query(struct net_bridge_mcast *brmctx, struct mld2_query *mld2q; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; - struct br_ip saddr; + struct br_ip saddr = {}; 
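/*
 * Illustrative sketch, not part of the patch: the querier bookkeeping
 * above publishes its (port ifindex, source address) snapshot under a
 * seqcount so the lockless netlink dump path can read a consistent pair
 * while writers stay serialized by the bridge multicast lock. A minimal,
 * stand-alone version of that read/write pattern (all names invented)
 * looks like the following.
 */
#include <linux/seqlock.h>
#include <linux/types.h>

struct querier_snap {
	seqcount_t	seq;
	int		port_ifidx;
	u32		addr;
};

static void querier_snap_update(struct querier_snap *q, int ifidx, u32 addr)
{
	/* caller is assumed to hold whatever lock serializes writers */
	write_seqcount_begin(&q->seq);
	q->port_ifidx = ifidx;
	q->addr = addr;
	write_seqcount_end(&q->seq);
}

static void querier_snap_read(const struct querier_snap *q, int *ifidx, u32 *addr)
{
	unsigned int seq;

	/* retry until a snapshot is read without racing an update */
	do {
		seq = read_seqcount_begin(&q->seq);
		*ifidx = q->port_ifidx;
		*addr = q->addr;
	} while (read_seqcount_retry(&q->seq, seq));
}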
unsigned long max_delay; unsigned long now = jiffies; unsigned int offset = skb_transport_offset(skb); @@ -3316,7 +3453,7 @@ br_multicast_leave_group(struct net_bridge_mcast *brmctx, if (timer_pending(&other_query->timer)) goto out; - if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER)) { + if (brmctx->multicast_querier) { __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &mp->addr, false, 0, NULL); @@ -3673,7 +3810,6 @@ static void br_multicast_query_expired(struct net_bridge_mcast *brmctx, if (query->startup_sent < brmctx->multicast_startup_query_count) query->startup_sent++; - RCU_INIT_POINTER(querier->port, NULL); br_multicast_send_query(brmctx, NULL, query); out: spin_unlock(&brmctx->br->multicast_lock); @@ -3730,12 +3866,14 @@ void br_multicast_ctx_init(struct net_bridge *br, brmctx->multicast_membership_interval = 260 * HZ; brmctx->ip4_other_query.delay_time = 0; - brmctx->ip4_querier.port = NULL; + brmctx->ip4_querier.port_ifidx = 0; + seqcount_init(&brmctx->ip4_querier.seq); brmctx->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) brmctx->multicast_mld_version = 1; brmctx->ip6_other_query.delay_time = 0; - brmctx->ip6_querier.port = NULL; + brmctx->ip6_querier.port_ifidx = 0; + seqcount_init(&brmctx->ip6_querier.seq); #endif timer_setup(&brmctx->ip4_mc_router_timer, @@ -3874,9 +4012,9 @@ void br_multicast_open(struct net_bridge *br) __br_multicast_open(&vlan->br_mcast_ctx); } } + } else { + __br_multicast_open(&br->multicast_ctx); } - - __br_multicast_open(&br->multicast_ctx); } static void __br_multicast_stop(struct net_bridge_mcast *brmctx) @@ -3936,7 +4074,7 @@ void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) } } -void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on) +static void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on) { struct net_bridge_port *p; @@ -3951,6 +4089,9 @@ void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on) continue; br_multicast_toggle_one_vlan(vport, on); } + + if (br_vlan_is_brentry(vlan)) + br_multicast_toggle_one_vlan(vlan, on); } int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, @@ -4027,9 +4168,9 @@ void br_multicast_stop(struct net_bridge *br) __br_multicast_stop(&vlan->br_mcast_ctx); } } + } else { + __br_multicast_stop(&br->multicast_ctx); } - - __br_multicast_stop(&br->multicast_ctx); } void br_multicast_dev_del(struct net_bridge *br) @@ -4051,17 +4192,16 @@ void br_multicast_dev_del(struct net_bridge *br) rcu_barrier(); } -int br_multicast_set_router(struct net_bridge *br, unsigned long val) +int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val) { - struct net_bridge_mcast *brmctx = &br->multicast_ctx; int err = -EINVAL; - spin_lock_bh(&br->multicast_lock); + spin_lock_bh(&brmctx->br->multicast_lock); switch (val) { case MDB_RTR_TYPE_DISABLED: case MDB_RTR_TYPE_PERM: - br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM); + br_mc_router_state_change(brmctx->br, val == MDB_RTR_TYPE_PERM); del_timer(&brmctx->ip4_mc_router_timer); #if IS_ENABLED(CONFIG_IPV6) del_timer(&brmctx->ip6_mc_router_timer); @@ -4071,13 +4211,13 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val) break; case MDB_RTR_TYPE_TEMP_QUERY: if (brmctx->multicast_router != MDB_RTR_TYPE_TEMP_QUERY) - br_mc_router_state_change(br, false); + br_mc_router_state_change(brmctx->br, false); brmctx->multicast_router = val; err = 0; break; } - spin_unlock_bh(&br->multicast_lock); + spin_unlock_bh(&brmctx->br->multicast_lock); return err; } @@ 
-4174,6 +4314,9 @@ static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, { struct net_bridge_port *port; + if (!br_multicast_ctx_matches_vlan_snooping(brmctx)) + return; + __br_multicast_open_query(brmctx->br, query); rcu_read_lock(); @@ -4189,7 +4332,8 @@ static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, if (br_multicast_ctx_is_vlan(brmctx)) { struct net_bridge_vlan *vlan; - vlan = br_vlan_find(nbp_vlan_group(port), brmctx->vlan->vid); + vlan = br_vlan_find(nbp_vlan_group_rcu(port), + brmctx->vlan->vid); if (!vlan || br_multicast_port_ctx_state_stopped(&vlan->port_mcast_ctx)) continue; @@ -4292,18 +4436,17 @@ bool br_multicast_router(const struct net_device *dev) } EXPORT_SYMBOL_GPL(br_multicast_router); -int br_multicast_set_querier(struct net_bridge *br, unsigned long val) +int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val) { - struct net_bridge_mcast *brmctx = &br->multicast_ctx; unsigned long max_delay; val = !!val; - spin_lock_bh(&br->multicast_lock); - if (br_opt_get(br, BROPT_MULTICAST_QUERIER) == val) + spin_lock_bh(&brmctx->br->multicast_lock); + if (brmctx->multicast_querier == val) goto unlock; - br_opt_toggle(br, BROPT_MULTICAST_QUERIER, !!val); + WRITE_ONCE(brmctx->multicast_querier, val); if (!val) goto unlock; @@ -4322,12 +4465,13 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val) #endif unlock: - spin_unlock_bh(&br->multicast_lock); + spin_unlock_bh(&brmctx->br->multicast_lock); return 0; } -int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val) +int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx, + unsigned long val) { /* Currently we support only version 2 and 3 */ switch (val) { @@ -4338,15 +4482,16 @@ int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val) return -EINVAL; } - spin_lock_bh(&br->multicast_lock); - br->multicast_ctx.multicast_igmp_version = val; - spin_unlock_bh(&br->multicast_lock); + spin_lock_bh(&brmctx->br->multicast_lock); + brmctx->multicast_igmp_version = val; + spin_unlock_bh(&brmctx->br->multicast_lock); return 0; } #if IS_ENABLED(CONFIG_IPV6) -int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val) +int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx, + unsigned long val) { /* Currently we support version 1 and 2 */ switch (val) { @@ -4357,9 +4502,9 @@ int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val) return -EINVAL; } - spin_lock_bh(&br->multicast_lock); - br->multicast_ctx.multicast_mld_version = val; - spin_unlock_bh(&br->multicast_lock); + spin_lock_bh(&brmctx->br->multicast_lock); + brmctx->multicast_mld_version = val; + spin_unlock_bh(&brmctx->br->multicast_lock); return 0; } @@ -4474,6 +4619,7 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) struct net_bridge *br; struct net_bridge_port *port; bool ret = false; + int port_ifidx; rcu_read_lock(); if (!netif_is_bridge_port(dev)) @@ -4488,14 +4634,16 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) switch (proto) { case ETH_P_IP: + port_ifidx = brmctx->ip4_querier.port_ifidx; if (!timer_pending(&brmctx->ip4_other_query.timer) || - rcu_dereference(brmctx->ip4_querier.port) == port) + port_ifidx == port->dev->ifindex) goto unlock; break; #if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: + port_ifidx = brmctx->ip6_querier.port_ifidx; if (!timer_pending(&brmctx->ip6_other_query.timer) || - rcu_dereference(brmctx->ip6_querier.port) == port) + 
port_ifidx == port->dev->ifindex) goto unlock; break; #endif diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 616a1b6dec3c..2f184ad8ae29 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1286,7 +1286,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_ROUTER]) { u8 multicast_router = nla_get_u8(data[IFLA_BR_MCAST_ROUTER]); - err = br_multicast_set_router(br, multicast_router); + err = br_multicast_set_router(&br->multicast_ctx, + multicast_router); if (err) return err; } @@ -1309,7 +1310,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_QUERIER]) { u8 mcast_querier = nla_get_u8(data[IFLA_BR_MCAST_QUERIER]); - err = br_multicast_set_querier(br, mcast_querier); + err = br_multicast_set_querier(&br->multicast_ctx, + mcast_querier); if (err) return err; } @@ -1380,7 +1382,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], __u8 igmp_version; igmp_version = nla_get_u8(data[IFLA_BR_MCAST_IGMP_VERSION]); - err = br_multicast_set_igmp_version(br, igmp_version); + err = br_multicast_set_igmp_version(&br->multicast_ctx, + igmp_version); if (err) return err; } @@ -1390,7 +1393,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], __u8 mld_version; mld_version = nla_get_u8(data[IFLA_BR_MCAST_MLD_VERSION]); - err = br_multicast_set_mld_version(br, mld_version); + err = br_multicast_set_mld_version(&br->multicast_ctx, + mld_version); if (err) return err; } @@ -1497,6 +1501,7 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_IGMP_VERSION */ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_MLD_VERSION */ + br_multicast_querier_state_size() + /* IFLA_BR_MCAST_QUERIER_STATE */ #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */ @@ -1573,7 +1578,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR, br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR)) || nla_put_u8(skb, IFLA_BR_MCAST_QUERIER, - br_opt_get(br, BROPT_MULTICAST_QUERIER)) || + br->multicast_ctx.multicast_querier) || nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED, br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) || nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, RHT_ELASTICITY) || @@ -1583,7 +1588,9 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT, br->multicast_ctx.multicast_startup_query_count) || nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION, - br->multicast_ctx.multicast_igmp_version)) + br->multicast_ctx.multicast_igmp_version) || + br_multicast_dump_querier_state(skb, &br->multicast_ctx, + IFLA_BR_MCAST_QUERIER_STATE)) return -EMSGSIZE; #if IS_ENABLED(CONFIG_IPV6) if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 10d43bf4bb80..21b292eb2b3e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -81,7 +81,8 @@ struct bridge_mcast_other_query { /* selected querier */ struct bridge_mcast_querier { struct br_ip addr; - struct net_bridge_port __rcu *port; + int port_ifidx; + seqcount_t seq; }; /* IGMP/MLD statistics */ @@ -118,6 +119,7 @@ struct net_bridge_mcast { u32 multicast_last_member_count; u32 multicast_startup_query_count; + u8 multicast_querier; u8 
multicast_igmp_version; u8 multicast_router; #if IS_ENABLED(CONFIG_IPV6) @@ -431,7 +433,6 @@ enum net_bridge_opts { BROPT_NF_CALL_ARPTABLES, BROPT_GROUP_ADDR_SET, BROPT_MULTICAST_ENABLED, - BROPT_MULTICAST_QUERIER, BROPT_MULTICAST_QUERY_USE_IFADDR, BROPT_MULTICAST_STATS_ENABLED, BROPT_HAS_IPV6_ADDR, @@ -877,15 +878,17 @@ void br_multicast_dev_del(struct net_bridge *br); void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, struct net_bridge_mcast *brmctx, bool local_rcv, bool local_orig); -int br_multicast_set_router(struct net_bridge *br, unsigned long val); +int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val); int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); int br_multicast_toggle(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack); -int br_multicast_set_querier(struct net_bridge *br, unsigned long val); +int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val); int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); -int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val); +int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx, + unsigned long val); #if IS_ENABLED(CONFIG_IPV6) -int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val); +int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx, + unsigned long val); #endif struct net_bridge_mdb_entry * br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst); @@ -935,7 +938,6 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port, struct net_bridge_mcast_port *pmctx); void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx); void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on); -void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on); int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, struct netlink_ext_ack *extack); bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on); @@ -943,6 +945,13 @@ bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on); int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, const void *ctx, bool adding, struct notifier_block *nb, struct netlink_ext_ack *extack); +int br_rports_fill_info(struct sk_buff *skb, + const struct net_bridge_mcast *brmctx); +int br_multicast_dump_querier_state(struct sk_buff *skb, + const struct net_bridge_mcast *brmctx, + int nest_attr); +size_t br_multicast_querier_state_size(void); +size_t br_rports_size(const struct net_bridge_mcast *brmctx); static inline bool br_group_is_l2(const struct br_ip *group) { @@ -1026,7 +1035,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx, { bool own_querier_enabled; - if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER)) { + if (brmctx->multicast_querier) { if (is_ipv6 && !br_opt_get(brmctx->br, BROPT_HAS_IPV6_ADDR)) own_querier_enabled = false; else @@ -1165,6 +1174,61 @@ br_multicast_port_ctx_state_stopped(const struct net_bridge_mcast_port *pmctx) (br_multicast_port_ctx_is_vlan(pmctx) && pmctx->vlan->state == BR_STATE_BLOCKING); } + +static inline bool +br_rports_have_mc_router(const struct net_bridge_mcast *brmctx) +{ +#if IS_ENABLED(CONFIG_IPV6) + return !hlist_empty(&brmctx->ip4_mc_router_list) || + !hlist_empty(&brmctx->ip6_mc_router_list); +#else + return !hlist_empty(&brmctx->ip4_mc_router_list); +#endif +} + +static inline bool +br_multicast_ctx_options_equal(const struct net_bridge_mcast 
*brmctx1, + const struct net_bridge_mcast *brmctx2) +{ + return brmctx1->multicast_igmp_version == + brmctx2->multicast_igmp_version && + brmctx1->multicast_last_member_count == + brmctx2->multicast_last_member_count && + brmctx1->multicast_startup_query_count == + brmctx2->multicast_startup_query_count && + brmctx1->multicast_last_member_interval == + brmctx2->multicast_last_member_interval && + brmctx1->multicast_membership_interval == + brmctx2->multicast_membership_interval && + brmctx1->multicast_querier_interval == + brmctx2->multicast_querier_interval && + brmctx1->multicast_query_interval == + brmctx2->multicast_query_interval && + brmctx1->multicast_query_response_interval == + brmctx2->multicast_query_response_interval && + brmctx1->multicast_startup_query_interval == + brmctx2->multicast_startup_query_interval && + brmctx1->multicast_querier == brmctx2->multicast_querier && + brmctx1->multicast_router == brmctx2->multicast_router && + !br_rports_have_mc_router(brmctx1) && + !br_rports_have_mc_router(brmctx2) && +#if IS_ENABLED(CONFIG_IPV6) + brmctx1->multicast_mld_version == + brmctx2->multicast_mld_version && +#endif + true; +} + +static inline bool +br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx) +{ + bool vlan_snooping_enabled; + + vlan_snooping_enabled = !!br_opt_get(brmctx->br, + BROPT_MCAST_VLAN_SNOOPING_ENABLED); + + return !!(vlan_snooping_enabled == br_multicast_ctx_is_vlan(brmctx)); +} #else static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx, struct net_bridge_mcast_port **pmctx, @@ -1305,11 +1369,6 @@ static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, { } -static inline void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, - bool on) -{ -} - static inline int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, struct netlink_ext_ack *extack) @@ -1330,6 +1389,13 @@ static inline int br_mdb_replay(struct net_device *br_dev, { return -EOPNOTSUPP; } + +static inline bool +br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1, + const struct net_bridge_mcast *brmctx2) +{ + return true; +} #endif /* br_vlan.c */ diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 953d544663d5..d9a89ddd0331 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -390,7 +390,7 @@ static ssize_t multicast_router_show(struct device *d, static int set_multicast_router(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - return br_multicast_set_router(br, val); + return br_multicast_set_router(&br->multicast_ctx, val); } static ssize_t multicast_router_store(struct device *d, @@ -447,13 +447,13 @@ static ssize_t multicast_querier_show(struct device *d, char *buf) { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_QUERIER)); + return sprintf(buf, "%d\n", br->multicast_ctx.multicast_querier); } static int set_multicast_querier(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - return br_multicast_set_querier(br, val); + return br_multicast_set_querier(&br->multicast_ctx, val); } static ssize_t multicast_querier_store(struct device *d, @@ -520,7 +520,7 @@ static ssize_t multicast_igmp_version_show(struct device *d, static int set_multicast_igmp_version(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - return br_multicast_set_igmp_version(br, val); + return br_multicast_set_igmp_version(&br->multicast_ctx, val); } static ssize_t 
multicast_igmp_version_store(struct device *d, @@ -757,7 +757,7 @@ static ssize_t multicast_mld_version_show(struct device *d, static int set_multicast_mld_version(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - return br_multicast_set_mld_version(br, val); + return br_multicast_set_mld_version(&br->multicast_ctx, val); } static ssize_t multicast_mld_version_store(struct device *d, diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 8cfd035bbaf9..e25e288e7a85 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -694,6 +694,7 @@ static int br_vlan_add_existing(struct net_bridge *br, vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY; vg->num_vlans++; *changed = true; + br_multicast_toggle_one_vlan(vlan, true); } if (__vlan_add_flags(vlan, flags)) @@ -2019,7 +2020,7 @@ static int br_vlan_dump_dev(const struct net_device *dev, if (dump_global) { if (br_vlan_global_opts_can_enter_range(v, range_end)) - continue; + goto update_end; if (!br_vlan_global_opts_fill(skb, range_start->vid, range_end->vid, range_start)) { @@ -2045,6 +2046,7 @@ static int br_vlan_dump_dev(const struct net_device *dev, range_start = v; } +update_end: range_end = v; } diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index 4ef975b20185..a3b8a086284b 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -264,12 +264,16 @@ bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr, { return v_curr->vid - r_end->vid == 1 && ((v_curr->priv_flags ^ r_end->priv_flags) & - BR_VLFLAG_GLOBAL_MCAST_ENABLED) == 0; + BR_VLFLAG_GLOBAL_MCAST_ENABLED) == 0 && + br_multicast_ctx_options_equal(&v_curr->br_mcast_ctx, + &r_end->br_mcast_ctx); } bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, const struct net_bridge_vlan *v_opts) { + struct nlattr *nest2 __maybe_unused; + u64 clockval __maybe_unused; struct nlattr *nest; nest = nla_nest_start(skb, BRIDGE_VLANDB_GLOBAL_OPTIONS); @@ -285,8 +289,68 @@ bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING if (nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, - !!(v_opts->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED))) + !!(v_opts->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)) || + nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION, + v_opts->br_mcast_ctx.multicast_igmp_version) || + nla_put_u32(skb, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT, + v_opts->br_mcast_ctx.multicast_last_member_count) || + nla_put_u32(skb, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT, + v_opts->br_mcast_ctx.multicast_startup_query_count) || + nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, + v_opts->br_mcast_ctx.multicast_querier) || + nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, + v_opts->br_mcast_ctx.multicast_router) || + br_multicast_dump_querier_state(skb, &v_opts->br_mcast_ctx, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE)) goto out_err; + + clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_last_member_interval); + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL, + clockval, BRIDGE_VLANDB_GOPTS_PAD)) + goto out_err; + clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_membership_interval); + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, + clockval, BRIDGE_VLANDB_GOPTS_PAD)) + goto out_err; + clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_querier_interval); + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, + 
clockval, BRIDGE_VLANDB_GOPTS_PAD)) + goto out_err; + clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_query_interval); + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, + clockval, BRIDGE_VLANDB_GOPTS_PAD)) + goto out_err; + clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_query_response_interval); + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, + clockval, BRIDGE_VLANDB_GOPTS_PAD)) + goto out_err; + clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_startup_query_interval); + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, + clockval, BRIDGE_VLANDB_GOPTS_PAD)) + goto out_err; + + if (br_rports_have_mc_router(&v_opts->br_mcast_ctx)) { + nest2 = nla_nest_start(skb, + BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS); + if (!nest2) + goto out_err; + + rcu_read_lock(); + if (br_rports_fill_info(skb, &v_opts->br_mcast_ctx)) { + rcu_read_unlock(); + nla_nest_cancel(skb, nest2); + goto out_err; + } + rcu_read_unlock(); + + nla_nest_end(skb, nest2); + } + +#if IS_ENABLED(CONFIG_IPV6) + if (nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, + v_opts->br_mcast_ctx.multicast_mld_version)) + goto out_err; +#endif #endif nla_nest_end(skb, nest); @@ -298,13 +362,28 @@ out_err: return false; } -static size_t rtnl_vlan_global_opts_nlmsg_size(void) +static size_t rtnl_vlan_global_opts_nlmsg_size(const struct net_bridge_vlan *v) { return NLMSG_ALIGN(sizeof(struct br_vlan_msg)) + nla_total_size(0) /* BRIDGE_VLANDB_GLOBAL_OPTIONS */ + nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_GOPTS_ID */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING */ + + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION */ + + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION */ + + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT */ + + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT */ + + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL */ + + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL */ + + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL */ + + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL */ + + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL */ + + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL */ + + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER */ + + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER */ + + br_multicast_querier_state_size() /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE */ + + nla_total_size(0) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */ + + br_rports_size(&v->br_mcast_ctx) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */ #endif + nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_GOPTS_RANGE */ } @@ -321,7 +400,12 @@ static void br_vlan_global_opts_notify(const struct net_bridge *br, /* right now notifications are done only with rtnl held */ ASSERT_RTNL(); - skb = nlmsg_new(rtnl_vlan_global_opts_nlmsg_size(), GFP_KERNEL); + /* need to find the vlan due to flags/options */ + v = br_vlan_find(br_vlan_group(br), vid); + if (!v) + return; + + skb = nlmsg_new(rtnl_vlan_global_opts_nlmsg_size(v), GFP_KERNEL); if (!skb) goto out_err; @@ -334,11 +418,6 @@ static void br_vlan_global_opts_notify(const struct net_bridge *br, bvm->family = AF_BRIDGE; bvm->ifindex = br->dev->ifindex; - /* need to find 
the vlan due to flags/options */ - v = br_vlan_find(br_vlan_group(br), vid); - if (!v) - goto out_kfree; - if (!br_vlan_global_opts_fill(skb, vid, vid_range, v)) goto out_err; @@ -348,7 +427,6 @@ static void br_vlan_global_opts_notify(const struct net_bridge *br, out_err: rtnl_set_sk_err(dev_net(br->dev), RTNLGRP_BRVLAN, err); -out_kfree: kfree_skb(skb); } @@ -359,6 +437,8 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, bool *changed, struct netlink_ext_ack *extack) { + int err __maybe_unused; + *changed = false; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING if (tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]) { @@ -368,6 +448,100 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, if (br_multicast_toggle_global_vlan(v, !!mc_snooping)) *changed = true; } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION]) { + u8 ver; + + ver = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION]); + err = br_multicast_set_igmp_version(&v->br_mcast_ctx, ver); + if (err) + return err; + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT]) { + u32 cnt; + + cnt = nla_get_u32(tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT]); + v->br_mcast_ctx.multicast_last_member_count = cnt; + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT]) { + u32 cnt; + + cnt = nla_get_u32(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT]); + v->br_mcast_ctx.multicast_startup_query_count = cnt; + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL]) { + u64 val; + + val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL]); + v->br_mcast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val); + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL]) { + u64 val; + + val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL]); + v->br_mcast_ctx.multicast_membership_interval = clock_t_to_jiffies(val); + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL]) { + u64 val; + + val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL]); + v->br_mcast_ctx.multicast_querier_interval = clock_t_to_jiffies(val); + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]) { + u64 val; + + val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]); + v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) { + u64 val; + + val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]); + v->br_mcast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val); + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]) { + u64 val; + + val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]); + v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) { + u8 val; + + val = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]); + err = br_multicast_set_querier(&v->br_mcast_ctx, val); + if (err) + return err; + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_ROUTER]) { + u8 val; + + val = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_ROUTER]); + err = br_multicast_set_router(&v->br_mcast_ctx, val); + if (err) + return err; + *changed = true; + } +#if IS_ENABLED(CONFIG_IPV6) + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION]) { + u8 ver; + + ver = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION]); + err = br_multicast_set_mld_version(&v->br_mcast_ctx, ver); + if (err) + 
return err; + *changed = true; + } +#endif #endif return 0; @@ -377,6 +551,18 @@ static const struct nla_policy br_vlan_db_gpol[BRIDGE_VLANDB_GOPTS_MAX + 1] = { [BRIDGE_VLANDB_GOPTS_ID] = { .type = NLA_U16 }, [BRIDGE_VLANDB_GOPTS_RANGE] = { .type = NLA_U16 }, [BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING] = { .type = NLA_U8 }, + [BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION] = { .type = NLA_U8 }, + [BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL] = { .type = NLA_U64 }, + [BRIDGE_VLANDB_GOPTS_MCAST_QUERIER] = { .type = NLA_U8 }, + [BRIDGE_VLANDB_GOPTS_MCAST_ROUTER] = { .type = NLA_U8 }, + [BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION] = { .type = NLA_U8 }, + [BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT] = { .type = NLA_U32 }, + [BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT] = { .type = NLA_U32 }, + [BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL] = { .type = NLA_U64 }, + [BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL] = { .type = NLA_U64 }, + [BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL] = { .type = NLA_U64 }, + [BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL] = { .type = NLA_U64 }, + [BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL] = { .type = NLA_U64 }, }; int br_vlan_rtm_process_global_options(struct net_device *dev, diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 020b1487ee0c..a7af4eaff17d 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -98,7 +98,7 @@ static const struct nf_hook_ops ebt_ops_broute = { .priority = NF_BR_PRI_FIRST, }; -static int __net_init broute_net_init(struct net *net) +static int broute_table_init(struct net *net) { return ebt_register_table(net, &broute_table, &ebt_ops_broute); } @@ -114,19 +114,30 @@ static void __net_exit broute_net_exit(struct net *net) } static struct pernet_operations broute_net_ops = { - .init = broute_net_init, .exit = broute_net_exit, .pre_exit = broute_net_pre_exit, }; static int __init ebtable_broute_init(void) { - return register_pernet_subsys(&broute_net_ops); + int ret = ebt_register_template(&broute_table, broute_table_init); + + if (ret) + return ret; + + ret = register_pernet_subsys(&broute_net_ops); + if (ret) { + ebt_unregister_template(&broute_table); + return ret; + } + + return 0; } static void __exit ebtable_broute_fini(void) { unregister_pernet_subsys(&broute_net_ops); + ebt_unregister_template(&broute_table); } module_init(ebtable_broute_init); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 8ec0b3736803..c0b121df4a9a 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -86,7 +86,7 @@ static const struct nf_hook_ops ebt_ops_filter[] = { }, }; -static int __net_init frame_filter_net_init(struct net *net) +static int frame_filter_table_init(struct net *net) { return ebt_register_table(net, &frame_filter, ebt_ops_filter); } @@ -102,19 +102,30 @@ static void __net_exit frame_filter_net_exit(struct net *net) } static struct pernet_operations frame_filter_net_ops = { - .init = frame_filter_net_init, .exit = frame_filter_net_exit, .pre_exit = frame_filter_net_pre_exit, }; static int __init ebtable_filter_init(void) { - return register_pernet_subsys(&frame_filter_net_ops); + int ret = ebt_register_template(&frame_filter, frame_filter_table_init); + + if (ret) + return ret; + + ret = register_pernet_subsys(&frame_filter_net_ops); + if (ret) { + ebt_unregister_template(&frame_filter); + return ret; + } + + return 0; } static void __exit ebtable_filter_fini(void) { 
unregister_pernet_subsys(&frame_filter_net_ops); + ebt_unregister_template(&frame_filter); } module_init(ebtable_filter_init); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 7c8a1064a531..4078151c224f 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -85,7 +85,7 @@ static const struct nf_hook_ops ebt_ops_nat[] = { }, }; -static int __net_init frame_nat_net_init(struct net *net) +static int frame_nat_table_init(struct net *net) { return ebt_register_table(net, &frame_nat, ebt_ops_nat); } @@ -101,19 +101,30 @@ static void __net_exit frame_nat_net_exit(struct net *net) } static struct pernet_operations frame_nat_net_ops = { - .init = frame_nat_net_init, .exit = frame_nat_net_exit, .pre_exit = frame_nat_net_pre_exit, }; static int __init ebtable_nat_init(void) { - return register_pernet_subsys(&frame_nat_net_ops); + int ret = ebt_register_template(&frame_nat, frame_nat_table_init); + + if (ret) + return ret; + + ret = register_pernet_subsys(&frame_nat_net_ops); + if (ret) { + ebt_unregister_template(&frame_nat); + return ret; + } + + return ret; } static void __exit ebtable_nat_fini(void) { unregister_pernet_subsys(&frame_nat_net_ops); + ebt_unregister_template(&frame_nat); } module_init(ebtable_nat_init); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f022deb3721e..83d1798dfbb4 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -44,7 +44,16 @@ struct ebt_pernet { struct list_head tables; }; +struct ebt_template { + struct list_head list; + char name[EBT_TABLE_MAXNAMELEN]; + struct module *owner; + /* called when table is needed in the given netns */ + int (*table_init)(struct net *net); +}; + static unsigned int ebt_pernet_id __read_mostly; +static LIST_HEAD(template_tables); static DEFINE_MUTEX(ebt_mutex); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT @@ -309,30 +318,57 @@ letscontinue: /* If it succeeds, returns element and locks mutex */ static inline void * -find_inlist_lock_noload(struct list_head *head, const char *name, int *error, +find_inlist_lock_noload(struct net *net, const char *name, int *error, struct mutex *mutex) { - struct { - struct list_head list; - char name[EBT_FUNCTION_MAXNAMELEN]; - } *e; + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + struct ebt_template *tmpl; + struct ebt_table *table; mutex_lock(mutex); - list_for_each_entry(e, head, list) { - if (strcmp(e->name, name) == 0) - return e; + list_for_each_entry(table, &ebt_net->tables, list) { + if (strcmp(table->name, name) == 0) + return table; } + + list_for_each_entry(tmpl, &template_tables, list) { + if (strcmp(name, tmpl->name) == 0) { + struct module *owner = tmpl->owner; + + if (!try_module_get(owner)) + goto out; + + mutex_unlock(mutex); + + *error = tmpl->table_init(net); + if (*error) { + module_put(owner); + return NULL; + } + + mutex_lock(mutex); + module_put(owner); + break; + } + } + + list_for_each_entry(table, &ebt_net->tables, list) { + if (strcmp(table->name, name) == 0) + return table; + } + +out: *error = -ENOENT; mutex_unlock(mutex); return NULL; } static void * -find_inlist_lock(struct list_head *head, const char *name, const char *prefix, +find_inlist_lock(struct net *net, const char *name, const char *prefix, int *error, struct mutex *mutex) { return try_then_request_module( - find_inlist_lock_noload(head, name, error, mutex), + find_inlist_lock_noload(net, name, error, mutex), "%s%s", prefix, name); } @@ -340,10 +376,7 @@ 
static inline struct ebt_table * find_table_lock(struct net *net, const char *name, int *error, struct mutex *mutex) { - struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); - - return find_inlist_lock(&ebt_net->tables, name, - "ebtable_", error, mutex); + return find_inlist_lock(net, name, "ebtable_", error, mutex); } static inline void ebt_free_table_info(struct ebt_table_info *info) @@ -1258,6 +1291,54 @@ out: return ret; } +int ebt_register_template(const struct ebt_table *t, int (*table_init)(struct net *net)) +{ + struct ebt_template *tmpl; + + mutex_lock(&ebt_mutex); + list_for_each_entry(tmpl, &template_tables, list) { + if (WARN_ON_ONCE(strcmp(t->name, tmpl->name) == 0)) { + mutex_unlock(&ebt_mutex); + return -EEXIST; + } + } + + tmpl = kzalloc(sizeof(*tmpl), GFP_KERNEL); + if (!tmpl) { + mutex_unlock(&ebt_mutex); + return -ENOMEM; + } + + tmpl->table_init = table_init; + strscpy(tmpl->name, t->name, sizeof(tmpl->name)); + tmpl->owner = t->me; + list_add(&tmpl->list, &template_tables); + + mutex_unlock(&ebt_mutex); + return 0; +} +EXPORT_SYMBOL(ebt_register_template); + +void ebt_unregister_template(const struct ebt_table *t) +{ + struct ebt_template *tmpl; + + mutex_lock(&ebt_mutex); + list_for_each_entry(tmpl, &template_tables, list) { + if (strcmp(t->name, tmpl->name)) + continue; + + list_del(&tmpl->list); + mutex_unlock(&ebt_mutex); + kfree(tmpl); + return; + } + + mutex_unlock(&ebt_mutex); + WARN_ON_ONCE(1); +} +EXPORT_SYMBOL(ebt_unregister_template); + static struct ebt_table *__ebt_find_table(struct net *net, const char *name) { struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 8d033a75a766..fdbed3158555 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -88,6 +88,12 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, skb = ip_fraglist_next(&iter); } + + if (!err) + return 0; + + kfree_skb_list(iter.frag); + return err; } slow_path: diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index f564f82e91d9..68d2cbf8331a 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -416,7 +416,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, void *, value, u64, flags) { - if (in_irq() || in_nmi()) + if (in_hardirq() || in_nmi()) return (unsigned long)NULL; return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags); @@ -425,7 +425,7 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map, struct sock *, sk) { - if (in_irq() || in_nmi()) + if (in_hardirq() || in_nmi()) return -EPERM; return ____bpf_sk_storage_delete(map, sk); diff --git a/net/core/dev.c b/net/core/dev.c index eaaeff404ce9..74fd402d26dd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3107,7 +3107,7 @@ EXPORT_SYMBOL(__dev_kfree_skb_irq); void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) { - if (in_irq() || irqs_disabled()) + if (in_hardirq() || irqs_disabled()) __dev_kfree_skb_irq(skb, reason); else dev_kfree_skb(skb); @@ -7532,7 +7532,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, { struct netdev_adjacent *lower; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); lower = 
list_entry_rcu((*iter)->next, struct netdev_adjacent, list); @@ -9297,7 +9297,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev, return dev->xdp_state[mode].prog; } -static u8 dev_xdp_prog_count(struct net_device *dev) +u8 dev_xdp_prog_count(struct net_device *dev) { u8 count = 0; int i; @@ -9307,6 +9307,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev) count++; return count; } +EXPORT_SYMBOL_GPL(dev_xdp_prog_count); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { @@ -9400,6 +9401,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack { unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES); struct bpf_prog *cur_prog; + struct net_device *upper; + struct list_head *iter; enum bpf_xdp_mode mode; bpf_op_t bpf_op; int err; @@ -9438,6 +9441,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack return -EBUSY; } + /* don't allow if an upper device already has a program */ + netdev_for_each_upper_dev_rcu(dev, upper, iter) { + if (dev_xdp_prog_count(upper) > 0) { + NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program"); + return -EEXIST; + } + } + cur_prog = dev_xdp_prog(dev, mode); /* can't replace attached prog with link */ if (link && cur_prog) { diff --git a/net/core/devlink.c b/net/core/devlink.c index 8fa015319af6..a856ae401ea5 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -92,7 +92,8 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ DEVLINK_PORT_FN_STATE_ACTIVE), }; -static LIST_HEAD(devlink_list); +static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); +#define DEVLINK_REGISTERED XA_MARK_1 /* devlink_mutex * @@ -108,10 +109,23 @@ struct net *devlink_net(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_net); +static void devlink_put(struct devlink *devlink) +{ + if (refcount_dec_and_test(&devlink->refcount)) + complete(&devlink->comp); +} + +static bool __must_check devlink_try_get(struct devlink *devlink) +{ + return refcount_inc_not_zero(&devlink->refcount); +} + static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { struct devlink *devlink; + unsigned long index; + bool found = false; char *busname; char *devname; @@ -123,19 +137,19 @@ static struct devlink *devlink_get_from_attrs(struct net *net, lockdep_assert_held(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0 && - net_eq(devlink_net(devlink), net)) - return devlink; + net_eq(devlink_net(devlink), net)) { + found = true; + break; + } } - return ERR_PTR(-ENODEV); -} + if (!found || !devlink_try_get(devlink)) + devlink = ERR_PTR(-ENODEV); -static struct devlink *devlink_get_from_info(struct genl_info *info) -{ - return devlink_get_from_attrs(genl_info_net(info), info->attrs); + return devlink; } static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, @@ -486,7 +500,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, int err; mutex_lock(&devlink_mutex); - devlink = devlink_get_from_info(info); + devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs); if (IS_ERR(devlink)) { mutex_unlock(&devlink_mutex); return PTR_ERR(devlink); @@ -529,6 +543,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, unlock: if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) 
mutex_unlock(&devlink->lock); + devlink_put(devlink); mutex_unlock(&devlink_mutex); return err; } @@ -541,6 +556,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, devlink = info->user_ptr[0]; if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); + devlink_put(devlink); mutex_unlock(&devlink_mutex); } @@ -804,10 +820,11 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; } -static int -devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops, - struct devlink_port *port, struct sk_buff *msg, - struct netlink_ext_ack *extack, bool *msg_updated) +static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops, + struct devlink_port *port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) { u8 hw_addr[MAX_ADDR_LEN]; int hw_addr_len; @@ -816,7 +833,8 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops * if (!ops->port_function_hw_addr_get) return 0; - err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack); + err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len, + extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -830,12 +848,11 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops * } static int devlink_nl_rate_fill(struct sk_buff *msg, - struct devlink *devlink, struct devlink_rate *devlink_rate, - enum devlink_command cmd, u32 portid, - u32 seq, int flags, - struct netlink_ext_ack *extack) + enum devlink_command cmd, u32 portid, u32 seq, + int flags, struct netlink_ext_ack *extack) { + struct devlink *devlink = devlink_rate->devlink; void *hdr; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); @@ -893,12 +910,11 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate) opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED; } -static int -devlink_port_fn_state_fill(struct devlink *devlink, - const struct devlink_ops *ops, - struct devlink_port *port, struct sk_buff *msg, - struct netlink_ext_ack *extack, - bool *msg_updated) +static int devlink_port_fn_state_fill(const struct devlink_ops *ops, + struct devlink_port *port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) { enum devlink_port_fn_opstate opstate; enum devlink_port_fn_state state; @@ -907,7 +923,7 @@ devlink_port_fn_state_fill(struct devlink *devlink, if (!ops->port_fn_state_get) return 0; - err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack); + err = ops->port_fn_state_get(port, &state, &opstate, extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -935,7 +951,6 @@ static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) { - struct devlink *devlink = port->devlink; const struct devlink_ops *ops; struct nlattr *function_attr; bool msg_updated = false; @@ -945,13 +960,12 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por if (!function_attr) return -EMSGSIZE; - ops = devlink->ops; - err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg, - extack, &msg_updated); + ops = port->devlink->ops; + err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack, + &msg_updated); if (err) goto out; - err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack, - &msg_updated); + err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated); out: if (err || !msg_updated) nla_nest_cancel(msg, function_attr); @@ -960,12 +974,12 @@ out: 
return err; } -static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, +static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink_port *devlink_port, - enum devlink_command cmd, u32 portid, - u32 seq, int flags, - struct netlink_ext_ack *extack) + enum devlink_command cmd, u32 portid, u32 seq, + int flags, struct netlink_ext_ack *extack) { + struct devlink *devlink = devlink_port->devlink; void *hdr; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); @@ -1026,53 +1040,47 @@ nla_put_failure: static void devlink_port_notify(struct devlink_port *devlink_port, enum devlink_command cmd) { - struct devlink *devlink = devlink_port->devlink; struct sk_buff *msg; int err; - if (!devlink_port->devlink) - return; - WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0, - NULL); + err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL); if (err) { nlmsg_free(msg); return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(devlink_port->devlink), msg, 0, + DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } static void devlink_rate_notify(struct devlink_rate *devlink_rate, enum devlink_command cmd) { - struct devlink *devlink = devlink_rate->devlink; struct sk_buff *msg; int err; - WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && - cmd != DEVLINK_CMD_RATE_DEL); + WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - err = devlink_nl_rate_fill(msg, devlink, devlink_rate, - cmd, 0, 0, 0, NULL); + err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL); if (err) { nlmsg_free(msg); return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(devlink_rate->devlink), msg, 0, + DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, @@ -1081,13 +1089,18 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, struct devlink_rate *devlink_rate; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_rate, &devlink->rate_list, list) { enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; @@ -1097,18 +1110,19 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, idx++; continue; } - err = devlink_nl_rate_fill(msg, devlink, - devlink_rate, - cmd, id, + err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id, cb->nlh->nlmsg_seq, NLM_F_MULTI, NULL); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -1123,7 +1137,6 @@ static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_rate *devlink_rate = info->user_ptr[1]; - struct devlink *devlink = 
devlink_rate->devlink; struct sk_buff *msg; int err; @@ -1131,8 +1144,7 @@ static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb, if (!msg) return -ENOMEM; - err = devlink_nl_rate_fill(msg, devlink, devlink_rate, - DEVLINK_CMD_RATE_NEW, + err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW, info->snd_portid, info->snd_seq, 0, info->extack); if (err) { @@ -1180,20 +1192,30 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, { struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) { + devlink_put(devlink); + continue; + } + if (idx < start) { idx++; + devlink_put(devlink); continue; } + err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); + devlink_put(devlink); if (err) goto out; idx++; @@ -1209,7 +1231,6 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; - struct devlink *devlink = devlink_port->devlink; struct sk_buff *msg; int err; @@ -1217,8 +1238,7 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, if (!msg) return -ENOMEM; - err = devlink_nl_port_fill(msg, devlink, devlink_port, - DEVLINK_CMD_PORT_NEW, + err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW, info->snd_portid, info->snd_seq, 0, info->extack); if (err) { @@ -1235,32 +1255,39 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_port *devlink_port; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_port, &devlink->port_list, list) { if (idx < start) { idx++; continue; } - err = devlink_nl_port_fill(msg, devlink, devlink_port, + err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI, - cb->extack); + NLM_F_MULTI, cb->extack); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -1269,31 +1296,33 @@ out: return msg->len; } -static int devlink_port_type_set(struct devlink *devlink, - struct devlink_port *devlink_port, +static int devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type port_type) { int err; - if (devlink->ops->port_type_set) { - if (port_type == devlink_port->type) - return 0; - err = devlink->ops->port_type_set(devlink_port, port_type); - if (err) - return err; - devlink_port->desired_type = port_type; - devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + if (!devlink_port->devlink->ops->port_type_set) + return -EOPNOTSUPP; + + if (port_type == devlink_port->type) return 0; - } - return -EOPNOTSUPP; + + err = devlink_port->devlink->ops->port_type_set(devlink_port, + port_type); + if 
(err) + return err; + + devlink_port->desired_type = port_type; + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + return 0; } -static int -devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port, - const struct nlattr *attr, struct netlink_ext_ack *extack) +static int devlink_port_function_hw_addr_set(struct devlink_port *port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) { - const struct devlink_ops *ops; + const struct devlink_ops *ops = port->devlink->ops; const u8 *hw_addr; int hw_addr_len; @@ -1314,17 +1343,16 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port * } } - ops = devlink->ops; if (!ops->port_function_hw_addr_set) { NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes"); return -EOPNOTSUPP; } - return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); + return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len, + extack); } -static int devlink_port_fn_state_set(struct devlink *devlink, - struct devlink_port *port, +static int devlink_port_fn_state_set(struct devlink_port *port, const struct nlattr *attr, struct netlink_ext_ack *extack) { @@ -1332,18 +1360,18 @@ static int devlink_port_fn_state_set(struct devlink *devlink, const struct devlink_ops *ops; state = nla_get_u8(attr); - ops = devlink->ops; + ops = port->devlink->ops; if (!ops->port_fn_state_set) { NL_SET_ERR_MSG_MOD(extack, "Function does not support state setting"); return -EOPNOTSUPP; } - return ops->port_fn_state_set(devlink, port, state, extack); + return ops->port_fn_state_set(port, state, extack); } -static int -devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, - const struct nlattr *attr, struct netlink_ext_ack *extack) +static int devlink_port_function_set(struct devlink_port *port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) { struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1]; int err; @@ -1357,7 +1385,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]; if (attr) { - err = devlink_port_function_hw_addr_set(devlink, port, attr, extack); + err = devlink_port_function_hw_addr_set(port, attr, extack); if (err) return err; } @@ -1367,7 +1395,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, */ attr = tb[DEVLINK_PORT_FN_ATTR_STATE]; if (attr) - err = devlink_port_fn_state_set(devlink, port, attr, extack); + err = devlink_port_fn_state_set(port, attr, extack); if (!err) devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); @@ -1378,14 +1406,13 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; - struct devlink *devlink = devlink_port->devlink; int err; if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) { enum devlink_port_type port_type; port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]); - err = devlink_port_type_set(devlink, devlink_port, port_type); + err = devlink_port_type_set(devlink_port, port_type); if (err) return err; } @@ -1394,7 +1421,7 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION]; struct netlink_ext_ack *extack = info->extack; - err = devlink_port_function_set(devlink, devlink_port, attr, extack); + err = devlink_port_function_set(devlink_port, attr, extack); if (err) return err; } @@ -1489,9 +1516,8 @@ static int 
devlink_port_new_notifiy(struct devlink *devlink, goto out; } - err = devlink_nl_port_fill(msg, devlink, devlink_port, - DEVLINK_CMD_NEW, info->snd_portid, - info->snd_seq, 0, NULL); + err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW, + info->snd_portid, info->snd_seq, 0, NULL); if (err) goto out; @@ -1895,13 +1921,18 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { if (idx < start) { @@ -1915,11 +1946,14 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -2039,14 +2073,19 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_pool_get) - continue; + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_pool_get_dumpit(msg, start, &idx, devlink, @@ -2057,10 +2096,13 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -2252,14 +2294,19 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_port_pool_get) - continue; + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_port_pool_get_dumpit(msg, start, &idx, @@ -2270,10 +2317,13 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -2493,14 +2543,18 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if 
(!devlink_try_get(devlink)) + continue; + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_tc_pool_bind_get) - continue; + goto retry; mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { @@ -2513,10 +2567,13 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -4112,7 +4169,7 @@ out_free_msg: static void devlink_flash_update_begin_notify(struct devlink *devlink) { - struct devlink_flash_notify params = { 0 }; + struct devlink_flash_notify params = {}; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE, @@ -4121,7 +4178,7 @@ static void devlink_flash_update_begin_notify(struct devlink *devlink) static void devlink_flash_update_end_notify(struct devlink *devlink) { - struct devlink_flash_notify params = { 0 }; + struct devlink_flash_notify params = {}; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE_END, @@ -4278,6 +4335,21 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + .name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME, + .type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE, + }, + { + .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + .name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME, + .type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE, + }, + { + .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + .name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME, + .type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) @@ -4548,13 +4620,18 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, struct devlink_param_item *param_item; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(param_item, &devlink->param_list, list) { if (idx < start) { @@ -4570,11 +4647,14 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -4816,13 +4896,18 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, struct devlink_port *devlink_port; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_port, &devlink->port_list, list) { list_for_each_entry(param_item, @@ -4842,12 +4927,15 @@ static int 
devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -5057,7 +5145,6 @@ static void devlink_nl_region_notify(struct devlink_region *region, struct devlink_snapshot *snapshot, enum devlink_command cmd) { - struct devlink *devlink = region->devlink; struct sk_buff *msg; WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL); @@ -5066,8 +5153,9 @@ static void devlink_nl_region_notify(struct devlink_region *region, if (IS_ERR(msg)) return; - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(region->devlink), msg, 0, + DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } /** @@ -5385,15 +5473,22 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg, { struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink, &idx, start); +retry: + devlink_put(devlink); if (err) goto out; } @@ -5756,6 +5851,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, nla_nest_end(skb, chunks_attr); genlmsg_end(skb, hdr); mutex_unlock(&devlink->lock); + devlink_put(devlink); mutex_unlock(&devlink_mutex); return skb->len; @@ -5764,6 +5860,7 @@ nla_put_failure: genlmsg_cancel(skb, hdr); out_unlock: mutex_unlock(&devlink->lock); + devlink_put(devlink); out_dev: mutex_unlock(&devlink_mutex); return err; @@ -5910,22 +6007,20 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, { struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - continue; - if (idx < start) { - idx++; + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; - } - if (!devlink->ops->info_get) { - idx++; - continue; - } + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + + if (idx < start || !devlink->ops->info_get) + goto inc; mutex_lock(&devlink->lock); err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, @@ -5935,9 +6030,14 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, mutex_unlock(&devlink->lock); if (err == -EOPNOTSUPP) err = 0; - else if (err) + else if (err) { + devlink_put(devlink); break; + } +inc: idx++; +retry: + devlink_put(devlink); } mutex_unlock(&devlink_mutex); @@ -6751,11 +6851,11 @@ EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy); static int devlink_nl_health_reporter_fill(struct sk_buff *msg, - struct devlink *devlink, struct devlink_health_reporter *reporter, enum devlink_command cmd, u32 portid, u32 seq, int flags) { + struct devlink *devlink = reporter->devlink; struct nlattr *reporter_attr; void *hdr; @@ -6832,8 +6932,7 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, if (!msg) return; - err = 
devlink_nl_health_reporter_fill(msg, reporter->devlink, - reporter, cmd, 0, 0, 0); + err = devlink_nl_health_reporter_fill(msg, reporter, cmd, 0, 0, 0); if (err) { nlmsg_free(msg); return; @@ -7023,6 +7122,7 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb) goto unlock; reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); + devlink_put(devlink); mutex_unlock(&devlink_mutex); return reporter; unlock: @@ -7066,7 +7166,7 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, goto out; } - err = devlink_nl_health_reporter_fill(msg, devlink, reporter, + err = devlink_nl_health_reporter_fill(msg, reporter, DEVLINK_CMD_HEALTH_REPORTER_GET, info->snd_portid, info->snd_seq, 0); @@ -7089,13 +7189,18 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, struct devlink_port *port; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry_rep; + mutex_lock(&devlink->reporters_lock); list_for_each_entry(reporter, &devlink->reporter_list, list) { @@ -7103,24 +7208,29 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, idx++; continue; } - err = devlink_nl_health_reporter_fill(msg, devlink, - reporter, - DEVLINK_CMD_HEALTH_REPORTER_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); + err = devlink_nl_health_reporter_fill( + msg, reporter, DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + NLM_F_MULTI); if (err) { mutex_unlock(&devlink->reporters_lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->reporters_lock); +retry_rep: + devlink_put(devlink); } - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry_port; + mutex_lock(&devlink->lock); list_for_each_entry(port, &devlink->port_list, list) { mutex_lock(&port->reporters_lock); @@ -7129,14 +7239,15 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, idx++; continue; } - err = devlink_nl_health_reporter_fill(msg, devlink, reporter, - DEVLINK_CMD_HEALTH_REPORTER_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); + err = devlink_nl_health_reporter_fill( + msg, reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { mutex_unlock(&port->reporters_lock); mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; @@ -7144,6 +7255,8 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, mutex_unlock(&port->reporters_lock); } mutex_unlock(&devlink->lock); +retry_port: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -7672,13 +7785,18 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, struct devlink_trap_item *trap_item; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + 
xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(trap_item, &devlink->trap_list, list) { if (idx < start) { @@ -7692,11 +7810,14 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -7891,13 +8012,18 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, u32 portid = NETLINK_CB(cb->skb).portid; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(group_item, &devlink->trap_group_list, list) { @@ -7912,11 +8038,14 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -8197,13 +8326,18 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, u32 portid = NETLINK_CB(cb->skb).portid; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(policer_item, &devlink->trap_policer_list, list) { @@ -8218,11 +8352,14 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -8769,24 +8906,35 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops) * @ops: ops * @priv_size: size of user private data * @net: net namespace + * @dev: parent device * * Allocate new devlink instance resources, including devlink index * and name. 
*/ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, - size_t priv_size, struct net *net) + size_t priv_size, struct net *net, + struct device *dev) { struct devlink *devlink; + static u32 last_id; + int ret; - if (WARN_ON(!ops)) - return NULL; - + WARN_ON(!ops || !dev); if (!devlink_reload_actions_valid(ops)) return NULL; devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL); if (!devlink) return NULL; + + ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b, + &last_id, GFP_KERNEL); + if (ret < 0) { + kfree(devlink); + return NULL; + } + + devlink->dev = dev; devlink->ops = ops; xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); write_pnet(&devlink->_net, net); @@ -8803,6 +8951,9 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, INIT_LIST_HEAD(&devlink->trap_policer_list); mutex_init(&devlink->lock); mutex_init(&devlink->reporters_lock); + refcount_set(&devlink->refcount, 1); + init_completion(&devlink->comp); + return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc_ns); @@ -8811,14 +8962,11 @@ EXPORT_SYMBOL_GPL(devlink_alloc_ns); * devlink_register - Register devlink instance * * @devlink: devlink - * @dev: parent device */ -int devlink_register(struct devlink *devlink, struct device *dev) +int devlink_register(struct devlink *devlink) { - WARN_ON(devlink->dev); - devlink->dev = dev; mutex_lock(&devlink_mutex); - list_add_tail(&devlink->list, &devlink_list); + xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); devlink_notify(devlink, DEVLINK_CMD_NEW); mutex_unlock(&devlink_mutex); return 0; @@ -8832,11 +8980,14 @@ EXPORT_SYMBOL_GPL(devlink_register); */ void devlink_unregister(struct devlink *devlink) { + devlink_put(devlink); + wait_for_completion(&devlink->comp); + mutex_lock(&devlink_mutex); WARN_ON(devlink_reload_supported(devlink->ops) && devlink->reload_enabled); devlink_notify(devlink, DEVLINK_CMD_DEL); - list_del(&devlink->list); + xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_unregister); @@ -8898,6 +9049,7 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->port_list)); xa_destroy(&devlink->snapshot_ids); + xa_erase(&devlinks, devlink->index); kfree(devlink); } @@ -9787,6 +9939,22 @@ static int devlink_param_verify(const struct devlink_param *param) return devlink_param_driver_verify(param); } +static int __devlink_param_register_one(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *param, + enum devlink_command reg_cmd) +{ + int err; + + err = devlink_param_verify(param); + if (err) + return err; + + return devlink_param_register_one(devlink, port_index, + param_list, param, reg_cmd); +} + static int __devlink_params_register(struct devlink *devlink, unsigned int port_index, struct list_head *param_list, @@ -9801,12 +9969,8 @@ static int __devlink_params_register(struct devlink *devlink, mutex_lock(&devlink->lock); for (i = 0; i < params_count; i++, param++) { - err = devlink_param_verify(param); - if (err) - goto rollback; - - err = devlink_param_register_one(devlink, port_index, - param_list, param, reg_cmd); + err = __devlink_param_register_one(devlink, port_index, + param_list, param, reg_cmd); if (err) goto rollback; } @@ -9879,6 +10043,43 @@ void devlink_params_unregister(struct devlink *devlink, EXPORT_SYMBOL_GPL(devlink_params_unregister); /** + * devlink_param_register - register one configuration parameter + * + * @devlink: devlink + * @param: 
one configuration parameter + * + * Register the configuration parameter supported by the driver. + * Return: returns 0 on successful registration or error code otherwise. + */ +int devlink_param_register(struct devlink *devlink, + const struct devlink_param *param) +{ + int err; + + mutex_lock(&devlink->lock); + err = __devlink_param_register_one(devlink, 0, &devlink->param_list, + param, DEVLINK_CMD_PARAM_NEW); + mutex_unlock(&devlink->lock); + return err; +} +EXPORT_SYMBOL_GPL(devlink_param_register); + +/** + * devlink_param_unregister - unregister one configuration parameter + * @devlink: devlink + * @param: configuration parameter to unregister + */ +void devlink_param_unregister(struct devlink *devlink, + const struct devlink_param *param) +{ + mutex_lock(&devlink->lock); + devlink_param_unregister_one(devlink, 0, &devlink->param_list, param, + DEVLINK_CMD_PARAM_DEL); + mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devlink_param_unregister); + +/** * devlink_params_publish - publish configuration parameters * * @devlink: devlink @@ -9921,6 +10122,54 @@ void devlink_params_unpublish(struct devlink *devlink) EXPORT_SYMBOL_GPL(devlink_params_unpublish); /** + * devlink_param_publish - publish one configuration parameter + * + * @devlink: devlink + * @param: one configuration parameter + * + * Publish previously registered configuration parameter. + */ +void devlink_param_publish(struct devlink *devlink, + const struct devlink_param *param) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (param_item->param != param || param_item->published) + continue; + param_item->published = true; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_NEW); + break; + } +} +EXPORT_SYMBOL_GPL(devlink_param_publish); + +/** + * devlink_param_unpublish - unpublish one configuration parameter + * + * @devlink: devlink + * @param: one configuration parameter + * + * Unpublish previously registered configuration parameter. + */ +void devlink_param_unpublish(struct devlink *devlink, + const struct devlink_param *param) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (param_item->param != param || !param_item->published) + continue; + param_item->published = false; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_DEL); + break; + } +} +EXPORT_SYMBOL_GPL(devlink_param_unpublish); + +/** * devlink_port_params_register - register port configuration parameters * * @devlink_port: devlink port @@ -11275,23 +11524,29 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) { struct devlink *devlink; u32 actions_performed; + unsigned long index; int err; /* In case network namespace is getting destroyed, reload * all devlink instances from this namespace into init_net. 
*/ mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (net_eq(devlink_net(devlink), net)) { - if (WARN_ON(!devlink_reload_supported(devlink->ops))) - continue; - err = devlink_reload(devlink, &init_net, - DEVLINK_RELOAD_ACTION_DRIVER_REINIT, - DEVLINK_RELOAD_LIMIT_UNSPEC, - &actions_performed, NULL); - if (err && err != -EOPNOTSUPP) - pr_warn("Failed to reload devlink instance into init_net\n"); - } + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + + if (!net_eq(devlink_net(devlink), net)) + goto retry; + + WARN_ON(!devlink_reload_supported(devlink->ops)); + err = devlink_reload(devlink, &init_net, + DEVLINK_RELOAD_ACTION_DRIVER_REINIT, + DEVLINK_RELOAD_LIMIT_UNSPEC, + &actions_performed, NULL); + if (err && err != -EOPNOTSUPP) + pr_warn("Failed to reload devlink instance into init_net\n"); +retry: + devlink_put(devlink); } mutex_unlock(&devlink_mutex); } diff --git a/net/core/filter.c b/net/core/filter.c index 6f493ef5bb14..3aca07c44fad 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3933,6 +3933,31 @@ void bpf_clear_redirect_map(struct bpf_map *map) } } +DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); +EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key); + +u32 xdp_master_redirect(struct xdp_buff *xdp) +{ + struct net_device *master, *slave; + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev); + slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp); + if (slave && slave != xdp->rxq->dev) { + /* The target device is different from the receiving device, so + * redirect it to the new device. + * Using XDP_REDIRECT gets the correct behaviour from XDP enabled + * drivers to unmap the packet from their rx ring. 
+ */ + ri->tgt_index = slave->ifindex; + ri->map_id = INT_MAX; + ri->map_type = BPF_MAP_TYPE_UNSPEC; + return XDP_REDIRECT; + } + return XDP_TX; +} +EXPORT_SYMBOL_GPL(xdp_master_redirect); + int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 715b67f6c62f..6beaea13564a 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -321,13 +321,13 @@ EXPORT_SYMBOL(flow_block_cb_setup_simple); static DEFINE_MUTEX(flow_indr_block_lock); static LIST_HEAD(flow_block_indr_list); static LIST_HEAD(flow_block_indr_dev_list); +static LIST_HEAD(flow_indir_dev_list); struct flow_indr_dev { struct list_head list; flow_indr_block_bind_cb_t *cb; void *cb_priv; refcount_t refcnt; - struct rcu_head rcu; }; static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb, @@ -346,6 +346,33 @@ static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb, return indr_dev; } +struct flow_indir_dev_info { + void *data; + struct net_device *dev; + struct Qdisc *sch; + enum tc_setup_type type; + void (*cleanup)(struct flow_block_cb *block_cb); + struct list_head list; + enum flow_block_command command; + enum flow_block_binder_type binder_type; + struct list_head *cb_list; +}; + +static void existing_qdiscs_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) +{ + struct flow_block_offload bo; + struct flow_indir_dev_info *cur; + + list_for_each_entry(cur, &flow_indir_dev_list, list) { + memset(&bo, 0, sizeof(bo)); + bo.command = cur->command; + bo.binder_type = cur->binder_type; + INIT_LIST_HEAD(&bo.cb_list); + cb(cur->dev, cur->sch, cb_priv, cur->type, &bo, cur->data, cur->cleanup); + list_splice(&bo.cb_list, cur->cb_list); + } +} + int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) { struct flow_indr_dev *indr_dev; @@ -367,6 +394,7 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) } list_add(&indr_dev->list, &flow_block_indr_dev_list); + existing_qdiscs_register(cb, cb_priv); mutex_unlock(&flow_indr_block_lock); return 0; @@ -463,7 +491,59 @@ out: } EXPORT_SYMBOL(flow_indr_block_cb_alloc); -int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, +static struct flow_indir_dev_info *find_indir_dev(void *data) +{ + struct flow_indir_dev_info *cur; + + list_for_each_entry(cur, &flow_indir_dev_list, list) { + if (cur->data == data) + return cur; + } + return NULL; +} + +static int indir_dev_add(void *data, struct net_device *dev, struct Qdisc *sch, + enum tc_setup_type type, void (*cleanup)(struct flow_block_cb *block_cb), + struct flow_block_offload *bo) +{ + struct flow_indir_dev_info *info; + + info = find_indir_dev(data); + if (info) + return -EEXIST; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->data = data; + info->dev = dev; + info->sch = sch; + info->type = type; + info->cleanup = cleanup; + info->command = bo->command; + info->binder_type = bo->binder_type; + info->cb_list = bo->cb_list_head; + + list_add(&info->list, &flow_indir_dev_list); + return 0; +} + +static int indir_dev_remove(void *data) +{ + struct flow_indir_dev_info *info; + + info = find_indir_dev(data); + if (!info) + return -ENOENT; + + list_del(&info->list); + + kfree(info); + return 0; +} + +int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)) 
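With the bookkeeping above, an indirect block bind that happened before a hardware driver loaded is no longer lost: flow_indr_dev_register() now replays the stored binds through existing_qdiscs_register(). Purely as an illustration of the consumer side (my_indr_setup_cb, my_driver_init and my_priv are made-up names, not part of this patch), a driver would hook in roughly like this:

#include <net/flow_offload.h>

/* Illustrative sketch of a driver registering an indirect block callback.
 * Binds recorded in flow_indir_dev_list before this call are now replayed
 * to the callback, instead of only reaching drivers already registered.
 */
static int my_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch,
			    void *cb_priv, enum tc_setup_type type,
			    void *type_data, void *data,
			    void (*cleanup)(struct flow_block_cb *block_cb))
{
	/* A real driver would check 'type', inspect the struct
	 * flow_block_offload in 'type_data', allocate a flow_block_cb with
	 * flow_indr_block_cb_alloc() and link it into the offload's cb_list.
	 */
	return -EOPNOTSUPP;
}

static int my_driver_init(void *my_priv)
{
	return flow_indr_dev_register(my_indr_setup_cb, my_priv);
}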
@@ -471,6 +551,12 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, struct flow_indr_dev *this; mutex_lock(&flow_indr_block_lock); + + if (bo->command == FLOW_BLOCK_BIND) + indir_dev_add(data, dev, sch, type, cleanup, bo); + else if (bo->command == FLOW_BLOCK_UNBIND) + indir_dev_remove(data); + list_for_each_entry(this, &flow_block_indr_dev_list, list) this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 75431ca9300f..1a455847da54 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -158,7 +158,7 @@ static void linkwatch_do_dev(struct net_device *dev) clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); rfc2863_policy(dev); - if (dev->flags & IFF_UP && netif_device_present(dev)) { + if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) dev_activate(dev); else @@ -204,7 +204,8 @@ static void __linkwatch_run_queue(int urgent_only) dev = list_first_entry(&wrk, struct net_device, link_watch_list); list_del_init(&dev->link_watch_list); - if (urgent_only && !linkwatch_urgent_event(dev)) { + if (!netif_device_present(dev) || + (urgent_only && !linkwatch_urgent_event(dev))) { list_add_tail(&dev->link_watch_list, &lweventlist); continue; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b963d6b02c4f..2d5bc3a75fae 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2528,6 +2528,13 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx) return false; master = dev ? netdev_master_upper_dev_get(dev) : NULL; + + /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another + * invalid value for ifindex to denote "no master". + */ + if (master_idx == -1) + return !!master; + if (!master || master->ifindex != master_idx) return true; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index d8b9dbabd4a4..eab5fc88a002 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -77,8 +77,8 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); - seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " - "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", + seq_printf(seq, "%9s: %16llu %12llu %4llu %6llu %4llu %5llu %10llu %9llu " + "%16llu %12llu %4llu %6llu %4llu %5llu %7llu %10llu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, @@ -103,11 +103,11 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) static int dev_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) - seq_puts(seq, "Inter-| Receive " - " | Transmit\n" - " face |bytes packets errs drop fifo frame " - "compressed multicast|bytes packets errs " - "drop fifo colls carrier compressed\n"); + seq_puts(seq, "Interface| Receive " + " | Transmit\n" + " | bytes packets errs drop fifo frame " + "compressed multicast| bytes packets errs " + " drop fifo colls carrier compressed\n"); else dev_seq_printf_stats(seq, v); return 0; @@ -259,14 +259,14 @@ static int ptype_seq_show(struct seq_file *seq, void *v) struct packet_type *pt = v; if (v == SEQ_START_TOKEN) - seq_puts(seq, "Type Device Function\n"); + seq_puts(seq, "Type Device Function\n"); else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else seq_printf(seq, "%04x", ntohs(pt->type)); - 
seq_printf(seq, " %-8s %ps\n", + seq_printf(seq, " %-9s %ps\n", pt->dev ? pt->dev->name : "", pt->func); } @@ -327,12 +327,14 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) struct netdev_hw_addr *ha; struct net_device *dev = v; - if (v == SEQ_START_TOKEN) + if (v == SEQ_START_TOKEN) { + seq_puts(seq, "Ifindex Interface Refcount Global_use Address\n"); return 0; + } netif_addr_lock_bh(dev); netdev_for_each_mc_addr(ha, dev) { - seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n", + seq_printf(seq, "%-7d %-9s %-8d %-10d %*phN\n", dev->ifindex, dev->name, ha->refcount, ha->global_use, (int)dev->addr_len, ha->addr); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 9b5a767eddd5..a448a9b5bb2d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -98,7 +98,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) } ng = net_alloc_generic(); - if (ng == NULL) + if (!ng) return -ENOMEM; /* @@ -148,13 +148,6 @@ out: return err; } -static void ops_free(const struct pernet_operations *ops, struct net *net) -{ - if (ops->id && ops->size) { - kfree(net_generic(net, *ops->id)); - } -} - static void ops_pre_exit_list(const struct pernet_operations *ops, struct list_head *net_exit_list) { @@ -184,7 +177,7 @@ static void ops_free_list(const struct pernet_operations *ops, struct net *net; if (ops->size && ops->id) { list_for_each_entry(net, net_exit_list, exit_list) - ops_free(ops, net); + kfree(net_generic(net, *ops->id)); } } @@ -433,15 +426,18 @@ out_free: static void net_free(struct net *net) { - kfree(rcu_access_pointer(net->gen)); - kmem_cache_free(net_cachep, net); + if (refcount_dec_and_test(&net->passive)) { + kfree(rcu_access_pointer(net->gen)); + kmem_cache_free(net_cachep, net); + } } void net_drop_ns(void *p) { - struct net *ns = p; - if (ns && refcount_dec_and_test(&ns->passive)) - net_free(ns); + struct net *net = (struct net *)p; + + if (net) + net_free(net); } struct net *copy_net_ns(unsigned long flags, @@ -479,7 +475,7 @@ struct net *copy_net_ns(unsigned long flags, put_userns: key_remove_domain(net->key_domain); put_user_ns(user_ns); - net_drop_ns(net); + net_free(net); dec_ucounts: dec_net_namespaces(ucounts); return ERR_PTR(rv); @@ -611,7 +607,7 @@ static void cleanup_net(struct work_struct *work) dec_net_namespaces(net->ucounts); key_remove_domain(net->key_domain); put_user_ns(net->user_ns); - net_drop_ns(net); + net_free(net); } } @@ -1120,6 +1116,14 @@ static int __init net_ns_init(void) pure_initcall(net_ns_init); +static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) +{ + ops_pre_exit_list(ops, net_exit_list); + synchronize_rcu(); + ops_exit_list(ops, net_exit_list); + ops_free_list(ops, net_exit_list); +} + #ifdef CONFIG_NET_NS static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) @@ -1145,10 +1149,7 @@ static int __register_pernet_operations(struct list_head *list, out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); - ops_pre_exit_list(ops, &net_exit_list); - synchronize_rcu(); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + free_exit_list(ops, &net_exit_list); return error; } @@ -1161,10 +1162,8 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) /* See comment in __register_pernet_operations() */ for_each_net(net) list_add_tail(&net->exit_list, &net_exit_list); - ops_pre_exit_list(ops, &net_exit_list); - synchronize_rcu(); - 
ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + + free_exit_list(ops, &net_exit_list); } #else @@ -1187,10 +1186,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) } else { LIST_HEAD(net_exit_list); list_add(&init_net.exit_list, &net_exit_list); - ops_pre_exit_list(ops, &net_exit_list); - synchronize_rcu(); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + free_exit_list(ops, &net_exit_list); } } diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5e4eb45b139c..e1409056965a 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -24,6 +24,8 @@ #define DEFER_TIME (msecs_to_jiffies(1000)) #define DEFER_WARN_INTERVAL (60 * HZ) +#define BIAS_MAX LONG_MAX + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params) { @@ -67,6 +69,10 @@ static int page_pool_init(struct page_pool *pool, */ } + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT && + pool->p.flags & PP_FLAG_PAGE_FRAG) + return -EINVAL; + if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) return -ENOMEM; @@ -206,6 +212,19 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) return true; } +static void page_pool_set_pp_info(struct page_pool *pool, + struct page *page) +{ + page->pp = pool; + page->pp_magic |= PP_SIGNATURE; +} + +static void page_pool_clear_pp_info(struct page *page) +{ + page->pp_magic = 0; + page->pp = NULL; +} + static struct page *__page_pool_alloc_page_order(struct page_pool *pool, gfp_t gfp) { @@ -222,7 +241,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, return NULL; } - page->pp_magic |= PP_SIGNATURE; + page_pool_set_pp_info(pool, page); /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; @@ -266,7 +285,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, put_page(page); continue; } - page->pp_magic |= PP_SIGNATURE; + + page_pool_set_pp_info(pool, page); pool->alloc.cache[pool->alloc.count++] = page; /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; @@ -345,7 +365,7 @@ void page_pool_release_page(struct page_pool *pool, struct page *page) DMA_ATTR_SKIP_CPU_SYNC); page_pool_set_dma_addr(page, 0); skip_dma_unmap: - page->pp_magic = 0; + page_pool_clear_pp_info(page); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. @@ -405,6 +425,11 @@ static __always_inline struct page * __page_pool_put_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct) { + /* It is not the last user for the page frag case */ + if (pool->p.flags & PP_FLAG_PAGE_FRAG && + page_pool_atomic_sub_frag_count_return(page, 1)) + return NULL; + /* This allocator is optimized for the XDP mode that uses * one-frame-per-page, but have fallbacks that act like the * regular page allocator APIs. 
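The hunk that follows adds page_pool_alloc_frag() itself. For orientation, a driver opting into PP_FLAG_PAGE_FRAG would carve sub-page buffers out of the pool along these lines (a minimal sketch; my_rx_refill_one, RX_BUF_SIZE and the error handling are illustrative, not taken from this patch):

#include <net/page_pool.h>

#define RX_BUF_SIZE 2048	/* illustrative sub-page buffer size */

/* Hypothetical driver helper: take one RX buffer from a frag-enabled pool. */
static int my_rx_refill_one(struct page_pool *pool, dma_addr_t *dma)
{
	unsigned int offset;
	struct page *page;

	/* Hands back a page plus an offset into it; the frag count set via
	 * page_pool_set_frag_count() keeps the page alive until every
	 * fragment has been returned to the pool.
	 */
	page = page_pool_alloc_frag(pool, &offset, RX_BUF_SIZE, GFP_ATOMIC);
	if (!page)
		return -ENOMEM;

	*dma = page_pool_get_dma_addr(page) + offset;
	return 0;
}

The pool would have been created with PP_FLAG_PAGE_FRAG (typically together with PP_FLAG_DMA_MAP) in page_pool_params.flags; as the page_pool_init() hunk above shows, that combination is rejected when PAGE_POOL_DMA_USE_PP_FRAG_COUNT is set, i.e. when the DMA address would share the field used for the fragment count.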
@@ -497,6 +522,84 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, } EXPORT_SYMBOL(page_pool_put_page_bulk); +static struct page *page_pool_drain_frag(struct page_pool *pool, + struct page *page) +{ + long drain_count = BIAS_MAX - pool->frag_users; + + /* Some user is still using the page frag */ + if (likely(page_pool_atomic_sub_frag_count_return(page, + drain_count))) + return NULL; + + if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) { + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) + page_pool_dma_sync_for_device(pool, page, -1); + + return page; + } + + page_pool_return_page(pool, page); + return NULL; +} + +static void page_pool_free_frag(struct page_pool *pool) +{ + long drain_count = BIAS_MAX - pool->frag_users; + struct page *page = pool->frag_page; + + pool->frag_page = NULL; + + if (!page || + page_pool_atomic_sub_frag_count_return(page, drain_count)) + return; + + page_pool_return_page(pool, page); +} + +struct page *page_pool_alloc_frag(struct page_pool *pool, + unsigned int *offset, + unsigned int size, gfp_t gfp) +{ + unsigned int max_size = PAGE_SIZE << pool->p.order; + struct page *page = pool->frag_page; + + if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) || + size > max_size)) + return NULL; + + size = ALIGN(size, dma_get_cache_alignment()); + *offset = pool->frag_offset; + + if (page && *offset + size > max_size) { + page = page_pool_drain_frag(pool, page); + if (page) + goto frag_reset; + } + + if (!page) { + page = page_pool_alloc_pages(pool, gfp); + if (unlikely(!page)) { + pool->frag_page = NULL; + return NULL; + } + + pool->frag_page = page; + +frag_reset: + pool->frag_users = 1; + *offset = 0; + pool->frag_offset = size; + page_pool_set_frag_count(page, BIAS_MAX); + return page; + } + + pool->frag_users++; + pool->frag_offset = *offset + size; + return page; +} +EXPORT_SYMBOL(page_pool_alloc_frag); + static void page_pool_empty_ring(struct page_pool *pool) { struct page *page; @@ -602,6 +705,8 @@ void page_pool_destroy(struct page_pool *pool) if (!page_pool_put(pool)) return; + page_pool_free_frag(pool); + if (!page_pool_release(pool)) return; @@ -634,7 +739,15 @@ bool page_pool_return_skb_page(struct page *page) struct page_pool *pp; page = compound_head(page); - if (unlikely(page->pp_magic != PP_SIGNATURE)) + + /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation + * in order to preserve any existing bits, such as bit 0 for the + * head page of compound page and bit 1 for pfmemalloc page, so + * mask those bits for freeing side when doing below checking, + * and page_is_pfmemalloc() is checked in __page_pool_put_page() + * to avoid recycling the pfmemalloc page. + */ + if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE)) return false; pp = page->pp; @@ -644,7 +757,6 @@ bool page_pool_return_skb_page(struct page *page) * The page will be returned to the pool here regardless of the * 'flipped' fragment being in use or not. */ - page->pp = NULL; page_pool_put_full_page(pp, page, false); return true; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 314f97acf39d..9e5a3249373c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -175,6 +175,9 @@ #define IP_NAME_SZ 32 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ #define MPLS_STACK_BOTTOM htonl(0x00000100) +/* Max number of internet mix entries that can be specified in imix_weights. 
*/ +#define MAX_IMIX_ENTRIES 20 +#define IMIX_PRECISION 100 /* Precision of IMIX distribution */ #define func_enter() pr_debug("entering %s\n", __func__); @@ -242,6 +245,12 @@ static char *pkt_flag_names[] = { #define VLAN_TAG_SIZE(x) ((x)->vlan_id == 0xffff ? 0 : 4) #define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4) +struct imix_pkt { + u64 size; + u64 weight; + u64 count_so_far; +}; + struct flow_state { __be32 cur_daddr; int count; @@ -343,6 +352,12 @@ struct pktgen_dev { __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6 (see RFC 3260, sec. 4) */ + /* IMIX */ + unsigned int n_imix_entries; + struct imix_pkt imix_entries[MAX_IMIX_ENTRIES]; + /* Maps 0-IMIX_PRECISION range to imix_entry based on probability*/ + __u8 imix_distribution[IMIX_PRECISION]; + /* MPLS */ unsigned int nr_labels; /* Depth of stack, 0 = no MPLS */ __be32 labels[MAX_MPLS_LABELS]; @@ -471,6 +486,7 @@ static void pktgen_stop_all_threads(struct pktgen_net *pn); static void pktgen_stop(struct pktgen_thread *t); static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); +static void fill_imix_distribution(struct pktgen_dev *pkt_dev); /* Module parameters, defaults. */ static int pg_count_d __read_mostly = 1000; @@ -552,6 +568,16 @@ static int pktgen_if_show(struct seq_file *seq, void *v) (unsigned long long)pkt_dev->count, pkt_dev->min_pkt_size, pkt_dev->max_pkt_size); + if (pkt_dev->n_imix_entries > 0) { + seq_puts(seq, " imix_weights: "); + for (i = 0; i < pkt_dev->n_imix_entries; i++) { + seq_printf(seq, "%llu,%llu ", + pkt_dev->imix_entries[i].size, + pkt_dev->imix_entries[i].weight); + } + seq_puts(seq, "\n"); + } + seq_printf(seq, " frags: %d delay: %llu clone_skb: %d ifname: %s\n", pkt_dev->nfrags, (unsigned long long) pkt_dev->delay, @@ -669,6 +695,18 @@ static int pktgen_if_show(struct seq_file *seq, void *v) (unsigned long long)pkt_dev->sofar, (unsigned long long)pkt_dev->errors); + if (pkt_dev->n_imix_entries > 0) { + int i; + + seq_puts(seq, " imix_size_counts: "); + for (i = 0; i < pkt_dev->n_imix_entries; i++) { + seq_printf(seq, "%llu,%llu ", + pkt_dev->imix_entries[i].size, + pkt_dev->imix_entries[i].count_so_far); + } + seq_puts(seq, "\n"); + } + seq_printf(seq, " started: %lluus stopped: %lluus idle: %lluus\n", (unsigned long long) ktime_to_us(pkt_dev->started_at), @@ -792,6 +830,62 @@ done_str: return i; } +/* Parses imix entries from user buffer. + * The user buffer should consist of imix entries separated by spaces + * where each entry consists of size and weight delimited by commas. + * "size1,weight_1 size2,weight_2 ... size_n,weight_n" for example. 
+ */ +static ssize_t get_imix_entries(const char __user *buffer, + struct pktgen_dev *pkt_dev) +{ + const int max_digits = 10; + int i = 0; + long len; + char c; + + pkt_dev->n_imix_entries = 0; + + do { + unsigned long weight; + unsigned long size; + + len = num_arg(&buffer[i], max_digits, &size); + if (len < 0) + return len; + i += len; + if (get_user(c, &buffer[i])) + return -EFAULT; + /* Check for comma between size_i and weight_i */ + if (c != ',') + return -EINVAL; + i++; + + if (size < 14 + 20 + 8) + size = 14 + 20 + 8; + + len = num_arg(&buffer[i], max_digits, &weight); + if (len < 0) + return len; + if (weight <= 0) + return -EINVAL; + + pkt_dev->imix_entries[pkt_dev->n_imix_entries].size = size; + pkt_dev->imix_entries[pkt_dev->n_imix_entries].weight = weight; + + i += len; + if (get_user(c, &buffer[i])) + return -EFAULT; + + i++; + pkt_dev->n_imix_entries++; + + if (pkt_dev->n_imix_entries > MAX_IMIX_ENTRIES) + return -E2BIG; + } while (c == ' '); + + return i; +} + static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev) { unsigned int n = 0; @@ -960,6 +1054,20 @@ static ssize_t pktgen_if_write(struct file *file, return count; } + if (!strcmp(name, "imix_weights")) { + if (pkt_dev->clone_skb > 0) + return -EINVAL; + + len = get_imix_entries(&user_buffer[i], pkt_dev); + if (len < 0) + return len; + + fill_imix_distribution(pkt_dev); + + i += len; + return count; + } + if (!strcmp(name, "debug")) { len = num_arg(&user_buffer[i], 10, &value); if (len < 0) @@ -1082,10 +1190,16 @@ static ssize_t pktgen_if_write(struct file *file, len = num_arg(&user_buffer[i], 10, &value); if (len < 0) return len; + /* clone_skb is not supported for netif_receive xmit_mode and + * IMIX mode. + */ if ((value > 0) && ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) || !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) return -ENOTSUPP; + if (value > 0 && pkt_dev->n_imix_entries > 0) + return -EINVAL; + i += len; pkt_dev->clone_skb = value; @@ -2472,6 +2586,14 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) t = pkt_dev->min_pkt_size; } pkt_dev->cur_pkt_size = t; + } else if (pkt_dev->n_imix_entries > 0) { + struct imix_pkt *entry; + __u32 t = prandom_u32() % IMIX_PRECISION; + __u8 entry_index = pkt_dev->imix_distribution[t]; + + entry = &pkt_dev->imix_entries[entry_index]; + entry->count_so_far++; + pkt_dev->cur_pkt_size = entry->size; } set_cur_queue_map(pkt_dev); @@ -2479,6 +2601,32 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].count++; } +static void fill_imix_distribution(struct pktgen_dev *pkt_dev) +{ + int cumulative_probabilites[MAX_IMIX_ENTRIES]; + int j = 0; + __u64 cumulative_prob = 0; + __u64 total_weight = 0; + int i = 0; + + for (i = 0; i < pkt_dev->n_imix_entries; i++) + total_weight += pkt_dev->imix_entries[i].weight; + + /* Fill cumulative_probabilites with sum of normalized probabilities */ + for (i = 0; i < pkt_dev->n_imix_entries - 1; i++) { + cumulative_prob += div64_u64(pkt_dev->imix_entries[i].weight * + IMIX_PRECISION, + total_weight); + cumulative_probabilites[i] = cumulative_prob; + } + cumulative_probabilites[pkt_dev->n_imix_entries - 1] = 100; + + for (i = 0; i < IMIX_PRECISION; i++) { + if (i == cumulative_probabilites[j]) + j++; + pkt_dev->imix_distribution[i] = j; + } +} #ifdef CONFIG_XFRM static u32 pktgen_dst_metrics[RTAX_MAX + 1] = { @@ -3140,7 +3288,19 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) pps = div64_u64(pkt_dev->sofar * NSEC_PER_SEC, ktime_to_ns(elapsed)); - bps = pps * 8 * 
pkt_dev->cur_pkt_size; + if (pkt_dev->n_imix_entries > 0) { + int i; + struct imix_pkt *entry; + + bps = 0; + for (i = 0; i < pkt_dev->n_imix_entries; i++) { + entry = &pkt_dev->imix_entries[i]; + bps += entry->size * entry->count_so_far; + } + bps = div64_u64(bps * 8 * NSEC_PER_SEC, ktime_to_ns(elapsed)); + } else { + bps = pps * 8 * pkt_dev->cur_pkt_size; + } mbps = bps; do_div(mbps, 1000000); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7c9d32cfe607..2dcf1c084b20 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1959,6 +1959,13 @@ static bool link_master_filtered(struct net_device *dev, int master_idx) return false; master = netdev_master_upper_dev_get(dev); + + /* 0 is already used to denote IFLA_MASTER wasn't passed, therefore need + * another invalid value for ifindex to denote "no master". + */ + if (master_idx == -1) + return !!master; + if (!master || master->ifindex != master_idx) return true; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9240af2ea8c9..f9311762cc47 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -156,7 +156,7 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) void *data; fragsz = SKB_DATA_ALIGN(fragsz); - if (in_irq() || irqs_disabled()) { + if (in_hardirq() || irqs_disabled()) { nc = this_cpu_ptr(&netdev_alloc_cache); data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); } else { @@ -502,7 +502,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; - if (in_irq() || irqs_disabled()) { + if (in_hardirq() || irqs_disabled()) { nc = this_cpu_ptr(&netdev_alloc_cache); data = page_frag_alloc(nc, len, gfp_mask); pfmemalloc = nc->pfmemalloc; @@ -724,7 +724,7 @@ void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); if (skb->destructor) { - WARN_ON(in_irq()); + WARN_ON(in_hardirq()); skb->destructor(skb); } #if IS_ENABLED(CONFIG_NF_CONNTRACK) diff --git a/net/core/sock.c b/net/core/sock.c index aada649e07e8..950f1e70dbf5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2728,10 +2728,12 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { struct proto *prot = sk->sk_prot; long allocated = sk_memory_allocated_add(sk, amt); + bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg; bool charged = true; - if (mem_cgroup_sockets_enabled && sk->sk_memcg && - !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt))) + if (memcg_charge && + !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt, + gfp_memcg_charge()))) goto suppress_allocation; /* Under limit. */ @@ -2785,8 +2787,14 @@ suppress_allocation: /* Fail only if socket is _under_ its sndbuf. * In this case we cannot block, so that we have to fail. 
*/ - if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) + if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { + /* Force charge with __GFP_NOFAIL */ + if (memcg_charge && !charged) { + mem_cgroup_charge_skmem(sk->sk_memcg, amt, + gfp_memcg_charge() | __GFP_NOFAIL); + } return 1; + } } if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged)) @@ -2794,7 +2802,7 @@ suppress_allocation: sk_memory_allocated_sub(sk, amt); - if (mem_cgroup_sockets_enabled && sk->sk_memcg) + if (memcg_charge && charged) mem_cgroup_uncharge_skmem(sk->sk_memcg, amt); return 0; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 9cc9d1ee6cdb..c5c1d2b8045e 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -41,9 +41,9 @@ extern bool dccp_debug; #define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) #define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a) #else -#define dccp_pr_debug(format, a...) -#define dccp_pr_debug_cat(format, a...) -#define dccp_debug(format, a...) +#define dccp_pr_debug(format, a...) do {} while (0) +#define dccp_pr_debug_cat(format, a...) do {} while (0) +#define dccp_debug(format, a...) do {} while (0) #endif extern struct inet_hashinfo dccp_hashinfo; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index bca1b5d66df2..548285539752 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -138,6 +138,7 @@ config NET_DSA_TAG_LAN9303 config NET_DSA_TAG_SJA1105 tristate "Tag driver for NXP SJA1105 switches" + depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105 select PACKING help Say Y or M if you want to enable support for tagging frames with the diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index a4c525f1cb17..dcd67801eca4 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -49,6 +49,9 @@ int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v) * Can be used to notify the switching fabric of events such as cross-chip * bridging between disjoint trees (such as islands of tagger-compatible * switches bridged by an incompatible middle switch). + * + * WARNING: this function is not reliable during probe time, because probing + * between trees is asynchronous and not all DSA trees might have probed. 
*/ int dsa_broadcast(unsigned long e, void *v) { @@ -746,13 +749,14 @@ static int dsa_switch_setup(struct dsa_switch *ds) /* Add the switch to devlink before calling setup, so that setup can * add dpipe tables */ - ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv)); + ds->devlink = + devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev); if (!ds->devlink) return -ENOMEM; dl_priv = devlink_priv(ds->devlink); dl_priv->ds = ds; - err = devlink_register(ds->devlink, ds->dev); + err = devlink_register(ds->devlink); if (err) goto free_devlink; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index e43c5dc04282..b7a269e0513f 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -199,7 +199,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, /* port.c */ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops); -int dsa_port_set_state(struct dsa_port *dp, u8 state); +int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age); int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); void dsa_port_disable_rt(struct dsa_port *dp); @@ -241,11 +241,9 @@ int dsa_port_host_mdb_del(const struct dsa_port *dp, int dsa_port_pre_bridge_flags(const struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int dsa_port_bridge_flags(const struct dsa_port *dp, +int dsa_port_bridge_flags(struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct netlink_ext_ack *extack); int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack); @@ -263,8 +261,8 @@ int dsa_port_link_register_of(struct dsa_port *dp); void dsa_port_link_unregister_of(struct dsa_port *dp); int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); -int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid); -void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid); +int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast); +void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast); extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, @@ -454,6 +452,84 @@ static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb) skb->offload_fwd_mark = !!(dp->bridge_dev); } +/* Helper for removing DSA header tags from packets in the RX path. + * Must not be called before skb_pull(len). 
+ * skb->data + * | + * v + * | | | | | | | | | | | | | | | | | | | + * +-----------------------+-----------------------+---------------+-------+ + * | Destination MAC | Source MAC | DSA header | EType | + * +-----------------------+-----------------------+---------------+-------+ + * | | + * <----- len -----> <----- len -----> + * | + * >>>>>>> v + * >>>>>>> | | | | | | | | | | | | | | | + * >>>>>>> +-----------------------+-----------------------+-------+ + * >>>>>>> | Destination MAC | Source MAC | EType | + * +-----------------------+-----------------------+-------+ + * ^ + * | + * skb->data + */ +static inline void dsa_strip_etype_header(struct sk_buff *skb, int len) +{ + memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - len, 2 * ETH_ALEN); +} + +/* Helper for creating space for DSA header tags in TX path packets. + * Must not be called before skb_push(len). + * + * Before: + * + * <<<<<<< | | | | | | | | | | | | | | | + * ^ <<<<<<< +-----------------------+-----------------------+-------+ + * | <<<<<<< | Destination MAC | Source MAC | EType | + * | +-----------------------+-----------------------+-------+ + * <----- len -----> + * | + * | + * skb->data + * + * After: + * + * | | | | | | | | | | | | | | | | | | | + * +-----------------------+-----------------------+---------------+-------+ + * | Destination MAC | Source MAC | DSA header | EType | + * +-----------------------+-----------------------+---------------+-------+ + * ^ | | + * | <----- len -----> + * skb->data + */ +static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len) +{ + memmove(skb->data, skb->data + len, 2 * ETH_ALEN); +} + +/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from + * skb_mac_header(skb), which leaves skb->data pointing at the first byte after + * what the DSA master perceives as the EtherType (the beginning of the L3 + * protocol). Since DSA EtherType header taggers treat the EtherType as part of + * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header + * is located 2 bytes behind skb->data. Note that EtherType in this context + * means the first 2 bytes of the DSA header, not the encapsulated EtherType + * that will become visible after the DSA header is stripped. + */ +static inline void *dsa_etype_header_pos_rx(struct sk_buff *skb) +{ + return skb->data - 2; +} + +/* On TX, skb->data points to skb_mac_header(skb), which means that EtherType + * header taggers start exactly where the EtherType is (the EtherType is + * treated as part of the DSA header). + */ +static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb) +{ + return skb->data + 2 * ETH_ALEN; +} + /* switch.c */ int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); diff --git a/net/dsa/port.c b/net/dsa/port.c index b927d94b6934..979042a64d1a 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -30,7 +30,52 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) return dsa_tree_notify(dp->ds->dst, e, v); } -int dsa_port_set_state(struct dsa_port *dp, u8 state) +static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp) +{ + struct net_device *brport_dev = dsa_port_to_bridge_port(dp); + struct switchdev_notifier_fdb_info info = { + /* flush all VLANs */ + .vid = 0, + }; + + /* When the port becomes standalone it has already left the bridge. + * Don't notify the bridge in that case. 
+ */ + if (!brport_dev) + return; + + call_switchdev_notifiers(SWITCHDEV_FDB_FLUSH_TO_BRIDGE, + brport_dev, &info.info, NULL); +} + +static void dsa_port_fast_age(const struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_fast_age) + return; + + ds->ops->port_fast_age(ds, dp->index); + + dsa_port_notify_bridge_fdb_flush(dp); +} + +static bool dsa_port_can_configure_learning(struct dsa_port *dp) +{ + struct switchdev_brport_flags flags = { + .mask = BR_LEARNING, + }; + struct dsa_switch *ds = dp->ds; + int err; + + if (!ds->ops->port_bridge_flags || !ds->ops->port_pre_bridge_flags) + return false; + + err = ds->ops->port_pre_bridge_flags(ds, dp->index, flags, NULL); + return !err; +} + +int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) { struct dsa_switch *ds = dp->ds; int port = dp->index; @@ -40,10 +85,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state) ds->ops->port_stp_state_set(ds, port, state); - if (ds->ops->port_fast_age) { + if (!dsa_port_can_configure_learning(dp) || + (do_fast_age && dp->learning)) { /* Fast age FDB entries or flush appropriate forwarding database * for the given port, if we are moving it from Learning or * Forwarding state, to Disabled or Blocking or Listening state. + * Ports that were standalone before the STP state change don't + * need to fast age the FDB, since address learning is off in + * standalone mode. */ if ((dp->stp_state == BR_STATE_LEARNING || @@ -51,7 +100,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state) (state == BR_STATE_DISABLED || state == BR_STATE_BLOCKING || state == BR_STATE_LISTENING)) - ds->ops->port_fast_age(ds, port); + dsa_port_fast_age(dp); } dp->stp_state = state; @@ -59,11 +108,12 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state) return 0; } -static void dsa_port_set_state_now(struct dsa_port *dp, u8 state) +static void dsa_port_set_state_now(struct dsa_port *dp, u8 state, + bool do_fast_age) { int err; - err = dsa_port_set_state(dp, state); + err = dsa_port_set_state(dp, state, do_fast_age); if (err) pr_err("DSA: failed to set STP state %u (%d)\n", state, err); } @@ -81,7 +131,7 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy) } if (!dp->bridge_dev) - dsa_port_set_state_now(dp, BR_STATE_FORWARDING); + dsa_port_set_state_now(dp, BR_STATE_FORWARDING, false); if (dp->pl) phylink_start(dp->pl); @@ -109,7 +159,7 @@ void dsa_port_disable_rt(struct dsa_port *dp) phylink_stop(dp->pl); if (!dp->bridge_dev) - dsa_port_set_state_now(dp, BR_STATE_DISABLED); + dsa_port_set_state_now(dp, BR_STATE_DISABLED, false); if (ds->ops->port_disable) ds->ops->port_disable(ds, port); @@ -178,7 +228,7 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp, if (err) return err; - err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev)); + err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev), false); if (err && err != -EOPNOTSUPP) return err; @@ -186,10 +236,6 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp, if (err && err != -EOPNOTSUPP) return err; - err = dsa_port_mrouter(dp->cpu_dp, br_multicast_router(br), extack); - if (err && err != -EOPNOTSUPP) - return err; - err = dsa_port_ageing_time(dp, br_get_ageing_time(br)); if (err && err != -EOPNOTSUPP) return err; @@ -215,16 +261,10 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp) /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - dsa_port_set_state_now(dp, 
BR_STATE_FORWARDING); + dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true); /* VLAN filtering is handled by dsa_switch_bridge_leave */ - /* Some drivers treat the notification for having a local multicast - * router by allowing multicast to be flooded to the CPU, so we should - * allow this in standalone mode too. - */ - dsa_port_mrouter(dp->cpu_dp, true, NULL); - /* Ageing time may be global to the switch chip, so don't change it * here because we have no good reason (or value) to change it to. */ @@ -386,7 +426,9 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); if (err) - pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); + dev_err(dp->ds->dev, + "port %d failed to notify DSA_NOTIFIER_BRIDGE_LEAVE: %pe\n", + dp->index, ERR_PTR(err)); dsa_port_switchdev_unsync_attrs(dp); } @@ -485,8 +527,9 @@ void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag) err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info); if (err) - pr_err("DSA: failed to notify DSA_NOTIFIER_LAG_LEAVE: %d\n", - err); + dev_err(dp->ds->dev, + "port %d failed to notify DSA_NOTIFIER_LAG_LEAVE: %pe\n", + dp->index, ERR_PTR(err)); dsa_lag_unmap(dp->ds->dst, lag); } @@ -639,27 +682,35 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp, return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack); } -int dsa_port_bridge_flags(const struct dsa_port *dp, +int dsa_port_bridge_flags(struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack) { struct dsa_switch *ds = dp->ds; + int err; if (!ds->ops->port_bridge_flags) return -EOPNOTSUPP; - return ds->ops->port_bridge_flags(ds, dp->index, flags, extack); -} + err = ds->ops->port_bridge_flags(ds, dp->index, flags, extack); + if (err) + return err; -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct netlink_ext_ack *extack) -{ - struct dsa_switch *ds = dp->ds; + if (flags.mask & BR_LEARNING) { + bool learning = flags.val & BR_LEARNING; - if (!ds->ops->port_set_mrouter) - return -EOPNOTSUPP; + if (learning == dp->learning) + return 0; + + if ((dp->learning && !learning) && + (dp->stp_state == BR_STATE_LEARNING || + dp->stp_state == BR_STATE_FORWARDING)) + dsa_port_fast_age(dp); - return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack); + dp->learning = learning; + } + + return 0; } int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, @@ -1258,10 +1309,12 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr) err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_LEAVE, &info); if (err) - pr_err("DSA: failed to notify DSA_NOTIFIER_HSR_LEAVE\n"); + dev_err(dp->ds->dev, + "port %d failed to notify DSA_NOTIFIER_HSR_LEAVE: %pe\n", + dp->index, ERR_PTR(err)); } -int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid) +int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast) { struct dsa_notifier_tag_8021q_vlan_info info = { .tree_index = dp->ds->dst->index, @@ -1270,10 +1323,13 @@ int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid) .vid = vid, }; - return dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info); + if (broadcast) + return dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info); + + return dsa_port_notify(dp, DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info); } -void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) +void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast) { struct dsa_notifier_tag_8021q_vlan_info info = { .tree_index = 
dp->ds->dst->index, @@ -1283,8 +1339,12 @@ void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) }; int err; - err = dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info); + if (broadcast) + err = dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info); + else + err = dsa_port_notify(dp, DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info); if (err) - pr_err("DSA: failed to notify tag_8021q VLAN deletion: %pe\n", - ERR_PTR(err)); + dev_err(dp->ds->dev, + "port %d failed to notify tag_8021q VLAN %d deletion: %pe\n", + dp->index, vid, ERR_PTR(err)); } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6e1135d3ee33..eb9d9e53c536 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -286,7 +286,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) return -EOPNOTSUPP; - ret = dsa_port_set_state(dp, attr->u.stp_state); + ret = dsa_port_set_state(dp, attr->u.stp_state, true); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) @@ -314,12 +314,6 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack); break; - case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: - if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack); - break; default: ret = -EOPNOTSUPP; break; @@ -2287,8 +2281,8 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, static void dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work) { + struct switchdev_notifier_fdb_info info = {}; struct dsa_switch *ds = switchdev_work->ds; - struct switchdev_notifier_fdb_info info; struct dsa_port *dp; if (!dsa_is_user_port(ds, switchdev_work->port)) diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 654697ebb6f3..f8f7b7c34e7d 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -362,12 +362,12 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, continue; /* Install the RX VID of the targeted port in our VLAN table */ - err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid); + err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid, true); if (err) return err; /* Install our RX VID into the targeted port's VLAN table */ - err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid); + err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid, true); if (err) return err; } @@ -398,10 +398,10 @@ int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, continue; /* Remove the RX VID of the targeted port from our VLAN table */ - dsa_port_tag_8021q_vlan_del(dp, targeted_rx_vid); + dsa_port_tag_8021q_vlan_del(dp, targeted_rx_vid, true); /* Remove our RX VID from the targeted port's VLAN table */ - dsa_port_tag_8021q_vlan_del(targeted_dp, rx_vid); + dsa_port_tag_8021q_vlan_del(targeted_dp, rx_vid, true); } return 0; @@ -413,7 +413,8 @@ int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, { u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num); - return dsa_port_tag_8021q_vlan_add(dsa_to_port(ds, port), tx_vid); + return dsa_port_tag_8021q_vlan_add(dsa_to_port(ds, port), tx_vid, + true); } EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_offload); @@ -423,7 +424,7 @@ void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, { u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num); - dsa_port_tag_8021q_vlan_del(dsa_to_port(ds, port), tx_vid); + dsa_port_tag_8021q_vlan_del(dsa_to_port(ds, 
port), tx_vid, true); } EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_unoffload); @@ -450,7 +451,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) * L2 forwarding rules still take precedence when there are no VLAN * restrictions, so there are no concerns about leaking traffic. */ - err = dsa_port_tag_8021q_vlan_add(dp, rx_vid); + err = dsa_port_tag_8021q_vlan_add(dp, rx_vid, false); if (err) { dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %pe\n", @@ -462,7 +463,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) vlan_vid_add(master, ctx->proto, rx_vid); /* Finally apply the TX VID on this port and on the CPU port */ - err = dsa_port_tag_8021q_vlan_add(dp, tx_vid); + err = dsa_port_tag_8021q_vlan_add(dp, tx_vid, false); if (err) { dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %pe\n", @@ -489,11 +490,11 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) master = dp->cpu_dp->master; - dsa_port_tag_8021q_vlan_del(dp, rx_vid); + dsa_port_tag_8021q_vlan_del(dp, rx_vid, false); vlan_vid_del(master, ctx->proto, rx_vid); - dsa_port_tag_8021q_vlan_del(dp, tx_vid); + dsa_port_tag_8021q_vlan_del(dp, tx_vid, false); } static int dsa_tag_8021q_setup(struct dsa_switch *ds) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 96e93b544a0d..96dbb8ee2fee 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -99,7 +99,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, skb_push(skb, BRCM_TAG_LEN); if (offset) - memmove(skb->data, skb->data + BRCM_TAG_LEN, offset); + dsa_alloc_etype_header(skb, BRCM_TAG_LEN); brcm_tag = skb->data + offset; @@ -190,10 +190,7 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev) if (!nskb) return nskb; - /* Move the Ethernet DA and SA */ - memmove(nskb->data - ETH_HLEN, - nskb->data - ETH_HLEN - BRCM_TAG_LEN, - 2 * ETH_ALEN); + dsa_strip_etype_header(skb, BRCM_TAG_LEN); return nskb; } @@ -231,7 +228,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, skb_push(skb, BRCM_LEG_TAG_LEN); - memmove(skb->data, skb->data + BRCM_LEG_TAG_LEN, 2 * ETH_ALEN); + dsa_alloc_etype_header(skb, BRCM_LEG_TAG_LEN); brcm_tag = skb->data + 2 * ETH_ALEN; @@ -257,7 +254,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID))) return NULL; - brcm_tag = skb->data - 2; + brcm_tag = dsa_etype_header_pos_rx(skb); source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; @@ -270,10 +267,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, dsa_default_offload_fwd_mark(skb); - /* Move the Ethernet DA and SA */ - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - BRCM_LEG_TAG_LEN, - 2 * ETH_ALEN); + dsa_strip_etype_header(skb, BRCM_LEG_TAG_LEN); return skb; } diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index e32f8160e895..77d0ce89ab77 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -166,11 +166,11 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, if (skb->protocol == htons(ETH_P_8021Q)) { if (extra) { skb_push(skb, extra); - memmove(skb->data, skb->data + extra, 2 * ETH_ALEN); + dsa_alloc_etype_header(skb, extra); } /* Construct tagged DSA tag from 802.1Q tag. 
*/ - dsa_header = skb->data + 2 * ETH_ALEN + extra; + dsa_header = dsa_etype_header_pos_tx(skb) + extra; dsa_header[0] = (cmd << 6) | 0x20 | tag_dev; dsa_header[1] = tag_port << 3; @@ -181,10 +181,10 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, } } else { skb_push(skb, DSA_HLEN + extra); - memmove(skb->data, skb->data + DSA_HLEN + extra, 2 * ETH_ALEN); + dsa_alloc_etype_header(skb, DSA_HLEN + extra); /* Construct untagged DSA tag. */ - dsa_header = skb->data + 2 * ETH_ALEN + extra; + dsa_header = dsa_etype_header_pos_tx(skb) + extra; dsa_header[0] = (cmd << 6) | tag_dev; dsa_header[1] = tag_port << 3; @@ -205,7 +205,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, u8 *dsa_header; /* The ethertype field is part of the DSA header. */ - dsa_header = skb->data - 2; + dsa_header = dsa_etype_header_pos_rx(skb); cmd = dsa_header[0] >> 6; switch (cmd) { @@ -312,14 +312,10 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, memcpy(dsa_header, new_header, DSA_HLEN); if (extra) - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - extra, - 2 * ETH_ALEN); + dsa_strip_etype_header(skb, extra); } else { skb_pull_rcsum(skb, DSA_HLEN); - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - DSA_HLEN - extra, - 2 * ETH_ALEN); + dsa_strip_etype_header(skb, DSA_HLEN + extra); } return skb; @@ -364,7 +360,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) if (!skb) return NULL; - edsa_header = skb->data + 2 * ETH_ALEN; + edsa_header = dsa_etype_header_pos_tx(skb); edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; edsa_header[1] = ETH_P_EDSA & 0xff; edsa_header[2] = 0x00; diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 58d3a0e712d2..cb548188f813 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -62,9 +62,10 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) skb_push(skb, LAN9303_TAG_LEN); /* make room between MACs and Ether-Type */ - memmove(skb->data, skb->data + LAN9303_TAG_LEN, 2 * ETH_ALEN); + dsa_alloc_etype_header(skb, LAN9303_TAG_LEN); + + lan9303_tag = dsa_etype_header_pos_tx(skb); - lan9303_tag = (__be16 *)(skb->data + 2 * ETH_ALEN); tag = lan9303_xmit_use_arl(dp, skb->data) ? LAN9303_TAG_TX_USE_ALR : dp->index | LAN9303_TAG_TX_STP_OVERRIDE; @@ -86,13 +87,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev) return NULL; } - /* '->data' points into the middle of our special VLAN tag information: - * - * ~ MAC src | 0x81 | 0x00 | 0xyy | 0xzz | ether type - * ^ - * ->data - */ - lan9303_tag = (__be16 *)(skb->data - 2); + lan9303_tag = dsa_etype_header_pos_rx(skb); if (lan9303_tag[0] != htons(ETH_P_8021Q)) { dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n"); @@ -112,8 +107,9 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev) * and the current ethertype field. 
*/ skb_pull_rcsum(skb, 2 + 2); - memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN), - 2 * ETH_ALEN); + + dsa_strip_etype_header(skb, LAN9303_TAG_LEN); + if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU)) dsa_default_offload_fwd_mark(skb); diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index bbf37c031d44..415d8ece242a 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -41,10 +41,10 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, default: xmit_tpid = MTK_HDR_XMIT_UNTAGGED; skb_push(skb, MTK_HDR_LEN); - memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); + dsa_alloc_etype_header(skb, MTK_HDR_LEN); } - mtk_tag = skb->data + 2 * ETH_ALEN; + mtk_tag = dsa_etype_header_pos_tx(skb); /* Mark tag attribute on special tag insertion to notify hardware * whether that's a combined special tag with 802.1Q header. @@ -70,19 +70,13 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev) if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN))) return NULL; - /* The MTK header is added by the switch between src addr - * and ethertype at this point, skb->data points to 2 bytes - * after src addr so header should be 2 bytes right before. - */ - phdr = (__be16 *)(skb->data - 2); + phdr = dsa_etype_header_pos_rx(skb); hdr = ntohs(*phdr); /* Remove MTK tag and recalculate checksum. */ skb_pull_rcsum(skb, MTK_HDR_LEN); - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - MTK_HDR_LEN, - 2 * ETH_ALEN); + dsa_strip_etype_header(skb, MTK_HDR_LEN); /* Get source port information */ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 6e3136990491..1ea9401b8ace 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -36,8 +36,8 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) skb_push(skb, QCA_HDR_LEN); - memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN); - phdr = (__be16 *)(skb->data + 2 * ETH_ALEN); + dsa_alloc_etype_header(skb, QCA_HDR_LEN); + phdr = dsa_etype_header_pos_tx(skb); /* Set the version field, and set destination port information */ hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | @@ -58,11 +58,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev) if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) return NULL; - /* The QCA header is added by the switch between src addr and Ethertype - * At this point, skb->data points to ethertype so header should be - * right before - */ - phdr = (__be16 *)(skb->data - 2); + phdr = dsa_etype_header_pos_rx(skb); hdr = ntohs(*phdr); /* Make sure the version is correct */ @@ -72,8 +68,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev) /* Remove QCA tag and recalculate checksum */ skb_pull_rcsum(skb, QCA_HDR_LEN); - memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN, - ETH_HLEN - QCA_HDR_LEN); + dsa_strip_etype_header(skb, QCA_HDR_LEN); /* Get source port information */ port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index aaddca3c0245..40811bab4d09 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -47,8 +47,8 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, dp->index); skb_push(skb, RTL4_A_HDR_LEN); - memmove(skb->data, skb->data + RTL4_A_HDR_LEN, 2 * ETH_ALEN); - tag = skb->data + 2 * ETH_ALEN; + dsa_alloc_etype_header(skb, RTL4_A_HDR_LEN); + tag = dsa_etype_header_pos_tx(skb); /* Set Ethertype */ p = (__be16 *)tag; @@ -76,12 +76,7 @@ static struct 
sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, RTL4_A_HDR_LEN))) return NULL; - /* The RTL4 header has its own custom Ethertype 0x8899 and that - * starts right at the beginning of the packet, after the src - * ethernet addr. Apparently skb->data always points 2 bytes in, - * behind the Ethertype. - */ - tag = skb->data - 2; + tag = dsa_etype_header_pos_rx(skb); p = (__be16 *)tag; etype = ntohs(*p); if (etype != RTL4_A_ETHERTYPE) { @@ -108,10 +103,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, /* Remove RTL4 tag and recalculate checksum */ skb_pull_rcsum(skb, RTL4_A_HDR_LEN); - /* Move ethernet DA and SA in front of the data */ - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - RTL4_A_HDR_LEN, - 2 * ETH_ALEN); + dsa_strip_etype_header(skb, RTL4_A_HDR_LEN); dsa_default_offload_fwd_mark(skb); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 38b2792f971d..5b80a9049e2c 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -116,9 +116,14 @@ static inline bool sja1105_is_meta_frame(const struct sk_buff *skb) } /* Calls sja1105_port_deferred_xmit in sja1105_main.c */ -static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp, +static struct sk_buff *sja1105_defer_xmit(struct dsa_port *dp, struct sk_buff *skb) { + struct sja1105_port *sp = dp->priv; + + if (!dsa_port_is_sja1105(dp)) + return skb; + /* Increase refcount so the kfree_skb in dsa_slave_xmit * won't really free the packet. */ @@ -128,8 +133,13 @@ static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp, return NULL; } -static u16 sja1105_xmit_tpid(struct sja1105_port *sp) +static u16 sja1105_xmit_tpid(struct dsa_port *dp) { + struct sja1105_port *sp = dp->priv; + + if (unlikely(!dsa_port_is_sja1105(dp))) + return ETH_P_8021Q; + return sp->xmit_tpid; } @@ -155,7 +165,7 @@ static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, */ tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(dp->bridge_num); - return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv), tx_vid); + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), tx_vid); } static struct sk_buff *sja1105_xmit(struct sk_buff *skb, @@ -174,9 +184,9 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, * is the .port_deferred_xmit driver callback. */ if (unlikely(sja1105_is_link_local(skb))) - return sja1105_defer_xmit(dp->priv, skb); + return sja1105_defer_xmit(dp, skb); - return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv), + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); } @@ -188,7 +198,6 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb, u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); - struct ethhdr *eth_hdr; __be32 *tx_trailer; __be16 *tx_header; int trailer_pos; @@ -201,33 +210,29 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb, * tag_8021q TX VLANs. 
*/ if (likely(!sja1105_is_link_local(skb))) - return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv), + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); skb_push(skb, SJA1110_HEADER_LEN); - /* Move Ethernet header to the left, making space for DSA tag */ - memmove(skb->data, skb->data + SJA1110_HEADER_LEN, 2 * ETH_ALEN); + dsa_alloc_etype_header(skb, SJA1110_HEADER_LEN); trailer_pos = skb->len; - /* On TX, skb->data points to skb_mac_header(skb) */ - eth_hdr = (struct ethhdr *)skb->data; - tx_header = (__be16 *)(eth_hdr + 1); + tx_header = dsa_etype_header_pos_tx(skb); tx_trailer = skb_put(skb, SJA1110_TX_TRAILER_LEN); - eth_hdr->h_proto = htons(ETH_P_SJA1110); - - *tx_header = htons(SJA1110_HEADER_HOST_TO_SWITCH | - SJA1110_TX_HEADER_HAS_TRAILER | - SJA1110_TX_HEADER_TRAILER_POS(trailer_pos)); + tx_header[0] = htons(ETH_P_SJA1110); + tx_header[1] = htons(SJA1110_HEADER_HOST_TO_SWITCH | + SJA1110_TX_HEADER_HAS_TRAILER | + SJA1110_TX_HEADER_TRAILER_POS(trailer_pos)); *tx_trailer = cpu_to_be32(SJA1110_TX_TRAILER_PRIO(pcp) | SJA1110_TX_TRAILER_SWITCHID(dp->ds->index) | SJA1110_TX_TRAILER_DESTPORTS(BIT(dp->index))); if (clone) { u8 ts_id = SJA1105_SKB_CB(clone)->ts_id; - *tx_header |= htons(SJA1110_TX_HEADER_TAKE_TS); + tx_header[1] |= htons(SJA1110_TX_HEADER_TAKE_TS); *tx_trailer |= cpu_to_be32(SJA1110_TX_TRAILER_TSTAMP_ID(ts_id)); } @@ -270,16 +275,16 @@ static struct sk_buff bool is_link_local, bool is_meta) { - struct sja1105_port *sp; - struct dsa_port *dp; - - dp = dsa_slave_to_port(skb->dev); - sp = dp->priv; - /* Step 1: A timestampable frame was received. * Buffer it until we get its meta frame. */ if (is_link_local) { + struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct sja1105_port *sp = dp->priv; + + if (unlikely(!dsa_port_is_sja1105(dp))) + return skb; + if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state)) /* Do normal processing. */ return skb; @@ -312,8 +317,13 @@ static struct sk_buff * frame, which serves no further purpose). */ } else if (is_meta) { + struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct sja1105_port *sp = dp->priv; struct sk_buff *stampable_skb; + if (unlikely(!dsa_port_is_sja1105(dp))) + return skb; + /* Drop the meta frame if we're not in the right state * to process it. */ @@ -443,11 +453,11 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header) { + u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN; int switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header); int n_ts = SJA1110_RX_HEADER_N_TS(rx_header); struct net_device *master = skb->dev; struct dsa_port *cpu_dp; - u8 *buf = skb->data + 2; struct dsa_switch *ds; int i; @@ -532,9 +542,7 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, /* Advance skb->data past the DSA header */ skb_pull_rcsum(skb, SJA1110_HEADER_LEN); - /* Remove the DSA header */ - memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - SJA1110_HEADER_LEN, - 2 * ETH_ALEN); + dsa_strip_etype_header(skb, SJA1110_HEADER_LEN); /* With skb->data in its final place, update the MAC header * so that eth_hdr() continues to works properly. 
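
The tagger conversions above all reduce to the same pattern around the four helpers added to dsa_priv.h earlier in this series. As a hedged illustration only, a hypothetical EtherType-header tagger built on them could look like the sketch below; EXAMPLE_TAG_LEN, the 0xdead EtherType, the port-field layout and the example_tag_* names are invented for this example and do not correspond to any driver changed in this series.

/* Hypothetical tagger sketch: demonstrates the new helpers only.
 * All constants and names here are illustrative, not from the patch.
 */
#include <linux/etherdevice.h>

#include "dsa_priv.h"

#define EXAMPLE_TAG_LEN		4

static struct sk_buff *example_tag_xmit(struct sk_buff *skb,
					struct net_device *dev)
{
	struct dsa_port *dp = dsa_slave_to_port(dev);
	__be16 *tag;

	/* Open a gap between the MAC addresses and the EtherType */
	skb_push(skb, EXAMPLE_TAG_LEN);
	dsa_alloc_etype_header(skb, EXAMPLE_TAG_LEN);

	/* The DSA header starts where the EtherType used to be */
	tag = dsa_etype_header_pos_tx(skb);
	tag[0] = htons(0xdead);			/* made-up tag EtherType */
	tag[1] = htons(dp->index & 0x1f);	/* made-up port field */

	return skb;
}

static struct sk_buff *example_tag_rcv(struct sk_buff *skb,
				       struct net_device *dev)
{
	__be16 *tag;
	int port;

	if (unlikely(!pskb_may_pull(skb, EXAMPLE_TAG_LEN)))
		return NULL;

	/* eth_type_trans() has already pulled ETH_HLEN, so the header
	 * sits two bytes behind skb->data, exactly as
	 * dsa_etype_header_pos_rx() documents.
	 */
	tag = dsa_etype_header_pos_rx(skb);
	port = ntohs(tag[1]) & 0x1f;

	skb->dev = dsa_master_find_slave(dev, 0, port);
	if (!skb->dev)
		return NULL;

	/* Consume the tag, then close the gap in front of the payload */
	skb_pull_rcsum(skb, EXAMPLE_TAG_LEN);
	dsa_strip_etype_header(skb, EXAMPLE_TAG_LEN);

	return skb;
}

The value of the refactor is visible even in this toy: the MAC-address memmove() arithmetic and the "skb->data minus two" RX offset now live in one place (dsa_priv.h) instead of being repeated, slightly differently, in every EtherType-based tagger.
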
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index f8bca08e727e..1797a0a90019 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -35,7 +35,7 @@ int ethnl_ops_begin(struct net_device *dev) int ret; if (!dev) - return 0; + return -ENODEV; if (dev->dev.parent) pm_runtime_get_sync(dev->dev.parent); @@ -61,7 +61,7 @@ err: void ethnl_ops_complete(struct net_device *dev) { - if (dev && dev->ethtool_ops->complete) + if (dev->ethtool_ops->complete) dev->ethtool_ops->complete(dev); if (dev->dev.parent) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 90233efa1f6b..7bb9ef35c570 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -983,6 +983,11 @@ static const struct proto_ops ieee802154_dgram_ops = { .sendpage = sock_no_sendpage, }; +static void ieee802154_sock_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); +} + /* Create a socket. Initialise the socket, blank the addresses * set the state. */ @@ -1023,7 +1028,7 @@ static int ieee802154_create(struct net *net, struct socket *sock, sock->ops = ops; sock_init_data(sock, sk); - /* FIXME: sk->sk_destruct */ + sk->sk_destruct = ieee802154_sock_destruct; sk->sk_family = PF_IEEE802154; /* Checksums on by default */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 7e5072722f05..d2e2b3d18c66 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -803,10 +803,17 @@ static void igmp_gq_timer_expire(struct timer_list *t) static void igmp_ifc_timer_expire(struct timer_list *t) { struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer); + u32 mr_ifc_count; igmpv3_send_cr(in_dev); - if (in_dev->mr_ifc_count) { - in_dev->mr_ifc_count--; +restart: + mr_ifc_count = READ_ONCE(in_dev->mr_ifc_count); + + if (mr_ifc_count) { + if (cmpxchg(&in_dev->mr_ifc_count, + mr_ifc_count, + mr_ifc_count - 1) != mr_ifc_count) + goto restart; igmp_ifc_start_timer(in_dev, unsolicited_report_interval(in_dev)); } @@ -818,7 +825,7 @@ static void igmp_ifc_event(struct in_device *in_dev) struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); igmp_ifc_start_timer(in_dev, 1); } @@ -957,7 +964,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, in_dev->mr_qri; } /* cancel the interface change timer */ - in_dev->mr_ifc_count = 0; + WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); /* clear deleted report items */ @@ -1724,7 +1731,7 @@ void ip_mc_down(struct in_device *in_dev) igmp_group_dropped(pmc); #ifdef CONFIG_IP_MULTICAST - in_dev->mr_ifc_count = 0; + WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); in_dev->mr_gq_running = 0; @@ -1941,7 +1948,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; - in_dev->mr_ifc_count = pmc->crcount; + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(pmc->interface); @@ -2120,7 +2127,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* else no filters; keep old mode for reports */ pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; - in_dev->mr_ifc_count = pmc->crcount; + 
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(in_dev); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 754013fa393b..f25d02ad4a8a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -534,7 +534,8 @@ out: atomic_read(&newsk->sk_rmem_alloc)); mem_cgroup_sk_alloc(newsk); if (newsk->sk_memcg && amt) - mem_cgroup_charge_skmem(newsk->sk_memcg, amt); + mem_cgroup_charge_skmem(newsk->sk_memcg, amt, + GFP_KERNEL | __GFP_NOFAIL); release_sock(newsk); } diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 6922612df456..3de78416ec76 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -18,15 +18,12 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) -static int __net_init arptable_filter_table_init(struct net *net); - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_ARP, .priority = NF_IP_PRI_FILTER, - .table_init = arptable_filter_table_init, }; /* The work comes in here from netfilter.c */ @@ -39,7 +36,7 @@ arptable_filter_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *arpfilter_ops __read_mostly; -static int __net_init arptable_filter_table_init(struct net *net) +static int arptable_filter_table_init(struct net *net) { struct arpt_replace *repl; int err; @@ -69,30 +66,32 @@ static struct pernet_operations arptable_filter_net_ops = { static int __init arptable_filter_init(void) { - int ret; + int ret = xt_register_template(&packet_filter, + arptable_filter_table_init); + + if (ret < 0) + return ret; arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook); - if (IS_ERR(arpfilter_ops)) + if (IS_ERR(arpfilter_ops)) { + xt_unregister_template(&packet_filter); return PTR_ERR(arpfilter_ops); + } ret = register_pernet_subsys(&arptable_filter_net_ops); if (ret < 0) { + xt_unregister_template(&packet_filter); kfree(arpfilter_ops); return ret; } - ret = arptable_filter_table_init(&init_net); - if (ret) { - unregister_pernet_subsys(&arptable_filter_net_ops); - kfree(arpfilter_ops); - } - return ret; } static void __exit arptable_filter_fini(void) { unregister_pernet_subsys(&arptable_filter_net_ops); + xt_unregister_template(&packet_filter); kfree(arpfilter_ops); } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 8f7ca67475b7..8fd1aba8af31 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -66,11 +66,22 @@ struct clusterip_net { /* lock protects the configs list */ spinlock_t lock; + bool clusterip_deprecated_warning; #ifdef CONFIG_PROC_FS struct proc_dir_entry *procdir; /* mutex protects the config->pde*/ struct mutex mutex; #endif + unsigned int hook_users; +}; + +static unsigned int clusterip_arp_mangle(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); + +static const struct nf_hook_ops cip_arp_ops = { + .hook = clusterip_arp_mangle, + .pf = NFPROTO_ARP, + .hooknum = NF_ARP_OUT, + .priority = -1 }; static unsigned int clusterip_net_id __read_mostly; @@ -458,6 +469,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) static int clusterip_tg_check(const struct xt_tgchk_param *par) { struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + struct 
clusterip_net *cn = clusterip_pernet(par->net); const struct ipt_entry *e = par->entryinfo; struct clusterip_config *config; int ret, i; @@ -467,6 +479,9 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) return -EOPNOTSUPP; } + if (cn->hook_users == UINT_MAX) + return -EOVERFLOW; + if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { @@ -517,10 +532,23 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) return ret; } - if (!par->net->xt.clusterip_deprecated_warning) { + if (cn->hook_users == 0) { + ret = nf_register_net_hook(par->net, &cip_arp_ops); + + if (ret < 0) { + clusterip_config_entry_put(config); + clusterip_config_put(config); + nf_ct_netns_put(par->net, par->family); + return ret; + } + } + + cn->hook_users++; + + if (!cn->clusterip_deprecated_warning) { pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, " "use xt_cluster instead\n"); - par->net->xt.clusterip_deprecated_warning = true; + cn->clusterip_deprecated_warning = true; } cipinfo->config = config; @@ -531,6 +559,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) { const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + struct clusterip_net *cn = clusterip_pernet(par->net); /* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ @@ -539,6 +568,10 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) clusterip_config_put(cipinfo->config); nf_ct_netns_put(par->net, par->family); + cn->hook_users--; + + if (cn->hook_users == 0) + nf_unregister_net_hook(par->net, &cip_arp_ops); } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT @@ -602,9 +635,8 @@ static void arp_print(struct arp_payload *payload) #endif static unsigned int -arp_mangle(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) +clusterip_arp_mangle(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; @@ -654,13 +686,6 @@ arp_mangle(void *priv, return NF_ACCEPT; } -static const struct nf_hook_ops cip_arp_ops = { - .hook = arp_mangle, - .pf = NFPROTO_ARP, - .hooknum = NF_ARP_OUT, - .priority = -1 -}; - /*********************************************************************** * PROC DIR HANDLING ***********************************************************************/ @@ -817,20 +842,14 @@ static const struct proc_ops clusterip_proc_ops = { static int clusterip_net_init(struct net *net) { struct clusterip_net *cn = clusterip_pernet(net); - int ret; INIT_LIST_HEAD(&cn->configs); spin_lock_init(&cn->lock); - ret = nf_register_net_hook(net, &cip_arp_ops); - if (ret < 0) - return ret; - #ifdef CONFIG_PROC_FS cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); if (!cn->procdir) { - nf_unregister_net_hook(net, &cip_arp_ops); pr_err("Unable to proc dir entry\n"); return -ENOMEM; } @@ -850,7 +869,6 @@ static void clusterip_net_exit(struct net *net) cn->procdir = NULL; mutex_unlock(&cn->mutex); #endif - nf_unregister_net_hook(net, &cip_arp_ops); } static struct pernet_operations clusterip_net_ops = { diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 8272df7c6ad5..0eb0e2ab9bfc 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -19,7 +19,6 @@ MODULE_DESCRIPTION("iptables filter table"); #define 
FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) -static int __net_init iptable_filter_table_init(struct net *net); static const struct xt_table packet_filter = { .name = "filter", @@ -27,7 +26,6 @@ static const struct xt_table packet_filter = { .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_FILTER, - .table_init = iptable_filter_table_init, }; static unsigned int @@ -43,7 +41,7 @@ static struct nf_hook_ops *filter_ops __read_mostly; static bool forward __read_mostly = true; module_param(forward, bool, 0000); -static int __net_init iptable_filter_table_init(struct net *net) +static int iptable_filter_table_init(struct net *net) { struct ipt_replace *repl; int err; @@ -62,7 +60,7 @@ static int __net_init iptable_filter_table_init(struct net *net) static int __net_init iptable_filter_net_init(struct net *net) { - if (net == &init_net || !forward) + if (!forward) return iptable_filter_table_init(net); return 0; @@ -86,22 +84,32 @@ static struct pernet_operations iptable_filter_net_ops = { static int __init iptable_filter_init(void) { - int ret; + int ret = xt_register_template(&packet_filter, + iptable_filter_table_init); + + if (ret < 0) + return ret; filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook); - if (IS_ERR(filter_ops)) + if (IS_ERR(filter_ops)) { + xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); + } ret = register_pernet_subsys(&iptable_filter_net_ops); - if (ret < 0) + if (ret < 0) { + xt_unregister_template(&packet_filter); kfree(filter_ops); + return ret; + } - return ret; + return 0; } static void __exit iptable_filter_fini(void) { unregister_pernet_subsys(&iptable_filter_net_ops); + xt_unregister_template(&packet_filter); kfree(filter_ops); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 2abc3836f391..b52a4c8a14fc 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -25,15 +25,12 @@ MODULE_DESCRIPTION("iptables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -static int __net_init iptable_mangle_table_init(struct net *net); - static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_MANGLE, - .table_init = iptable_mangle_table_init, }; static unsigned int @@ -83,7 +80,7 @@ iptable_mangle_hook(void *priv, } static struct nf_hook_ops *mangle_ops __read_mostly; -static int __net_init iptable_mangle_table_init(struct net *net) +static int iptable_mangle_table_init(struct net *net) { struct ipt_replace *repl; int ret; @@ -113,32 +110,30 @@ static struct pernet_operations iptable_mangle_net_ops = { static int __init iptable_mangle_init(void) { - int ret; + int ret = xt_register_template(&packet_mangler, + iptable_mangle_table_init); mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); if (IS_ERR(mangle_ops)) { + xt_unregister_template(&packet_mangler); ret = PTR_ERR(mangle_ops); return ret; } ret = register_pernet_subsys(&iptable_mangle_net_ops); if (ret < 0) { + xt_unregister_template(&packet_mangler); kfree(mangle_ops); return ret; } - ret = iptable_mangle_table_init(&init_net); - if (ret) { - unregister_pernet_subsys(&iptable_mangle_net_ops); - kfree(mangle_ops); - } - return ret; } static void __exit iptable_mangle_fini(void) { unregister_pernet_subsys(&iptable_mangle_net_ops); + xt_unregister_template(&packet_mangler); kfree(mangle_ops); } diff 
--git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index a9913842ef18..45d7e072e6a5 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -17,8 +17,6 @@ struct iptable_nat_pernet { struct nf_hook_ops *nf_nat_ops; }; -static int __net_init iptable_nat_table_init(struct net *net); - static unsigned int iptable_nat_net_id __read_mostly; static const struct xt_table nf_nat_ipv4_table = { @@ -29,7 +27,6 @@ static const struct xt_table nf_nat_ipv4_table = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, .af = NFPROTO_IPV4, - .table_init = iptable_nat_table_init, }; static unsigned int iptable_nat_do_chain(void *priv, @@ -113,7 +110,7 @@ static void ipt_nat_unregister_lookups(struct net *net) kfree(ops); } -static int __net_init iptable_nat_table_init(struct net *net) +static int iptable_nat_table_init(struct net *net) { struct ipt_replace *repl; int ret; @@ -155,20 +152,25 @@ static struct pernet_operations iptable_nat_net_ops = { static int __init iptable_nat_init(void) { - int ret = register_pernet_subsys(&iptable_nat_net_ops); + int ret = xt_register_template(&nf_nat_ipv4_table, + iptable_nat_table_init); + + if (ret < 0) + return ret; - if (ret) + ret = register_pernet_subsys(&iptable_nat_net_ops); + if (ret < 0) { + xt_unregister_template(&nf_nat_ipv4_table); return ret; + } - ret = iptable_nat_table_init(&init_net); - if (ret) - unregister_pernet_subsys(&iptable_nat_net_ops); return ret; } static void __exit iptable_nat_exit(void) { unregister_pernet_subsys(&iptable_nat_net_ops); + xt_unregister_template(&nf_nat_ipv4_table); } module_init(iptable_nat_init); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index ceef397c1f5f..b88e0f36cd05 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -12,8 +12,6 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static int __net_init iptable_raw_table_init(struct net *net); - static bool raw_before_defrag __read_mostly; MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag"); module_param(raw_before_defrag, bool, 0000); @@ -24,7 +22,6 @@ static const struct xt_table packet_raw = { .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_RAW, - .table_init = iptable_raw_table_init, }; static const struct xt_table packet_raw_before_defrag = { @@ -33,7 +30,6 @@ static const struct xt_table packet_raw_before_defrag = { .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG, - .table_init = iptable_raw_table_init, }; /* The work comes in here from netfilter.c. 
*/ @@ -89,22 +85,24 @@ static int __init iptable_raw_init(void) pr_info("Enabling raw table before defrag\n"); } + ret = xt_register_template(table, + iptable_raw_table_init); + if (ret < 0) + return ret; + rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook); - if (IS_ERR(rawtable_ops)) + if (IS_ERR(rawtable_ops)) { + xt_unregister_template(table); return PTR_ERR(rawtable_ops); + } ret = register_pernet_subsys(&iptable_raw_net_ops); if (ret < 0) { + xt_unregister_template(table); kfree(rawtable_ops); return ret; } - ret = iptable_raw_table_init(&init_net); - if (ret) { - unregister_pernet_subsys(&iptable_raw_net_ops); - kfree(rawtable_ops); - } - return ret; } @@ -112,6 +110,7 @@ static void __exit iptable_raw_fini(void) { unregister_pernet_subsys(&iptable_raw_net_ops); kfree(rawtable_ops); + xt_unregister_template(&packet_raw); } module_init(iptable_raw_init); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 77973f5fd8f6..f519162a2fa5 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -25,15 +25,12 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static int __net_init iptable_security_table_init(struct net *net); - static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_SECURITY, - .table_init = iptable_security_table_init, }; static unsigned int @@ -45,7 +42,7 @@ iptable_security_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *sectbl_ops __read_mostly; -static int __net_init iptable_security_table_init(struct net *net) +static int iptable_security_table_init(struct net *net) { struct ipt_replace *repl; int ret; @@ -75,24 +72,25 @@ static struct pernet_operations iptable_security_net_ops = { static int __init iptable_security_init(void) { - int ret; + int ret = xt_register_template(&security_table, + iptable_security_table_init); + + if (ret < 0) + return ret; sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook); - if (IS_ERR(sectbl_ops)) + if (IS_ERR(sectbl_ops)) { + xt_unregister_template(&security_table); return PTR_ERR(sectbl_ops); + } ret = register_pernet_subsys(&iptable_security_net_ops); if (ret < 0) { + xt_unregister_template(&security_table); kfree(sectbl_ops); return ret; } - ret = iptable_security_table_init(&init_net); - if (ret) { - unregister_pernet_subsys(&iptable_security_net_ops); - kfree(sectbl_ops); - } - return ret; } @@ -100,6 +98,7 @@ static void __exit iptable_security_fini(void) { unregister_pernet_subsys(&iptable_security_net_ops); kfree(sectbl_ops); + xt_unregister_template(&security_table); } module_init(iptable_security_init); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 6ea3dc2e4219..6274462b86b4 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1041,7 +1041,7 @@ static void bbr_init(struct sock *sk) bbr->prior_cwnd = 0; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; bbr->rtt_cnt = 0; - bbr->next_rtt_delivered = 0; + bbr->next_rtt_delivered = tp->delivered; bbr->prev_ca_state = TCP_CA_Open; bbr->packet_conservation = 0; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 62ba8d0f2c60..59412d6354a0 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -368,8 +368,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, return NULL; } - if (syn_data && - tcp_fastopen_no_cookie(sk, 
dst, TFO_SERVER_COOKIE_NOT_REQD)) + if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; if (foc->len == 0) { diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index e09147ac9a99..fc61cd3fea65 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -298,6 +298,9 @@ int tcp_gro_complete(struct sk_buff *skb) if (th->cwr) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; + return 0; } EXPORT_SYMBOL(tcp_gro_complete); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 29553fce8502..6d72f3ea48c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3373,7 +3373,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size) sk_memory_allocated_add(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg) - mem_cgroup_charge_skmem(sk->sk_memcg, amt); + mem_cgroup_charge_skmem(sk->sk_memcg, amt, + gfp_memcg_charge() | __GFP_NOFAIL); } /* Send a FIN. The caller locks the socket for us. diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 9dde1e5fb449..1380a6b6f4ff 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -624,6 +624,10 @@ static int udp_gro_complete_segment(struct sk_buff *skb) skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; + + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; + return 0; } diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index bb784ea7bbd3..727ee8097012 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -19,15 +19,12 @@ MODULE_DESCRIPTION("ip6tables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) -static int __net_init ip6table_filter_table_init(struct net *net); - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_FILTER, - .table_init = ip6table_filter_table_init, }; /* The work comes in here from netfilter.c. 
*/ @@ -44,7 +41,7 @@ static struct nf_hook_ops *filter_ops __read_mostly; static bool forward = true; module_param(forward, bool, 0000); -static int __net_init ip6table_filter_table_init(struct net *net) +static int ip6table_filter_table_init(struct net *net) { struct ip6t_replace *repl; int err; @@ -63,7 +60,7 @@ static int __net_init ip6table_filter_table_init(struct net *net) static int __net_init ip6table_filter_net_init(struct net *net) { - if (net == &init_net || !forward) + if (!forward) return ip6table_filter_table_init(net); return 0; @@ -87,15 +84,24 @@ static struct pernet_operations ip6table_filter_net_ops = { static int __init ip6table_filter_init(void) { - int ret; + int ret = xt_register_template(&packet_filter, + ip6table_filter_table_init); + + if (ret < 0) + return ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook); - if (IS_ERR(filter_ops)) + if (IS_ERR(filter_ops)) { + xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); + } ret = register_pernet_subsys(&ip6table_filter_net_ops); - if (ret < 0) + if (ret < 0) { + xt_unregister_template(&packet_filter); kfree(filter_ops); + return ret; + } return ret; } @@ -103,6 +109,7 @@ static int __init ip6table_filter_init(void) static void __exit ip6table_filter_fini(void) { unregister_pernet_subsys(&ip6table_filter_net_ops); + xt_unregister_template(&packet_filter); kfree(filter_ops); } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index c76cffd63041..9b518ce37d6a 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -20,15 +20,12 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -static int __net_init ip6table_mangle_table_init(struct net *net); - static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_MANGLE, - .table_init = ip6table_mangle_table_init, }; static unsigned int @@ -76,7 +73,7 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb, } static struct nf_hook_ops *mangle_ops __read_mostly; -static int __net_init ip6table_mangle_table_init(struct net *net) +static int ip6table_mangle_table_init(struct net *net) { struct ip6t_replace *repl; int ret; @@ -106,29 +103,32 @@ static struct pernet_operations ip6table_mangle_net_ops = { static int __init ip6table_mangle_init(void) { - int ret; + int ret = xt_register_template(&packet_mangler, + ip6table_mangle_table_init); + + if (ret < 0) + return ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); - if (IS_ERR(mangle_ops)) + if (IS_ERR(mangle_ops)) { + xt_unregister_template(&packet_mangler); return PTR_ERR(mangle_ops); + } ret = register_pernet_subsys(&ip6table_mangle_net_ops); if (ret < 0) { + xt_unregister_template(&packet_mangler); kfree(mangle_ops); return ret; } - ret = ip6table_mangle_table_init(&init_net); - if (ret) { - unregister_pernet_subsys(&ip6table_mangle_net_ops); - kfree(mangle_ops); - } return ret; } static void __exit ip6table_mangle_fini(void) { unregister_pernet_subsys(&ip6table_mangle_net_ops); + xt_unregister_template(&packet_mangler); kfree(mangle_ops); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index b0292251e655..921c1723a01e 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -19,8 +19,6 @@ struct ip6table_nat_pernet { struct nf_hook_ops *nf_nat_ops; }; -static int 
__net_init ip6table_nat_table_init(struct net *net); - static unsigned int ip6table_nat_net_id __read_mostly; static const struct xt_table nf_nat_ipv6_table = { @@ -31,7 +29,6 @@ static const struct xt_table nf_nat_ipv6_table = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, .af = NFPROTO_IPV6, - .table_init = ip6table_nat_table_init, }; static unsigned int ip6table_nat_do_chain(void *priv, @@ -115,7 +112,7 @@ static void ip6t_nat_unregister_lookups(struct net *net) kfree(ops); } -static int __net_init ip6table_nat_table_init(struct net *net) +static int ip6table_nat_table_init(struct net *net) { struct ip6t_replace *repl; int ret; @@ -157,20 +154,23 @@ static struct pernet_operations ip6table_nat_net_ops = { static int __init ip6table_nat_init(void) { - int ret = register_pernet_subsys(&ip6table_nat_net_ops); + int ret = xt_register_template(&nf_nat_ipv6_table, + ip6table_nat_table_init); - if (ret) + if (ret < 0) return ret; - ret = ip6table_nat_table_init(&init_net); + ret = register_pernet_subsys(&ip6table_nat_net_ops); if (ret) - unregister_pernet_subsys(&ip6table_nat_net_ops); + xt_unregister_template(&nf_nat_ipv6_table); + return ret; } static void __exit ip6table_nat_exit(void) { unregister_pernet_subsys(&ip6table_nat_net_ops); + xt_unregister_template(&nf_nat_ipv6_table); } module_init(ip6table_nat_init); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index f63c106c521e..4f2a04af71d3 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -11,8 +11,6 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static int __net_init ip6table_raw_table_init(struct net *net); - static bool raw_before_defrag __read_mostly; MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag"); module_param(raw_before_defrag, bool, 0000); @@ -23,7 +21,6 @@ static const struct xt_table packet_raw = { .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_RAW, - .table_init = ip6table_raw_table_init, }; static const struct xt_table packet_raw_before_defrag = { @@ -32,7 +29,6 @@ static const struct xt_table packet_raw_before_defrag = { .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG, - .table_init = ip6table_raw_table_init, }; /* The work comes in here from netfilter.c. 
*/ @@ -45,7 +41,7 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *rawtable_ops __read_mostly; -static int __net_init ip6table_raw_table_init(struct net *net) +static int ip6table_raw_table_init(struct net *net) { struct ip6t_replace *repl; const struct xt_table *table = &packet_raw; @@ -79,37 +75,39 @@ static struct pernet_operations ip6table_raw_net_ops = { static int __init ip6table_raw_init(void) { - int ret; const struct xt_table *table = &packet_raw; + int ret; if (raw_before_defrag) { table = &packet_raw_before_defrag; - pr_info("Enabling raw table before defrag\n"); } + ret = xt_register_template(table, ip6table_raw_table_init); + if (ret < 0) + return ret; + /* Register hooks */ rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook); - if (IS_ERR(rawtable_ops)) + if (IS_ERR(rawtable_ops)) { + xt_unregister_template(table); return PTR_ERR(rawtable_ops); + } ret = register_pernet_subsys(&ip6table_raw_net_ops); if (ret < 0) { kfree(rawtable_ops); + xt_unregister_template(table); return ret; } - ret = ip6table_raw_table_init(&init_net); - if (ret) { - unregister_pernet_subsys(&ip6table_raw_net_ops); - kfree(rawtable_ops); - } return ret; } static void __exit ip6table_raw_fini(void) { unregister_pernet_subsys(&ip6table_raw_net_ops); + xt_unregister_template(&packet_raw); kfree(rawtable_ops); } diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 8dc335cf450b..931674034d8b 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -24,15 +24,12 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static int __net_init ip6table_security_table_init(struct net *net); - static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_SECURITY, - .table_init = ip6table_security_table_init, }; static unsigned int @@ -44,7 +41,7 @@ ip6table_security_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *sectbl_ops __read_mostly; -static int __net_init ip6table_security_table_init(struct net *net) +static int ip6table_security_table_init(struct net *net) { struct ip6t_replace *repl; int ret; @@ -74,29 +71,32 @@ static struct pernet_operations ip6table_security_net_ops = { static int __init ip6table_security_init(void) { - int ret; + int ret = xt_register_template(&security_table, + ip6table_security_table_init); + + if (ret < 0) + return ret; sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook); - if (IS_ERR(sectbl_ops)) + if (IS_ERR(sectbl_ops)) { + xt_unregister_template(&security_table); return PTR_ERR(sectbl_ops); + } ret = register_pernet_subsys(&ip6table_security_net_ops); if (ret < 0) { kfree(sectbl_ops); + xt_unregister_template(&security_table); return ret; } - ret = ip6table_security_table_init(&init_net); - if (ret) { - unregister_pernet_subsys(&ip6table_security_net_ops); - kfree(sectbl_ops); - } return ret; } static void __exit ip6table_security_fini(void) { unregister_pernet_subsys(&ip6table_security_net_ops); + xt_unregister_template(&security_table); kfree(sectbl_ops); } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 44453b35c7b7..18316ee3c692 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1044,7 +1044,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (err == 0) { atomic_dec(&iucv->skbs_in_xmit); skb_unlink(skb, 
&iucv->send_skb_q); - kfree_skb(skb); + consume_skb(skb); } /* this error should never happen since the */ @@ -1293,7 +1293,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, } } - kfree_skb(skb); + consume_skb(skb); if (iucv->transport == AF_IUCV_TRANS_HIPER) { atomic_inc(&iucv->msg_recv); if (atomic_read(&iucv->msg_recv) > iucv->msglimit) { @@ -1756,7 +1756,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_unlock_irqrestore(&list->lock, flags); if (this) { - kfree_skb(this); + consume_skb(this); /* wake up any process waiting for sending */ iucv_sock_wake_msglim(sk); } @@ -1903,17 +1903,17 @@ static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb) { struct iucv_sock *iucv = iucv_sk(sk); - if (!iucv) - goto out; - if (sk->sk_state != IUCV_BOUND) - goto out; + if (!iucv || sk->sk_state != IUCV_BOUND) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + bh_lock_sock(sk); iucv->msglimit_peer = iucv_trans_hdr(skb)->window; sk->sk_state = IUCV_CONNECTED; sk->sk_state_change(sk); bh_unlock_sock(sk); -out: - kfree_skb(skb); + consume_skb(skb); return NET_RX_SUCCESS; } @@ -1924,16 +1924,16 @@ static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb) { struct iucv_sock *iucv = iucv_sk(sk); - if (!iucv) - goto out; - if (sk->sk_state != IUCV_BOUND) - goto out; + if (!iucv || sk->sk_state != IUCV_BOUND) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + bh_lock_sock(sk); sk->sk_state = IUCV_DISCONN; sk->sk_state_change(sk); bh_unlock_sock(sk); -out: - kfree_skb(skb); + consume_skb(skb); return NET_RX_SUCCESS; } @@ -1945,16 +1945,18 @@ static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb) struct iucv_sock *iucv = iucv_sk(sk); /* other end of connection closed */ - if (!iucv) - goto out; + if (!iucv) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + bh_lock_sock(sk); if (sk->sk_state == IUCV_CONNECTED) { sk->sk_state = IUCV_DISCONN; sk->sk_state_change(sk); } bh_unlock_sock(sk); -out: - kfree_skb(skb); + consume_skb(skb); return NET_RX_SUCCESS; } @@ -2107,7 +2109,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, case (AF_IUCV_FLAG_WIN): err = afiucv_hs_callback_win(sk, skb); if (skb->len == sizeof(struct af_iucv_trans_hdr)) { - kfree_skb(skb); + consume_skb(skb); break; } fallthrough; /* and receive non-zero length data */ @@ -2262,21 +2264,11 @@ static struct packet_type iucv_packet_type = { .func = afiucv_hs_rcv, }; -static int afiucv_iucv_init(void) -{ - return pr_iucv->iucv_register(&af_iucv_handler, 0); -} - -static void afiucv_iucv_exit(void) -{ - pr_iucv->iucv_unregister(&af_iucv_handler, 0); -} - static int __init afiucv_init(void) { int err; - if (MACHINE_IS_VM) { + if (MACHINE_IS_VM && IS_ENABLED(CONFIG_IUCV)) { cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); if (unlikely(err)) { WARN_ON(err); @@ -2284,11 +2276,7 @@ static int __init afiucv_init(void) goto out; } - pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv"); - if (!pr_iucv) { - printk(KERN_WARNING "iucv_if lookup failed\n"); - memset(&iucv_userid, 0, sizeof(iucv_userid)); - } + pr_iucv = &iucv_if; } else { memset(&iucv_userid, 0, sizeof(iucv_userid)); pr_iucv = NULL; @@ -2302,7 +2290,7 @@ static int __init afiucv_init(void) goto out_proto; if (pr_iucv) { - err = afiucv_iucv_init(); + err = pr_iucv->iucv_register(&af_iucv_handler, 0); if (err) goto out_sock; } @@ -2316,23 +2304,19 @@ static int __init afiucv_init(void) out_notifier: if (pr_iucv) - afiucv_iucv_exit(); + 
pr_iucv->iucv_unregister(&af_iucv_handler, 0); out_sock: sock_unregister(PF_IUCV); out_proto: proto_unregister(&iucv_proto); out: - if (pr_iucv) - symbol_put(iucv_if); return err; } static void __exit afiucv_exit(void) { - if (pr_iucv) { - afiucv_iucv_exit(); - symbol_put(iucv_if); - } + if (pr_iucv) + pr_iucv->iucv_unregister(&af_iucv_handler, 0); unregister_netdevice_notifier(&afiucv_netdev_notifier); dev_remove_pack(&iucv_packet_type); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index e6795d5a546a..f3343a8541a5 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -286,19 +286,19 @@ static union iucv_param *iucv_param_irq[NR_CPUS]; */ static inline int __iucv_call_b2f0(int command, union iucv_param *parm) { - register unsigned long reg0 asm ("0"); - register unsigned long reg1 asm ("1"); - int ccode; + int cc; - reg0 = command; - reg1 = (unsigned long)parm; asm volatile( - " .long 0xb2f01000\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1) - : "m" (*parm) : "cc"); - return ccode; + " lgr 0,%[reg0]\n" + " lgr 1,%[reg1]\n" + " .long 0xb2f01000\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=&d" (cc), "+m" (*parm) + : [reg0] "d" ((unsigned long)command), + [reg1] "d" ((unsigned long)parm) + : "cc", "0", "1"); + return cc; } static inline int iucv_call_b2f0(int command, union iucv_param *parm) @@ -319,19 +319,21 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm) */ static int __iucv_query_maxconn(void *param, unsigned long *max_pathid) { - register unsigned long reg0 asm ("0"); - register unsigned long reg1 asm ("1"); - int ccode; + unsigned long reg1 = (unsigned long)param; + int cc; - reg0 = IUCV_QUERY; - reg1 = (unsigned long) param; asm volatile ( + " lghi 0,%[cmd]\n" + " lgr 1,%[reg1]\n" " .long 0xb2f01000\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc"); + " ipm %[cc]\n" + " srl %[cc],28\n" + " lgr %[reg1],1\n" + : [cc] "=&d" (cc), [reg1] "+&d" (reg1) + : [cmd] "K" (IUCV_QUERY) + : "cc", "0", "1"); *max_pathid = reg1; - return ccode; + return cc; } static int iucv_query_maxconn(void) @@ -500,14 +502,14 @@ static void iucv_setmask_mp(void) { int cpu; - get_online_cpus(); + cpus_read_lock(); for_each_online_cpu(cpu) /* Enable all cpus with a declared buffer. */ if (cpumask_test_cpu(cpu, &iucv_buffer_cpumask) && !cpumask_test_cpu(cpu, &iucv_irq_cpumask)) smp_call_function_single(cpu, iucv_allow_cpu, NULL, 1); - put_online_cpus(); + cpus_read_unlock(); } /** @@ -540,7 +542,7 @@ static int iucv_enable(void) size_t alloc_size; int cpu, rc; - get_online_cpus(); + cpus_read_lock(); rc = -ENOMEM; alloc_size = iucv_max_pathid * sizeof(struct iucv_path); iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); @@ -553,12 +555,12 @@ static int iucv_enable(void) if (cpumask_empty(&iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. 
*/ goto out; - put_online_cpus(); + cpus_read_unlock(); return 0; out: kfree(iucv_path_table); iucv_path_table = NULL; - put_online_cpus(); + cpus_read_unlock(); return rc; } @@ -571,11 +573,11 @@ out: */ static void iucv_disable(void) { - get_online_cpus(); + cpus_read_lock(); on_each_cpu(iucv_retrieve_cpu, NULL, 1); kfree(iucv_path_table); iucv_path_table = NULL; - put_online_cpus(); + cpus_read_unlock(); } static int iucv_cpu_dead(unsigned int cpu) @@ -784,7 +786,7 @@ static int iucv_reboot_event(struct notifier_block *this, if (cpumask_empty(&iucv_irq_cpumask)) return NOTIFY_DONE; - get_online_cpus(); + cpus_read_lock(); on_each_cpu_mask(&iucv_irq_cpumask, iucv_block_cpu, NULL, 1); preempt_disable(); for (i = 0; i < iucv_max_pathid; i++) { @@ -792,7 +794,7 @@ static int iucv_reboot_event(struct notifier_block *this, iucv_sever_pathid(i, NULL); } preempt_enable(); - put_online_cpus(); + cpus_read_unlock(); iucv_disable(); return NOTIFY_DONE; } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 05f4c3c72619..fcae76ddd586 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -260,6 +260,8 @@ static void ieee80211_restart_work(struct work_struct *work) flush_work(&local->radar_detected_work); rtnl_lock(); + /* we might do interface manipulations, so need both */ + wiphy_lock(local->hw.wiphy); WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), "%s called with hardware scan in progress\n", __func__); diff --git a/net/mctp/route.c b/net/mctp/route.c index b3101375c8e7..5265525011ad 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -710,8 +710,9 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, /* route management */ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, unsigned int daddr_extent, unsigned int mtu, - bool is_local) + unsigned char type) { + int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb); struct net *net = dev_net(mdev->dev); struct mctp_route *rt, *ert; @@ -721,6 +722,17 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) return -EINVAL; + switch (type) { + case RTN_LOCAL: + rtfn = mctp_route_input; + break; + case RTN_UNICAST: + rtfn = mctp_route_output; + break; + default: + return -EINVAL; + } + rt = mctp_route_alloc(); if (!rt) return -ENOMEM; @@ -730,7 +742,8 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, rt->mtu = mtu; rt->dev = mdev; dev_hold(rt->dev->dev); - rt->output = is_local ? mctp_route_input : mctp_route_output; + rt->type = type; + rt->output = rtfn; ASSERT_RTNL(); /* Prevent duplicate identical routes. 
*/ @@ -777,7 +790,7 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) { - return mctp_route_add(mdev, addr, 0, 0, true); + return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL); } int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr) @@ -936,7 +949,11 @@ static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, /* TODO: parse mtu from nlparse */ mtu = 0; - rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, false); + if (rtm->rtm_type != RTN_UNICAST) + return -EINVAL; + + rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, + rtm->rtm_type); return rc; } @@ -985,7 +1002,7 @@ static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, hdr->rtm_table = RT_TABLE_DEFAULT; hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */ hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */ - hdr->rtm_type = RTN_ANYCAST; /* TODO: type from route */ + hdr->rtm_type = rt->type; if (nla_put_u8(skb, RTA_DST, rt->min)) goto cancel; diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 7d738bd06f2c..8b235468c88f 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -21,43 +21,50 @@ struct mptcp_pernet { struct ctl_table_header *ctl_table_hdr; #endif - u8 mptcp_enabled; unsigned int add_addr_timeout; + unsigned int stale_loss_cnt; + u8 mptcp_enabled; u8 checksum_enabled; u8 allow_join_initial_addr_port; }; -static struct mptcp_pernet *mptcp_get_pernet(struct net *net) +static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) { return net_generic(net, mptcp_pernet_id); } -int mptcp_is_enabled(struct net *net) +int mptcp_is_enabled(const struct net *net) { return mptcp_get_pernet(net)->mptcp_enabled; } -unsigned int mptcp_get_add_addr_timeout(struct net *net) +unsigned int mptcp_get_add_addr_timeout(const struct net *net) { return mptcp_get_pernet(net)->add_addr_timeout; } -int mptcp_is_checksum_enabled(struct net *net) +int mptcp_is_checksum_enabled(const struct net *net) { return mptcp_get_pernet(net)->checksum_enabled; } -int mptcp_allow_join_id0(struct net *net) +int mptcp_allow_join_id0(const struct net *net) { return mptcp_get_pernet(net)->allow_join_initial_addr_port; } +unsigned int mptcp_stale_loss_cnt(const struct net *net) +{ + return mptcp_get_pernet(net)->stale_loss_cnt; +} + static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; pernet->checksum_enabled = 0; pernet->allow_join_initial_addr_port = 1; + pernet->stale_loss_cnt = 4; } #ifdef CONFIG_SYSCTL @@ -95,6 +102,12 @@ static struct ctl_table mptcp_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE }, + { + .procname = "stale_loss_cnt", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + }, {} }; @@ -114,6 +127,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[1].data = &pernet->add_addr_timeout; table[2].data = &pernet->checksum_enabled; table[3].data = &pernet->allow_join_initial_addr_port; + table[4].data = &pernet->stale_loss_cnt; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index ff2cc0e3273d..3a7c4e7b2d79 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -45,6 +45,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), 
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), + SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE), + SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 0663cb12b448..8ec16c991aac 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -38,6 +38,8 @@ enum linux_mptcp_mib_field { MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */ + MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */ + MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 4452455aef7f..bebb759f470e 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -885,20 +885,16 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, return subflow->mp_capable; } - if (mp_opt->dss && mp_opt->use_ack) { + if ((mp_opt->dss && mp_opt->use_ack) || + (mp_opt->add_addr && !mp_opt->echo)) { /* subflows are fully established as soon as we get any - * additional ack. + * additional ack, including ADD_ADDR. */ subflow->fully_established = 1; WRITE_ONCE(msk->fully_established, true); goto fully_established; } - if (mp_opt->add_addr) { - WRITE_ONCE(msk->fully_established, true); - return true; - } - /* If the first established packet does not contain MP_CAPABLE + data * then fallback to TCP. Fallback scenarios requires a reset for * MP_JOIN subflows. @@ -975,9 +971,11 @@ static void ack_update_msk(struct mptcp_sock *msk, old_snd_una = msk->snd_una; new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64); - /* ACK for data not even sent yet? Ignore. */ - if (after64(new_snd_una, snd_nxt)) - new_snd_una = old_snd_una; + /* ACK for data not even sent yet and even above recovery bound? 
Ignore.*/ + if (unlikely(after64(new_snd_una, snd_nxt))) { + if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt)) + new_snd_una = old_snd_una; + } new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 639271e09604..0ed3e565f8f8 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -10,6 +10,8 @@ #include <net/mptcp.h> #include "protocol.h" +#include "mib.h" + /* path manager command handlers */ int mptcp_pm_announce_addr(struct mptcp_sock *msk, @@ -308,6 +310,25 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_get_local_id(msk, skc); } +void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp); + + /* keep track of rtx periods with no progress */ + if (!subflow->stale_count) { + subflow->stale_rcv_tstamp = rcv_tstamp; + subflow->stale_count++; + } else if (subflow->stale_rcv_tstamp == rcv_tstamp) { + if (subflow->stale_count < U8_MAX) + subflow->stale_count++; + mptcp_pm_nl_subflow_chk_stale(msk, ssk); + } else { + subflow->stale_count = 0; + mptcp_subflow_set_active(subflow); + } +} + void mptcp_pm_data_init(struct mptcp_sock *msk) { msk->pm.add_addr_signaled = 0; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d2591ebf01d9..480f43ec1bfb 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -27,7 +27,6 @@ struct mptcp_pm_addr_entry { struct mptcp_addr_info addr; u8 flags; int ifindex; - struct rcu_head rcu; struct socket *lsk; }; @@ -47,6 +46,7 @@ struct pm_nl_pernet { spinlock_t lock; struct list_head local_addr_list; unsigned int addrs; + unsigned int stale_loss_cnt; unsigned int add_addr_signal_max; unsigned int add_addr_accept_max; unsigned int local_addr_max; @@ -410,6 +410,55 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) } } +static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr, + struct mptcp_addr_info *addr) +{ + int i; + + for (i = 0; i < nr; i++) { + if (addresses_equal(&addrs[i], addr, addr->port)) + return true; + } + + return false; +} + +/* Fill all the remote addresses into the array addrs[], + * and return the array size. 
+ */ +static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh, + struct mptcp_addr_info *addrs) +{ + struct sock *sk = (struct sock *)msk, *ssk; + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info remote = { 0 }; + unsigned int subflows_max; + int i = 0; + + subflows_max = mptcp_pm_get_subflows_max(msk); + + /* Non-fullmesh endpoint, fill in the single entry + * corresponding to the primary MPC subflow remote address + */ + if (!fullmesh) { + remote_address((struct sock_common *)sk, &remote); + msk->pm.subflows++; + addrs[i++] = remote; + } else { + mptcp_for_each_subflow(msk, subflow) { + ssk = mptcp_subflow_tcp_sock(subflow); + remote_address((struct sock_common *)ssk, &remote); + if (!lookup_address_in_vec(addrs, i, &remote) && + msk->pm.subflows < subflows_max) { + msk->pm.subflows++; + addrs[i++] = remote; + } + } + } + + return i; +} + static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; @@ -455,15 +504,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) !READ_ONCE(msk->pm.remote_deny_join_id0)) { local = select_local_address(pernet, msk); if (local) { - struct mptcp_addr_info remote = { 0 }; + bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); + struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; + int i, nr; msk->pm.local_addr_used++; - msk->pm.subflows++; check_work_pending(msk); - remote_address((struct sock_common *)sk, &remote); + nr = fill_remote_addresses_vec(msk, fullmesh, addrs); spin_unlock_bh(&msk->pm.lock); - __mptcp_subflow_connect(sk, &local->addr, &remote, - local->flags, local->ifindex); + for (i = 0; i < nr; i++) + __mptcp_subflow_connect(sk, &local->addr, &addrs[i]); spin_lock_bh(&msk->pm.lock); return; } @@ -484,13 +534,67 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) mptcp_pm_create_subflow_or_signal_addr(msk); } +/* Fill all the local addresses into the array addrs[], + * and return the array size. 
+ */ +static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, + struct mptcp_addr_info *addrs) +{ + struct sock *sk = (struct sock *)msk; + struct mptcp_pm_addr_entry *entry; + struct mptcp_addr_info local; + struct pm_nl_pernet *pernet; + unsigned int subflows_max; + int i = 0; + + pernet = net_generic(sock_net(sk), pm_nl_pernet_id); + subflows_max = mptcp_pm_get_subflows_max(msk); + + rcu_read_lock(); + __mptcp_flush_join_list(msk); + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) + continue; + + if (entry->addr.family != sk->sk_family) { +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if ((entry->addr.family == AF_INET && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) || + (sk->sk_family == AF_INET && + !ipv6_addr_v4mapped(&entry->addr.addr6))) +#endif + continue; + } + + if (msk->pm.subflows < subflows_max) { + msk->pm.subflows++; + addrs[i++] = entry->addr; + } + } + rcu_read_unlock(); + + /* If the array is empty, fill in the single + * 'IPADDRANY' local address + */ + if (!i) { + memset(&local, 0, sizeof(local)); + local.family = msk->pm.remote.family; + + msk->pm.subflows++; + addrs[i++] = local; + } + + return i; +} + static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) { + struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; struct sock *sk = (struct sock *)msk; unsigned int add_addr_accept_max; struct mptcp_addr_info remote; - struct mptcp_addr_info local; unsigned int subflows_max; + int i, nr; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk); @@ -502,23 +606,22 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote)) goto add_addr_echo; - msk->pm.add_addr_accepted++; - msk->pm.subflows++; - if (msk->pm.add_addr_accepted >= add_addr_accept_max || - msk->pm.subflows >= subflows_max) - WRITE_ONCE(msk->pm.accept_addr, false); - /* connect to the specified remote address, using whatever * local address the routing configuration will pick. 
*/ remote = msk->pm.remote; if (!remote.port) remote.port = sk->sk_dport; - memset(&local, 0, sizeof(local)); - local.family = remote.family; + nr = fill_local_addresses_vec(msk, addrs); + + msk->pm.add_addr_accepted++; + if (msk->pm.add_addr_accepted >= add_addr_accept_max || + msk->pm.subflows >= subflows_max) + WRITE_ONCE(msk->pm.accept_addr, false); spin_unlock_bh(&msk->pm.lock); - __mptcp_subflow_connect(sk, &local, &remote, 0, 0); + for (i = 0; i < nr; i++) + __mptcp_subflow_connect(sk, &addrs[i], &remote); spin_lock_bh(&msk->pm.lock); add_addr_echo: @@ -900,6 +1003,43 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, }; +void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) +{ + struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = (struct sock *)msk; + unsigned int active_max_loss_cnt; + struct net *net = sock_net(sk); + unsigned int stale_loss_cnt; + bool slow; + + stale_loss_cnt = mptcp_stale_loss_cnt(net); + if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) + return; + + /* look for another available subflow not in loss state */ + active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); + mptcp_for_each_subflow(msk, iter) { + if (iter != subflow && mptcp_subflow_active(iter) && + iter->stale_count < active_max_loss_cnt) { + /* we have some alternatives, try to mark this subflow as idle ...*/ + slow = lock_sock_fast(ssk); + if (!tcp_rtx_and_write_queues_empty(ssk)) { + subflow->stale = 1; + __mptcp_retransmit_pending_data(sk); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE); + } + unlock_sock_fast(ssk, slow); + + /* always try to push the pending data regarless of re-injections: + * we can possibly use backup subflows now, and subflow selection + * is cheap under the msk socket lock + */ + __mptcp_push_pending(sk, 0); + return; + } + } +} + static int mptcp_pm_family_to_addr(int family) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -1068,6 +1208,27 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) return NULL; } +int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, + u8 *flags, int *ifindex) +{ + struct mptcp_pm_addr_entry *entry; + + *flags = 0; + *ifindex = 0; + + if (id) { + rcu_read_lock(); + entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id); + if (entry) { + *flags = entry->flags; + *ifindex = entry->ifindex; + } + rcu_read_unlock(); + } + + return 0; +} + static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, struct mptcp_addr_info *addr) { @@ -1136,36 +1297,12 @@ next: return 0; } -struct addr_entry_release_work { - struct rcu_work rwork; - struct mptcp_pm_addr_entry *entry; -}; - -static void mptcp_pm_release_addr_entry(struct work_struct *work) -{ - struct addr_entry_release_work *w; - struct mptcp_pm_addr_entry *entry; - - w = container_of(to_rcu_work(work), struct addr_entry_release_work, rwork); - entry = w->entry; - if (entry) { - if (entry->lsk) - sock_release(entry->lsk); - kfree(entry); - } - kfree(w); -} - -static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry) +/* caller must ensure the RCU grace period is already elapsed */ +static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) { - struct addr_entry_release_work *w; - - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (w) { - INIT_RCU_WORK(&w->rwork, mptcp_pm_release_addr_entry); - w->entry = entry; - queue_rcu_work(system_wq, &w->rwork); - } + 
if (entry->lsk) + sock_release(entry->lsk); + kfree(entry); } static int mptcp_nl_remove_id_zero_address(struct net *net, @@ -1245,7 +1382,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&pernet->lock); mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr); - mptcp_pm_free_addr_entry(entry); + synchronize_rcu(); + __mptcp_pm_release_addr_entry(entry); return ret; } @@ -1298,6 +1436,7 @@ static void mptcp_nl_remove_addrs_list(struct net *net, } } +/* caller must ensure the RCU grace period is already elapsed */ static void __flush_addrs(struct list_head *list) { while (!list_empty(list)) { @@ -1306,7 +1445,7 @@ static void __flush_addrs(struct list_head *list) cur = list_entry(list->next, struct mptcp_pm_addr_entry, list); list_del_rcu(&cur->list); - mptcp_pm_free_addr_entry(cur); + __mptcp_pm_release_addr_entry(cur); } } @@ -1330,6 +1469,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1); spin_unlock_bh(&pernet->lock); mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list); + synchronize_rcu(); __flush_addrs(&free_list); return 0; } @@ -1923,6 +2063,7 @@ static int __net_init pm_nl_init_net(struct net *net) INIT_LIST_HEAD_RCU(&pernet->local_addr_list); pernet->next_id = 1; + pernet->stale_loss_cnt = 4; spin_lock_init(&pernet->lock); /* No need to initialize other pernet fields, the struct is zeroed at @@ -1940,7 +2081,8 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list) struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id); /* net is removed from namespace list, can't race with - * other modifiers + * other modifiers, also netns core already waited for a + * RCU grace period. */ __flush_addrs(&pernet->local_addr_list); } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a88924947815..22214a58d892 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -411,16 +411,29 @@ static void mptcp_set_datafin_timeout(const struct sock *sk) TCP_RTO_MIN << icsk->icsk_retransmits); } -static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk) +static void __mptcp_set_timeout(struct sock *sk, long tout) { - long tout = ssk && inet_csk(ssk)->icsk_pending ? - inet_csk(ssk)->icsk_timeout - jiffies : 0; - - if (tout <= 0) - tout = mptcp_sk(sk)->timer_ival; mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN; } +static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subflow) +{ + const struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + return inet_csk(ssk)->icsk_pending && !subflow->stale_count ? 
+ inet_csk(ssk)->icsk_timeout - jiffies : 0; +} + +static void mptcp_set_timeout(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + long tout = 0; + + mptcp_for_each_subflow(mptcp_sk(sk), subflow) + tout = max(tout, mptcp_timeout_from_subflow(subflow)); + __mptcp_set_timeout(sk, tout); +} + static bool tcp_can_send_ack(const struct sock *ssk) { return !((1 << inet_sk_state_load(ssk)) & @@ -531,7 +544,6 @@ static bool mptcp_check_data_fin(struct sock *sk) } ret = true; - mptcp_set_timeout(sk, NULL); mptcp_send_ack(msk); mptcp_close_wake_up(sk); } @@ -791,10 +803,7 @@ static void mptcp_reset_timer(struct sock *sk) if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) return; - /* should never be called with mptcp level timer cleared */ - tout = READ_ONCE(mptcp_sk(sk)->timer_ival); - if (WARN_ON_ONCE(!tout)) - tout = TCP_RTO_MIN; + tout = mptcp_sk(sk)->timer_ival; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout); } @@ -1046,8 +1055,14 @@ static void __mptcp_clean_una(struct sock *sk) if (after64(dfrag->data_seq + dfrag->data_len, snd_una)) break; - if (WARN_ON_ONCE(dfrag == msk->first_pending)) - break; + if (unlikely(dfrag == msk->first_pending)) { + /* in recovery mode can see ack after the current snd head */ + if (WARN_ON_ONCE(!msk->recovery)) + break; + + WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + } + dfrag_clear(sk, dfrag); cleaned = true; } @@ -1056,8 +1071,14 @@ static void __mptcp_clean_una(struct sock *sk) if (dfrag && after64(snd_una, dfrag->data_seq)) { u64 delta = snd_una - dfrag->data_seq; - if (WARN_ON_ONCE(delta > dfrag->already_sent)) - goto out; + /* prevent wrap around in recovery mode */ + if (unlikely(delta > dfrag->already_sent)) { + if (WARN_ON_ONCE(!msk->recovery)) + goto out; + if (WARN_ON_ONCE(delta > dfrag->data_len)) + goto out; + dfrag->already_sent += delta - dfrag->already_sent; + } dfrag->data_seq += delta; dfrag->offset += delta; @@ -1068,6 +1089,10 @@ static void __mptcp_clean_una(struct sock *sk) cleaned = true; } + /* all retransmitted data acked, recovery completed */ + if (unlikely(msk->recovery) && after64(msk->snd_una, msk->recovery_snd_nxt)) + msk->recovery = false; + out: if (cleaned) { if (tcp_under_memory_pressure(sk)) { @@ -1076,8 +1101,8 @@ out: } } - if (snd_una == READ_ONCE(msk->snd_nxt)) { - if (msk->timer_ival && !mptcp_data_fin_enabled(msk)) + if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) { + if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) mptcp_stop_timer(sk); } else { mptcp_reset_timer(sk); @@ -1366,16 +1391,44 @@ struct subflow_send_info { u64 ratio; }; +void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow) +{ + if (!subflow->stale) + return; + + subflow->stale = 0; + MPTCP_INC_STATS(sock_net(mptcp_subflow_tcp_sock(subflow)), MPTCP_MIB_SUBFLOWRECOVER); +} + +bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) +{ + if (unlikely(subflow->stale)) { + u32 rcv_tstamp = READ_ONCE(tcp_sk(mptcp_subflow_tcp_sock(subflow))->rcv_tstamp); + + if (subflow->stale_rcv_tstamp == rcv_tstamp) + return false; + + mptcp_subflow_set_active(subflow); + } + return __mptcp_subflow_active(subflow); +} + +/* implement the mptcp packet scheduler; + * returns the subflow that will transmit the next DSS + * additionally updates the rtx timeout + */ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) { struct subflow_send_info send_info[2]; struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; int i, nr_active = 0; struct sock *ssk; + long 
tout = 0; u64 ratio; u32 pace; - sock_owned_by_me((struct sock *)msk); + sock_owned_by_me(sk); if (__mptcp_check_fallback(msk)) { if (!msk->first) @@ -1386,8 +1439,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) /* re-use last subflow, if the burst allow that */ if (msk->last_snd && msk->snd_burst > 0 && sk_stream_memory_free(msk->last_snd) && - mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) + mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) { + mptcp_set_timeout(sk); return msk->last_snd; + } /* pick the subflow with the lower wmem/wspace ratio */ for (i = 0; i < 2; ++i) { @@ -1400,6 +1455,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) if (!mptcp_subflow_active(subflow)) continue; + tout = max(tout, mptcp_timeout_from_subflow(subflow)); nr_active += !subflow->backup; if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd) continue; @@ -1415,6 +1471,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) send_info[subflow->backup].ratio = ratio; } } + __mptcp_set_timeout(sk, tout); /* pick the best backup if no other subflow is active */ if (!nr_active) @@ -1433,12 +1490,11 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) static void mptcp_push_release(struct sock *sk, struct sock *ssk, struct mptcp_sendmsg_info *info) { - mptcp_set_timeout(sk, ssk); tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal); release_sock(ssk); } -static void __mptcp_push_pending(struct sock *sk, unsigned int flags) +void __mptcp_push_pending(struct sock *sk, unsigned int flags) { struct sock *prev_ssk = NULL, *ssk = NULL; struct mptcp_sock *msk = mptcp_sk(sk); @@ -1501,12 +1557,11 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags) mptcp_push_release(sk, ssk, &info); out: - if (copied) { - /* start the timer, if it's not pending */ - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + /* ensure the rtx timer is running */ + if (!mptcp_timer_pending(sk)) + mptcp_reset_timer(sk); + if (copied) __mptcp_check_send_data_fin(sk); - } } static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) @@ -1567,7 +1622,6 @@ out: */ __mptcp_update_wmem(sk); if (copied) { - mptcp_set_timeout(sk, ssk); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); if (!mptcp_timer_pending(sk)) @@ -2083,10 +2137,11 @@ static void mptcp_timeout_timer(struct timer_list *t) * * A backup subflow is returned only if that is the only kind available. */ -static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) +static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk) { + struct sock *backup = NULL, *pick = NULL; struct mptcp_subflow_context *subflow; - struct sock *backup = NULL; + int min_stale_count = INT_MAX; sock_owned_by_me((const struct sock *)msk); @@ -2096,14 +2151,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - if (!mptcp_subflow_active(subflow)) + if (!__mptcp_subflow_active(subflow)) continue; - /* still data outstanding at TCP level? Don't retransmit. */ - if (!tcp_write_queue_empty(ssk)) { - if (inet_csk(ssk)->icsk_ca_state >= TCP_CA_Loss) - continue; - return NULL; + /* still data outstanding at TCP level? 
skip this */ + if (!tcp_rtx_and_write_queues_empty(ssk)) { + mptcp_pm_subflow_chk_stale(msk, ssk); + min_stale_count = min_t(int, min_stale_count, subflow->stale_count); + continue; } if (subflow->backup) { @@ -2112,10 +2167,15 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) continue; } - return ssk; + if (!pick) + pick = ssk; } - return backup; + if (pick) + return pick; + + /* use backup only if there are no progresses anywhere */ + return min_stale_count > 1 ? backup : NULL; } static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) @@ -2126,6 +2186,50 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) } } +bool __mptcp_retransmit_pending_data(struct sock *sk) +{ + struct mptcp_data_frag *cur, *rtx_head; + struct mptcp_sock *msk = mptcp_sk(sk); + + if (__mptcp_check_fallback(mptcp_sk(sk))) + return false; + + if (tcp_rtx_and_write_queues_empty(sk)) + return false; + + /* the closing socket has some data untransmitted and/or unacked: + * some data in the mptcp rtx queue has not really xmitted yet. + * keep it simple and re-inject the whole mptcp level rtx queue + */ + mptcp_data_lock(sk); + __mptcp_clean_una_wakeup(sk); + rtx_head = mptcp_rtx_head(sk); + if (!rtx_head) { + mptcp_data_unlock(sk); + return false; + } + + /* will accept ack for reijected data before re-sending them */ + if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt)) + msk->recovery_snd_nxt = msk->snd_nxt; + msk->recovery = true; + mptcp_data_unlock(sk); + + msk->first_pending = rtx_head; + msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq; + msk->snd_nxt = rtx_head->data_seq; + msk->snd_burst = 0; + + /* be sure to clear the "sent status" on all re-injected fragments */ + list_for_each_entry(cur, &msk->rtx_queue, list) { + if (!cur->already_sent) + break; + cur->already_sent = 0; + } + + return true; +} + /* subflow sockets can be either outgoing (connect) or incoming * (accept). 
* @@ -2138,6 +2242,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { struct mptcp_sock *msk = mptcp_sk(sk); + bool need_push; list_del(&subflow->node); @@ -2149,6 +2254,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (ssk->sk_socket) sock_orphan(ssk); + need_push = __mptcp_retransmit_pending_data(sk); subflow->disposable = 1; /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops @@ -2176,6 +2282,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (msk->subflow && ssk == msk->subflow->sk) mptcp_dispose_initial_subflow(msk); + + if (need_push) + __mptcp_push_pending(sk, 0); } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, @@ -2313,7 +2422,6 @@ static void __mptcp_retrans(struct sock *sk) info.size_goal); } - mptcp_set_timeout(sk, ssk); release_sock(ssk); reset_timer: @@ -2384,10 +2492,12 @@ static int __mptcp_init_sock(struct sock *sk) msk->wmem_reserved = 0; WRITE_ONCE(msk->rmem_released, 0); msk->tx_pending_data = 0; + msk->timer_ival = TCP_RTO_MIN; msk->first = NULL; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); + msk->recovery = false; mptcp_pm_data_init(msk); @@ -2472,7 +2582,6 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) tcp_shutdown(ssk, how); } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); - mptcp_set_timeout(sk, ssk); tcp_send_ack(ssk); if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0f0c026c5f8b..bc1bfd7ac9c1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -230,12 +230,17 @@ struct mptcp_sock { struct sock *last_snd; int snd_burst; int old_wspace; + u64 recovery_snd_nxt; /* in recovery mode accept up to this seq; + * recovery related fields are under data_lock + * protection + */ u64 snd_una; u64 wnd_end; unsigned long timer_ival; u32 token; int rmem_released; unsigned long flags; + bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; bool rcv_data_fin; @@ -427,7 +432,8 @@ struct mptcp_subflow_context { send_mp_prio : 1, rx_eof : 1, can_ack : 1, /* only after processing the remote a key */ - disposable : 1; /* ctx can be free at ulp release time */ + disposable : 1, /* ctx can be free at ulp release time */ + stale : 1; /* unable to snd/rcv data, do not use for xmit */ enum mptcp_data_avail data_avail; u32 remote_nonce; u64 thmac; @@ -439,11 +445,13 @@ struct mptcp_subflow_context { u8 reset_seen:1; u8 reset_transient:1; u8 reset_reason:4; + u8 stale_count; long delegated_status; struct list_head delegated_node; /* link into delegated_action, protected by local BH */ - u32 setsockopt_seq; + u32 setsockopt_seq; + u32 stale_rcv_tstamp; struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ @@ -549,12 +557,15 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status); } -int mptcp_is_enabled(struct net *net); -unsigned int mptcp_get_add_addr_timeout(struct net *net); -int mptcp_is_checksum_enabled(struct net *net); -int mptcp_allow_join_id0(struct net *net); +int mptcp_is_enabled(const struct net *net); +unsigned int mptcp_get_add_addr_timeout(const struct net *net); +int mptcp_is_checksum_enabled(const struct net *net); +int mptcp_allow_join_id0(const struct net *net); +unsigned int 
mptcp_stale_loss_cnt(const struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); +bool __mptcp_retransmit_pending_data(struct sock *sk); +void __mptcp_push_pending(struct sock *sk, unsigned int flags); bool mptcp_subflow_data_available(struct sock *sk); void __init mptcp_subflow_init(void); void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); @@ -566,14 +577,13 @@ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *remote, - u8 flags, int ifindex); + const struct mptcp_addr_info *remote); int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); -static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) +static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -585,6 +595,10 @@ static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); } +void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow); + +bool mptcp_subflow_active(struct mptcp_subflow_context *subflow); + static inline void mptcp_subflow_tcp_fallback(struct sock *sk, struct mptcp_subflow_context *ctx) { @@ -690,6 +704,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); +void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); +void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); @@ -716,6 +732,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk, struct mptcp_addr_info *addr); +int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, + u8 *flags, int *ifindex); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 966f777d35ce..8c43aa14897a 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -435,10 +435,12 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) goto do_reset; } + subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; - pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, - subflow->thmac, subflow->remote_nonce); + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", + subflow, subflow->thmac, subflow->remote_nonce, + subflow->backup); if (!subflow_thmac_valid(subflow)) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); @@ -1353,8 +1355,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, } int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *remote, - u8 flags, int ifindex) + const struct mptcp_addr_info *remote) { struct mptcp_sock *msk = mptcp_sk(sk); 
struct mptcp_subflow_context *subflow; @@ -1365,6 +1366,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, struct sock *ssk; u32 remote_token; int addrlen; + int ifindex; + u8 flags; int err; if (!mptcp_is_fully_established(sk)) @@ -1388,6 +1391,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, local_id = err; } + mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id, + &flags, &ifindex); subflow->remote_key = msk->remote_key; subflow->local_key = msk->local_key; subflow->token = msk->token; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index d1bef23fd4f5..dd30c03d5a23 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -132,8 +132,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; - if (ip > ip_to) + if (ip > ip_to) { + if (ip_to == 0) + return -IPSET_ERR_HASH_ELEM; swap(ip, ip_to); + } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); @@ -144,6 +147,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); + /* 64bit division is not allowed on 32bit */ + if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) { ip = ntohl(h->next.ip); e.ip = htonl(ip); diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 18346d18aa16..153de3457423 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -121,6 +121,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK])); e.mark &= h->markmask; + if (e.mark == 0 && e.ip == 0) + return -IPSET_ERR_HASH_ELEM; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) { @@ -133,8 +135,11 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; - if (ip > ip_to) + if (ip > ip_to) { + if (e.mark == 0 && ip_to == 0) + return -IPSET_ERR_HASH_ELEM; swap(ip, ip_to); + } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); @@ -143,6 +148,9 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, cidr); } + if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index e1ca11196515..7303138e46be 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -173,6 +173,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index ab179e064597..334fb1ad0e86 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -180,6 +180,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip 
<= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 8f075b44cf64..7df94f437f60 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -253,6 +253,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c1a11f041ac6..1422739d9aa2 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -140,7 +140,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0; + u32 ip = 0, ip_to = 0, ipn, n = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -188,6 +188,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); + n++; + } while (ipn++ < ip_to); + + if (n > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); do { diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index ddd51c2e1cb3..9810f5bf63f5 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0; + u32 ip = 0, ip_to = 0, ipn, n = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -256,6 +256,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); + n++; + } while (ipn++ < ip_to); + + if (n > IPSET_MAX_RANGE) + return -ERANGE; if (retried) ip = ntohl(h->next.ip); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 6532f0505e66..3d09eefe998a 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -168,7 +168,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0; - u32 ip2 = 0, ip2_from = 0, ip2_to = 0; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn; + u64 n = 0, m = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -244,6 +245,19 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); + n++; + } while (ipn++ < ip_to); + ipn = ip2_from; + do { + ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); + m++; + } while (ipn++ < ip2_to); + + if (n*m > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index ec1564a1cb5a..09cf72eb37f8 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ 
b/net/netfilter/ipset/ip_set_hash_netport.c @@ -158,7 +158,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 port, port_to, p = 0, ip = 0, ip_to = 0; + u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn; + u64 n = 0; bool with_ports = false; u8 cidr; int ret; @@ -235,6 +236,14 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr); + n++; + } while (ipn++ < ip_to); + + if (n*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 0e91d1e82f1c..19bcdb3141f6 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -182,7 +182,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2; + u32 ip2_from = 0, ip2_to = 0, ip2, ipn; + u64 n = 0, m = 0; bool with_ports = false; int ret; @@ -284,6 +285,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); + n++; + } while (ipn++ < ip_to); + ipn = ip2_from; + do { + ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); + m++; + } while (ipn++ < ip2_to); + + if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5c03e5106751..d31dbccbe7bd 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -66,22 +66,17 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct conntrack_gc_work { struct delayed_work dwork; - u32 last_bucket; + u32 next_bucket; bool exiting; bool early_drop; - long next_gc_run; }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; -/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ -#define GC_MAX_BUCKETS_DIV 128u -/* upper bound of full table scan */ -#define GC_MAX_SCAN_JIFFIES (16u * HZ) -/* desired ratio of entries found to be expired */ -#define GC_EVICT_RATIO 50u +#define GC_SCAN_INTERVAL (120u * HZ) +#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) static struct conntrack_gc_work conntrack_gc_work; @@ -1363,17 +1358,13 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) static void gc_worker(struct work_struct *work) { - unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); - unsigned int i, goal, buckets = 0, expired_count = 0; - unsigned int nf_conntrack_max95 = 0; + unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION; + unsigned int i, hashsz, nf_conntrack_max95 = 0; + unsigned long next_run = GC_SCAN_INTERVAL; struct conntrack_gc_work *gc_work; - unsigned int ratio, scanned = 0; - unsigned long next_run; - gc_work = container_of(work, struct conntrack_gc_work, dwork.work); - goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; - i = gc_work->last_bucket; + i = gc_work->next_bucket; if 
(gc_work->early_drop) nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; @@ -1381,15 +1372,15 @@ static void gc_worker(struct work_struct *work) struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; - unsigned int hashsz; struct nf_conn *tmp; - i++; rcu_read_lock(); nf_conntrack_get_ht(&ct_hash, &hashsz); - if (i >= hashsz) - i = 0; + if (i >= hashsz) { + rcu_read_unlock(); + break; + } hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { struct nf_conntrack_net *cnet; @@ -1397,7 +1388,6 @@ static void gc_worker(struct work_struct *work) tmp = nf_ct_tuplehash_to_ctrack(h); - scanned++; if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { nf_ct_offload_timeout(tmp); continue; @@ -1405,7 +1395,6 @@ static void gc_worker(struct work_struct *work) if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); - expired_count++; continue; } @@ -1438,7 +1427,14 @@ static void gc_worker(struct work_struct *work) */ rcu_read_unlock(); cond_resched(); - } while (++buckets < goal); + i++; + + if (time_after(jiffies, end_time) && i < hashsz) { + gc_work->next_bucket = i; + next_run = 0; + break; + } + } while (i < hashsz); if (gc_work->exiting) return; @@ -1449,40 +1445,17 @@ static void gc_worker(struct work_struct *work) * * This worker is only here to reap expired entries when system went * idle after a busy period. - * - * The heuristics below are supposed to balance conflicting goals: - * - * 1. Minimize time until we notice a stale entry - * 2. Maximize scan intervals to not waste cycles - * - * Normally, expire ratio will be close to 0. - * - * As soon as a sizeable fraction of the entries have expired - * increase scan frequency. */ - ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio > GC_EVICT_RATIO) { - gc_work->next_gc_run = min_interval; - } else { - unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV; - - BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0); - - gc_work->next_gc_run += min_interval; - if (gc_work->next_gc_run > max) - gc_work->next_gc_run = max; + if (next_run) { + gc_work->early_drop = false; + gc_work->next_bucket = 0; } - - next_run = gc_work->next_gc_run; - gc_work->last_bucket = i; - gc_work->early_drop = false; queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); - gc_work->next_gc_run = HZ; gc_work->exiting = false; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index e81af33b233b..eb35c6151fb0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -852,6 +852,11 @@ static int ctnetlink_done(struct netlink_callback *cb) return 0; } +struct ctnetlink_filter_u32 { + u32 val; + u32 mask; +}; + struct ctnetlink_filter { u8 family; @@ -862,10 +867,8 @@ struct ctnetlink_filter { struct nf_conntrack_tuple reply; struct nf_conntrack_zone zone; - struct { - u_int32_t val; - u_int32_t mask; - } mark; + struct ctnetlink_filter_u32 mark; + struct ctnetlink_filter_u32 status; }; static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = { @@ -907,6 +910,46 @@ static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], struct nf_conntrack_zone *zone, u_int32_t flags); +static int ctnetlink_filter_parse_mark(struct ctnetlink_filter_u32 *mark, + const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_MARK + if (cda[CTA_MARK]) { + mark->val = 
ntohl(nla_get_be32(cda[CTA_MARK])); + + if (cda[CTA_MARK_MASK]) + mark->mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + else + mark->mask = 0xffffffff; + } else if (cda[CTA_MARK_MASK]) { + return -EINVAL; + } +#endif + return 0; +} + +static int ctnetlink_filter_parse_status(struct ctnetlink_filter_u32 *status, + const struct nlattr * const cda[]) +{ + if (cda[CTA_STATUS]) { + status->val = ntohl(nla_get_be32(cda[CTA_STATUS])); + if (cda[CTA_STATUS_MASK]) + status->mask = ntohl(nla_get_be32(cda[CTA_STATUS_MASK])); + else + status->mask = status->val; + + /* status->val == 0? always true, else always false. */ + if (status->mask == 0) + return -EINVAL; + } else if (cda[CTA_STATUS_MASK]) { + return -EINVAL; + } + + /* CTA_STATUS is NLA_U32, if this fires UAPI needs to be extended */ + BUILD_BUG_ON(__IPS_MAX_BIT >= 32); + return 0; +} + static struct ctnetlink_filter * ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) { @@ -924,18 +967,14 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) filter->family = family; -#ifdef CONFIG_NF_CONNTRACK_MARK - if (cda[CTA_MARK]) { - filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); - if (cda[CTA_MARK_MASK]) - filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); - else - filter->mark.mask = 0xffffffff; - } else if (cda[CTA_MARK_MASK]) { - err = -EINVAL; + err = ctnetlink_filter_parse_mark(&filter->mark, cda); + if (err) goto err_filter; - } -#endif + + err = ctnetlink_filter_parse_status(&filter->status, cda); + if (err) + goto err_filter; + if (!cda[CTA_FILTER]) return filter; @@ -989,7 +1028,7 @@ err_filter: static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda) { - return family || cda[CTA_MARK] || cda[CTA_FILTER]; + return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS]; } static int ctnetlink_start(struct netlink_callback *cb) @@ -1082,6 +1121,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) { struct ctnetlink_filter *filter = data; struct nf_conntrack_tuple *tuple; + u32 status; if (filter == NULL) goto out; @@ -1113,6 +1153,9 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) if ((ct->mark & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif + status = (u32)READ_ONCE(ct->status); + if ((status & filter->status.mask) != filter->status.val) + goto ignore_entry; out: return 1; @@ -1495,6 +1538,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_LABELS_MASK] = { .type = NLA_BINARY, .len = NF_CT_LABELS_MAX_SIZE }, [CTA_FILTER] = { .type = NLA_NESTED }, + [CTA_STATUS_MASK] = { .type = NLA_U32 }, }; static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3259416f2ea4..af5115e127cf 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1478,7 +1478,6 @@ void nf_conntrack_tcp_init_net(struct net *net) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) tn->offload_timeout = 30 * HZ; - tn->offload_pickup = 120 * HZ; #endif } diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 698fee49e732..f8e3c0d2602f 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -271,7 +271,6 @@ void nf_conntrack_udp_init_net(struct net *net) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) un->offload_timeout = 30 * HZ; - un->offload_pickup = 30 * HZ; #endif } diff --git 
a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 214d9f9e499b..e84b499b7bfa 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -575,7 +575,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD, - NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP, #endif NF_SYSCTL_CT_PROTO_TCP_LOOSE, NF_SYSCTL_CT_PROTO_TCP_LIBERAL, @@ -585,7 +584,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD, - NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP, #endif NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP, NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6, @@ -776,12 +774,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = { - .procname = "nf_flowtable_tcp_pickup", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, #endif [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = { .procname = "nf_conntrack_tcp_loose", @@ -832,12 +824,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = { - .procname = "nf_flowtable_udp_pickup", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, #endif [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = { .procname = "nf_conntrack_icmp_timeout", @@ -1018,7 +1004,6 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout; - table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup; #endif } @@ -1111,7 +1096,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED]; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout; - table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup; #endif nf_conntrack_standalone_init_tcp_sysctl(net, table); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 8ecad71b3613..87a7388b6c89 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -180,27 +180,27 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) { - const struct nf_conntrack_l4proto *l4proto; struct net *net = nf_ct_net(ct); int l4num = nf_ct_protonum(ct); - unsigned int timeout; - - l4proto = nf_ct_l4proto_find(l4num); - if (!l4proto) - return; + s32 timeout; if (l4num == IPPROTO_TCP) { struct nf_tcp_net *tn = nf_tcp_pernet(net); - timeout = tn->offload_pickup; + timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + timeout -= tn->offload_timeout; } else if (l4num == IPPROTO_UDP) { struct nf_udp_net *tn = nf_udp_pernet(net); - timeout = tn->offload_pickup; + timeout = tn->timeouts[UDP_CT_REPLIED]; + timeout -= tn->offload_timeout; } else { return; } + if (timeout < 0) + timeout = 0; + if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) ct->timeout = nfct_time_stamp + timeout; } @@ -273,15 +273,10 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = { unsigned long flow_offload_get_timeout(struct flow_offload 
*flow) { - const struct nf_conntrack_l4proto *l4proto; unsigned long timeout = NF_FLOW_TIMEOUT; struct net *net = nf_ct_net(flow->ct); int l4num = nf_ct_protonum(flow->ct); - l4proto = nf_ct_l4proto_find(l4num); - if (!l4proto) - return timeout; - if (l4num == IPPROTO_TCP) { struct nf_tcp_net *tn = nf_tcp_pernet(net); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 2bfd9f1b8f11..d6bf1b2cd541 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1096,6 +1096,7 @@ static void nf_flow_table_block_offload_init(struct flow_block_offload *bo, bo->command = cmd; bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; + bo->cb_list_head = &flowtable->flow_block.cb_list; INIT_LIST_HEAD(&bo->cb_list); } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 7f2f69b609d8..6d12afabfe8a 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -21,6 +21,8 @@ #include "nf_internals.h" +static const struct nf_queue_handler __rcu *nf_queue_handler; + /* * Hook for nfnetlink_queue to register its queue handler. * We do this so that most of the NFQUEUE code can be modular. @@ -29,20 +31,18 @@ * receives, no matter what. */ -/* return EBUSY when somebody else is registered, return EEXIST if the - * same handler is registered, return 0 in case of success. */ -void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh) +void nf_register_queue_handler(const struct nf_queue_handler *qh) { /* should never happen, we only have one queueing backend in kernel */ - WARN_ON(rcu_access_pointer(net->nf.queue_handler)); - rcu_assign_pointer(net->nf.queue_handler, qh); + WARN_ON(rcu_access_pointer(nf_queue_handler)); + rcu_assign_pointer(nf_queue_handler, qh); } EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -void nf_unregister_queue_handler(struct net *net) +void nf_unregister_queue_handler(void) { - RCU_INIT_POINTER(net->nf.queue_handler, NULL); + RCU_INIT_POINTER(nf_queue_handler, NULL); } EXPORT_SYMBOL(nf_unregister_queue_handler); @@ -108,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net) const struct nf_queue_handler *qh; rcu_read_lock(); - qh = rcu_dereference(net->nf.queue_handler); + qh = rcu_dereference(nf_queue_handler); if (qh) qh->nf_hook_drop(net); rcu_read_unlock(); @@ -149,12 +149,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, { struct nf_queue_entry *entry = NULL; const struct nf_queue_handler *qh; - struct net *net = state->net; unsigned int route_key_size; int status; /* QUEUE == DROP if no one is waiting, to be safe. 
*/ - qh = rcu_dereference(net->nf.queue_handler); + qh = rcu_dereference(nf_queue_handler); if (!qh) return -ESRCH; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index b58d73a96523..9656c1646222 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -353,6 +353,7 @@ static void nft_flow_block_offload_init(struct flow_block_offload *bo, bo->command = cmd; bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; + bo->cb_list_head = &basechain->flow_block.cb_list; INIT_LIST_HEAD(&bo->cb_list); } diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 202f57d17bab..f554e2ea32ee 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -89,11 +89,15 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb, if (!nest2) goto cancel_nest; - ret = nla_put_string(nlskb, NFTA_CHAIN_TABLE, chain->table->name); + ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name); if (ret) goto cancel_nest; - ret = nla_put_string(nlskb, NFTA_CHAIN_NAME, chain->name); + ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name); + if (ret) + goto cancel_nest; + + ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family); if (ret) goto cancel_nest; @@ -109,18 +113,19 @@ cancel_nest: static int nfnl_hook_dump_one(struct sk_buff *nlskb, const struct nfnl_dump_hook_data *ctx, const struct nf_hook_ops *ops, - unsigned int seq) + int family, unsigned int seq) { u16 event = nfnl_msg_type(NFNL_SUBSYS_HOOK, NFNL_MSG_HOOK_GET); unsigned int portid = NETLINK_CB(nlskb).portid; struct nlmsghdr *nlh; int ret = -EMSGSIZE; + u32 hooknum; #ifdef CONFIG_KALLSYMS char sym[KSYM_SYMBOL_LEN]; char *module_name; #endif nlh = nfnl_msg_put(nlskb, portid, seq, event, - NLM_F_MULTI, ops->pf, NFNETLINK_V0, 0); + NLM_F_MULTI, family, NFNETLINK_V0, 0); if (!nlh) goto nla_put_failure; @@ -135,6 +140,7 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb, if (module_name) { char *end; + *module_name = '\0'; module_name += 2; end = strchr(module_name, ']'); if (end) { @@ -151,7 +157,12 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb, goto nla_put_failure; #endif - ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(ops->hooknum)); + if (ops->pf == NFPROTO_INET && ops->hooknum == NF_INET_INGRESS) + hooknum = NF_NETDEV_INGRESS; + else + hooknum = ops->hooknum; + + ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(hooknum)); if (ret) goto nla_put_failure; @@ -259,7 +270,8 @@ static int nfnl_hook_dump(struct sk_buff *nlskb, ops = nf_hook_entries_get_hook_ops(e); for (; i < e->num_hook_entries; i++) { - err = nfnl_hook_dump_one(nlskb, ctx, ops[i], cb->seq); + err = nfnl_hook_dump_one(nlskb, ctx, ops[i], family, + cb->nlh->nlmsg_seq); if (err) break; } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f774de0fc24f..4c3fbaaeb103 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -951,6 +951,16 @@ static void nfqnl_nf_hook_drop(struct net *net) struct nfnl_queue_net *q = nfnl_queue_pernet(net); int i; + /* This function is also called on net namespace error unwind, + * when pernet_ops->init() failed and ->exit() functions of the + * previous pernet_ops gets called. + * + * This may result in a call to nfqnl_nf_hook_drop() before + * struct nfnl_queue_net was allocated. 
+ */ + if (!q) + return; + for (i = 0; i < INSTANCE_BUCKETS; i++) { struct nfqnl_instance *inst; struct hlist_head *head = &q->instance_table[i]; @@ -1502,7 +1512,6 @@ static int __net_init nfnl_queue_net_init(struct net *net) &nfqnl_seq_ops, sizeof(struct iter_state))) return -ENOMEM; #endif - nf_register_queue_handler(net, &nfqh); return 0; } @@ -1511,7 +1520,6 @@ static void __net_exit nfnl_queue_net_exit(struct net *net) struct nfnl_queue_net *q = nfnl_queue_pernet(net); unsigned int i; - nf_unregister_queue_handler(net); #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); #endif @@ -1555,6 +1563,8 @@ static int __init nfnetlink_queue_init(void) goto cleanup_netlink_subsys; } + nf_register_queue_handler(&nfqh); + return status; cleanup_netlink_subsys: @@ -1568,6 +1578,7 @@ out: static void __exit nfnetlink_queue_fini(void) { + nf_unregister_queue_handler(); unregister_netdevice_notifier(&nfqnl_dev_notifier); nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 639c337c885b..272bcdb1392d 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -683,14 +683,12 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb, goto out_put; } - ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid, - MSG_DONTWAIT); - if (ret > 0) - ret = 0; + ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); out_put: rcu_read_lock(); module_put(THIS_MODULE); - return ret == -EAGAIN ? -ENOBUFS : ret; + + return ret; } static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = { diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 84e58ee501a4..25524e393349 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -39,6 +39,20 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); #define XT_PCPU_BLOCK_SIZE 4096 #define XT_MAX_TABLE_SIZE (512 * 1024 * 1024) +struct xt_template { + struct list_head list; + + /* called when table is needed in the given netns */ + int (*table_init)(struct net *net); + + struct module *me; + + /* A unique name... 
*/ + char name[XT_TABLE_MAXNAMELEN]; +}; + +static struct list_head xt_templates[NFPROTO_NUMPROTO]; + struct xt_pernet { struct list_head tables[NFPROTO_NUMPROTO]; }; @@ -1221,48 +1235,43 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) { struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); - struct xt_table *t, *found = NULL; + struct module *owner = NULL; + struct xt_template *tmpl; + struct xt_table *t; mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt_net->tables[af], list) if (strcmp(t->name, name) == 0 && try_module_get(t->me)) return t; - if (net == &init_net) - goto out; - - /* Table doesn't exist in this netns, re-try init */ - xt_net = net_generic(&init_net, xt_pernet_id); - list_for_each_entry(t, &xt_net->tables[af], list) { + /* Table doesn't exist in this netns, check larval list */ + list_for_each_entry(tmpl, &xt_templates[af], list) { int err; - if (strcmp(t->name, name)) + if (strcmp(tmpl->name, name)) continue; - if (!try_module_get(t->me)) + if (!try_module_get(tmpl->me)) goto out; + + owner = tmpl->me; + mutex_unlock(&xt[af].mutex); - err = t->table_init(net); + err = tmpl->table_init(net); if (err < 0) { - module_put(t->me); + module_put(owner); return ERR_PTR(err); } - found = t; - mutex_lock(&xt[af].mutex); break; } - if (!found) - goto out; - - xt_net = net_generic(net, xt_pernet_id); /* and once again: */ list_for_each_entry(t, &xt_net->tables[af], list) if (strcmp(t->name, name) == 0) return t; - module_put(found->me); + module_put(owner); out: mutex_unlock(&xt[af].mutex); return ERR_PTR(-ENOENT); @@ -1749,6 +1758,58 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) } EXPORT_SYMBOL_GPL(xt_hook_ops_alloc); +int xt_register_template(const struct xt_table *table, + int (*table_init)(struct net *net)) +{ + int ret = -EEXIST, af = table->af; + struct xt_template *t; + + mutex_lock(&xt[af].mutex); + + list_for_each_entry(t, &xt_templates[af], list) { + if (WARN_ON_ONCE(strcmp(table->name, t->name) == 0)) + goto out_unlock; + } + + ret = -ENOMEM; + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out_unlock; + + BUILD_BUG_ON(sizeof(t->name) != sizeof(table->name)); + + strscpy(t->name, table->name, sizeof(t->name)); + t->table_init = table_init; + t->me = table->me; + list_add(&t->list, &xt_templates[af]); + ret = 0; +out_unlock: + mutex_unlock(&xt[af].mutex); + return ret; +} +EXPORT_SYMBOL_GPL(xt_register_template); + +void xt_unregister_template(const struct xt_table *table) +{ + struct xt_template *t; + int af = table->af; + + mutex_lock(&xt[af].mutex); + list_for_each_entry(t, &xt_templates[af], list) { + if (strcmp(table->name, t->name)) + continue; + + list_del(&t->list); + mutex_unlock(&xt[af].mutex); + kfree(t); + return; + } + + mutex_unlock(&xt[af].mutex); + WARN_ON_ONCE(1); +} +EXPORT_SYMBOL_GPL(xt_unregister_template); + int xt_proto_init(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS @@ -1937,6 +1998,7 @@ static int __init xt_init(void) #endif INIT_LIST_HEAD(&xt[i].target); INIT_LIST_HEAD(&xt[i].match); + INIT_LIST_HEAD(&xt_templates[i]); } rv = register_pernet_subsys(&xt_net_ops); if (rv < 0) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 12404d221026..0a913ce07425 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -351,21 +351,10 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static int notrack_chk(const struct xt_tgchk_param *par) -{ - if (!par->net->xt.notrack_deprecated_warning) { - 
pr_info("netfilter: NOTRACK target is deprecated, " - "use CT instead or upgrade iptables\n"); - par->net->xt.notrack_deprecated_warning = true; - } - return 0; -} - static struct xt_target notrack_tg_reg __read_mostly = { .name = "NOTRACK", .revision = 0, .family = NFPROTO_UNSPEC, - .checkentry = notrack_chk, .target = notrack_tg, .table = "raw", .me = THIS_MODULE, diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 5e39640becdb..0ca214ab5aef 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -140,7 +140,7 @@ static void rawsock_data_exchange_complete(void *context, struct sk_buff *skb, { struct sock *sk = (struct sock *) context; - BUG_ON(in_irq()); + BUG_ON(in_hardirq()); pr_debug("sk=%p err=%d\n", sk, err); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index e586424d8b04..9713035b89e3 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -293,14 +293,14 @@ static bool icmp6hdr_ok(struct sk_buff *skb) } /** - * Parse vlan tag from vlan header. + * parse_vlan_tag - Parse vlan tag from vlan header. * @skb: skb containing frame to parse * @key_vh: pointer to parsed vlan tag * @untag_vlan: should the vlan header be removed from the frame * - * Returns ERROR on memory error. - * Returns 0 if it encounters a non-vlan or incomplete packet. - * Returns 1 after successfully parsing vlan tag. + * Return: ERROR on memory error. + * %0 if it encounters a non-vlan or incomplete packet. + * %1 after successfully parsing vlan tag. */ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh, bool untag_vlan) @@ -532,6 +532,7 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key) * L3 header * @key: output flow key * + * Return: %0 if successful, otherwise a negative errno value. */ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) { @@ -748,8 +749,6 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) * * The caller must ensure that skb->len >= ETH_HLEN. * - * Returns 0 if successful, otherwise a negative errno value. - * * Initializes @skb header fields as follows: * * - skb->mac_header: the L2 header. @@ -764,6 +763,8 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) * * - skb->protocol: the type of the data starting at skb->network_header. * Equals to key->eth.type. + * + * Return: %0 if successful, otherwise a negative errno value. 
*/ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 88deb5b41429..cf2ce5812489 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -507,6 +507,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) } skb->dev = vport->dev; + skb->tstamp = 0; vport->ops->send(skb); return; diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index fa611678af05..1dc955ca57d3 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -15,6 +15,7 @@ struct qrtr_mhi_dev { struct qrtr_endpoint ep; struct mhi_device *mhi_dev; struct device *dev; + struct completion ready; }; /* From MHI to QRTR */ @@ -50,6 +51,10 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep); int rc; + rc = wait_for_completion_interruptible(&qdev->ready); + if (rc) + goto free_skb; + if (skb->sk) sock_hold(skb->sk); @@ -79,7 +84,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, int rc; /* start channels */ - rc = mhi_prepare_for_transfer(mhi_dev); + rc = mhi_prepare_for_transfer(mhi_dev, 0); if (rc) return rc; @@ -96,6 +101,15 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, if (rc) return rc; + /* start channels */ + rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS); + if (rc) { + qrtr_endpoint_unregister(&qdev->ep); + dev_set_drvdata(&mhi_dev->dev, NULL); + return rc; + } + + complete_all(&qdev->ready); dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n"); return 0; diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 9b6ffff72f2d..28c1b0022178 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -131,9 +131,9 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) cpu_relax(); } - ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, + ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len, &off, PAGE_SIZE); - if (unlikely(ret != ibmr->sg_len)) + if (unlikely(ret != ibmr->sg_dma_len)) return ret < 0 ? ret : -EINVAL; if (cmpxchg(&frmr->fr_state, diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 0885b22e5c0e..accd35c05577 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -21,6 +21,8 @@ config AF_RXRPC See Documentation/networking/rxrpc.rst. +if AF_RXRPC + config AF_RXRPC_IPV6 bool "IPv6 support for RxRPC" depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC) @@ -30,7 +32,6 @@ config AF_RXRPC_IPV6 config AF_RXRPC_INJECT_LOSS bool "Inject packet loss into RxRPC packet stream" - depends on AF_RXRPC help Say Y here to inject packet loss by discarding some received and some transmitted packets. @@ -38,7 +39,6 @@ config AF_RXRPC_INJECT_LOSS config AF_RXRPC_DEBUG bool "RxRPC dynamic debugging" - depends on AF_RXRPC help Say Y here to make runtime controllable debugging messages appear. @@ -47,7 +47,6 @@ config AF_RXRPC_DEBUG config RXKAD bool "RxRPC Kerberos security" - depends on AF_RXRPC select CRYPTO select CRYPTO_MANAGER select CRYPTO_SKCIPHER @@ -58,3 +57,5 @@ config RXKAD through the use of the key retention service. See Documentation/networking/rxrpc.rst. 
+ +endif diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 37f51d778728..d64b0eeccbe4 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -271,6 +271,9 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, goto out; } + /* All mirred/redirected skbs should clear previous ct info */ + nf_reset_ct(skb2); + want_ingress = tcf_mirred_act_wants_ingress(m_eaction); expects_nh = want_ingress || !m_mac_header_xmit; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 69185e311422..2ef8f5a6205a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -634,6 +634,7 @@ static void tcf_block_offload_init(struct flow_block_offload *bo, bo->block_shared = shared; bo->extack = extack; bo->sch = sch; + bo->cb_list_head = &flow_block->cb_list; INIT_LIST_HEAD(&bo->cb_list); } @@ -1949,7 +1950,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, int err; int tp_created; bool rtnl_held = false; - u32 flags = 0; + u32 flags; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1970,6 +1971,7 @@ replay: tp = NULL; cl = 0; block = NULL; + flags = 0; if (prio == 0) { /* If no priority is provided by the user, diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index ecc5c4d93779..3c2300d14468 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -720,7 +720,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, skip_hash: if (flow_override) flow_hash = flow_override - 1; - else if (use_skbhash) + else if (use_skbhash && (flow_mode & CAKE_FLOW_FLOWS)) flow_hash = skb->hash; if (host_override) { dsthost_hash = host_override - 1; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d9ac60ffe927..a8dd06c74e31 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -913,7 +913,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, /* seqlock has the same scope of busylock, for NOLOCK qdisc */ spin_lock_init(&sch->seqlock); - lockdep_set_class(&sch->busylock, + lockdep_set_class(&sch->seqlock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); seqcount_init(&sch->running); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 07b30d0601d7..9c79374457a0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1739,8 +1739,6 @@ static void taprio_attach(struct Qdisc *sch) if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); - if (ntx < dev->real_num_tx_queues) - qdisc_hash_add(qdisc, false); } else { old = dev_graft_qdisc(qdisc->dev_queue, sch); qdisc_refcount_inc(sch); diff --git a/net/sctp/auth.c b/net/sctp/auth.c index fe74c5f95630..db6b7373d16c 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -857,14 +857,18 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength); cur_key->key = key; - if (replace) { - list_del_init(&shkey->key_list); - sctp_auth_shkey_release(shkey); - if (asoc && asoc->active_key_id == auth_key->sca_keynumber) - sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + if (!replace) { + list_add(&cur_key->key_list, sh_keys); + return 0; } + + list_del_init(&shkey->key_list); + sctp_auth_shkey_release(shkey); list_add(&cur_key->key_list, sh_keys); + if (asoc && asoc->active_key_id == auth_key->sca_keynumber) + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + return 0; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 
898389611ae8..c038efc23ce3 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -795,7 +795,7 @@ static int smc_connect_rdma(struct smc_sock *smc, reason_code = SMC_CLC_DECL_NOSRVLINK; goto connect_abort; } - smc->conn.lnk = link; + smc_switch_link_and_count(&smc->conn, link); } /* create send buffer and rmb */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index cd0d7c908b2a..af227b65669e 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -917,8 +917,8 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend, return rc; } -static void smc_switch_link_and_count(struct smc_connection *conn, - struct smc_link *to_lnk) +void smc_switch_link_and_count(struct smc_connection *conn, + struct smc_link *to_lnk) { atomic_dec(&conn->lnk->conn_cnt); conn->lnk = to_lnk; @@ -1752,21 +1752,30 @@ out: return rc; } -/* convert the RMB size into the compressed notation - minimum 16K. +#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ +#define SMCR_RMBE_SIZES 5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */ + +/* convert the RMB size into the compressed notation (minimum 16K, see + * SMCD/R_DMBE_SIZES. * In contrast to plain ilog2, this rounds towards the next power of 2, * so the socket application gets at least its desired sndbuf / rcvbuf size. */ -static u8 smc_compress_bufsize(int size) +static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb) { + const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE; u8 compressed; if (size <= SMC_BUF_MIN_SIZE) return 0; - size = (size - 1) >> 14; - compressed = ilog2(size) + 1; - if (compressed >= SMC_RMBE_SIZES) - compressed = SMC_RMBE_SIZES - 1; + size = (size - 1) >> 14; /* convert to 16K multiple */ + compressed = min_t(u8, ilog2(size) + 1, + is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES); + + if (!is_smcd && is_rmb) + /* RMBs are backed by & limited to max size of scatterlists */ + compressed = min_t(u8, compressed, ilog2(max_scat >> 14)); + return compressed; } @@ -1982,17 +1991,12 @@ out: return rc; } -#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 
6 -> 1MB */ - static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, bool is_dmb, int bufsize) { struct smc_buf_desc *buf_desc; int rc; - if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES) - return ERR_PTR(-EAGAIN); - /* try to alloc a new DMB */ buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); if (!buf_desc) @@ -2041,9 +2045,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) /* use socket send buffer size (w/o overhead) as start value */ sk_buf_size = smc->sk.sk_sndbuf / 2; - for (bufsize_short = smc_compress_bufsize(sk_buf_size); + for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb); bufsize_short >= 0; bufsize_short--) { - if (is_rmb) { lock = &lgr->rmbs_lock; buf_list = &lgr->rmbs[bufsize_short]; @@ -2052,8 +2055,6 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) buf_list = &lgr->sndbufs[bufsize_short]; } bufsize = smc_uncompress_bufsize(bufsize_short); - if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) - continue; /* check for reusable slot in the link group */ buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 6d6fd1397c87..c043ecdca5c4 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -97,6 +97,7 @@ struct smc_link { unsigned long *wr_tx_mask; /* bit mask of used indexes */ u32 wr_tx_cnt; /* number of WR send buffers */ wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */ + atomic_t wr_tx_refcnt; /* tx refs to link */ struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */ struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */ @@ -109,6 +110,7 @@ struct smc_link { struct ib_reg_wr wr_reg; /* WR register memory region */ wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ + atomic_t wr_reg_refcnt; /* reg refs to link */ enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/ @@ -444,6 +446,8 @@ void smc_core_exit(void); int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini); void smcr_link_clear(struct smc_link *lnk, bool log); +void smc_switch_link_and_count(struct smc_connection *conn, + struct smc_link *to_lnk); int smcr_buf_map_lgr(struct smc_link *lnk); int smcr_buf_reg_lgr(struct smc_link *lnk); void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 273eaf1bfe49..2e7560eba981 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -888,6 +888,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) if (!rc) goto out; out_clear_lnk: + lnk_new->state = SMC_LNK_INACTIVE; smcr_link_clear(lnk_new, false); out_reject: smc_llc_cli_add_link_reject(qentry); @@ -1184,6 +1185,7 @@ int smc_llc_srv_add_link(struct smc_link *link) goto out_err; return 0; out_err: + link_new->state = SMC_LNK_INACTIVE; smcr_link_clear(link_new, false); return rc; } @@ -1286,10 +1288,8 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr) del_llc->reason = 0; smc_llc_send_message(lnk, &qentry->msg); /* response */ - if (smc_link_downing(&lnk_del->state)) { - if (smc_switch_conns(lgr, lnk_del, false)) - smc_wr_tx_wait_no_pending_sends(lnk_del); - } + if (smc_link_downing(&lnk_del->state)) + smc_switch_conns(lgr, lnk_del, false); smcr_link_clear(lnk_del, true); active_links = smc_llc_active_link_count(lgr); @@ -1805,8 +1805,6 @@ void smc_llc_link_clear(struct 
smc_link *link, bool log) link->smcibdev->ibdev->name, link->ibport); complete(&link->llc_testlink_resp); cancel_delayed_work_sync(&link->llc_testlink_wrk); - smc_wr_wakeup_reg_wait(link); - smc_wr_wakeup_tx_wait(link); } /* register a new rtoken at the remote peer (for all links) */ diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 289025cd545a..c79361dfcdfb 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -496,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn, /* Wakeup sndbuf consumers from any context (IRQ or process) * since there is more data to transmit; usable snd_wnd as max transmit */ -static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) +static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; struct smc_link *link = conn->lnk; @@ -550,6 +550,22 @@ out_unlock: return rc; } +static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) +{ + struct smc_link *link = conn->lnk; + int rc = -ENOLINK; + + if (!link) + return rc; + + atomic_inc(&link->wr_tx_refcnt); + if (smc_link_usable(link)) + rc = _smcr_tx_sndbuf_nonempty(conn); + if (atomic_dec_and_test(&link->wr_tx_refcnt)) + wake_up_all(&link->wr_tx_wait); + return rc; +} + static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index cbc73a7e4d59..a419e9af36b9 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -322,9 +322,12 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) if (rc) return rc; + atomic_inc(&link->wr_reg_refcnt); rc = wait_event_interruptible_timeout(link->wr_reg_wait, (link->wr_reg_state != POSTED), SMC_WR_REG_MR_WAIT_TIME); + if (atomic_dec_and_test(&link->wr_reg_refcnt)) + wake_up_all(&link->wr_reg_wait); if (!rc) { /* timeout - terminate link */ smcr_link_down_cond_sched(link); @@ -566,10 +569,15 @@ void smc_wr_free_link(struct smc_link *lnk) return; ibdev = lnk->smcibdev->ibdev; + smc_wr_wakeup_reg_wait(lnk); + smc_wr_wakeup_tx_wait(lnk); + if (smc_wr_tx_wait_no_pending_sends(lnk)) memset(lnk->wr_tx_mask, 0, BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); + wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); + wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); if (lnk->wr_rx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, @@ -728,7 +736,9 @@ int smc_wr_create_link(struct smc_link *lnk) memset(lnk->wr_tx_mask, 0, BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); init_waitqueue_head(&lnk->wr_tx_wait); + atomic_set(&lnk->wr_tx_refcnt, 0); init_waitqueue_head(&lnk->wr_reg_wait); + atomic_set(&lnk->wr_reg_refcnt, 0); return rc; dma_unmap: diff --git a/net/tipc/link.c b/net/tipc/link.c index cf586840caeb..1b7a487c8841 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -913,7 +913,7 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, dnode, l->addr, dport, 0, 0); if (!skb) - return -ENOMEM; + return -ENOBUFS; msg_set_dest_droppable(buf_msg(skb), true); TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr); skb_queue_tail(&l->wakeupq, skb); @@ -1031,7 +1031,7 @@ void tipc_link_reset(struct tipc_link *l) * * Consumes the buffer chain. 
* Messages at TIPC_SYSTEM_IMPORTANCE are always accepted - * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS or -ENOMEM + * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS */ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) @@ -1089,7 +1089,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, if (!_skb) { kfree_skb(skb); __skb_queue_purge(list); - return -ENOMEM; + return -ENOBUFS; } __skb_queue_tail(transmq, skb); tipc_link_set_skb_retransmit_time(skb, l); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b15b2b1b2f38..e3105ba407c7 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1518,7 +1518,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(syn && !rc)) { tipc_set_sk_state(sk, TIPC_CONNECTING); - if (timeout) { + if (dlen && timeout) { timeout = msecs_to_jiffies(timeout); tipc_wait_for_connect(sock, &timeout); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ec02e70a549b..4cf0b1c47f0f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1908,7 +1908,6 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other return err; skb_put(skb, 1); - skb->len = 1; err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); if (err) { @@ -1917,11 +1916,19 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other } unix_state_lock(other); + + if (sock_flag(other, SOCK_DEAD) || + (other->sk_shutdown & RCV_SHUTDOWN)) { + unix_state_unlock(other); + kfree_skb(skb); + return -EPIPE; + } + maybe_add_creds(skb, sock, other); skb_get(skb); if (ousk->oob_skb) - kfree_skb(ousk->oob_skb); + consume_skb(ousk->oob_skb); ousk->oob_skb = skb; @@ -2426,19 +2433,37 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int chunk = 1; + struct sk_buff *oob_skb; + + mutex_lock(&u->iolock); + unix_state_lock(sk); - if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) + if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { + unix_state_unlock(sk); + mutex_unlock(&u->iolock); return -EINVAL; + } - chunk = state->recv_actor(u->oob_skb, 0, chunk, state); - if (chunk < 0) - return -EFAULT; + oob_skb = u->oob_skb; if (!(state->flags & MSG_PEEK)) { - UNIXCB(u->oob_skb).consumed += 1; - kfree_skb(u->oob_skb); u->oob_skb = NULL; } + + unix_state_unlock(sk); + + chunk = state->recv_actor(oob_skb, 0, chunk, state); + + if (!(state->flags & MSG_PEEK)) { + UNIXCB(oob_skb).consumed += 1; + kfree_skb(oob_skb); + } + + mutex_unlock(&u->iolock); + + if (chunk < 0) + return -EFAULT; + state->msg->msg_flags |= MSG_OOB; return 1; } @@ -2498,13 +2523,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, if (unlikely(flags & MSG_OOB)) { err = -EOPNOTSUPP; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) - mutex_lock(&u->iolock); - unix_state_lock(sk); - err = unix_stream_recv_urg(state); - - unix_state_unlock(sk); - mutex_unlock(&u->iolock); #endif goto out; } diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 177e883f451e..20f53575b5c9 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -105,6 +105,9 @@ static void unix_bpf_check_needs_rebuild(struct proto *ops) int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { + if (sk->sk_type != SOCK_DGRAM) + return -EOPNOTSUPP; + if (restore) { sk->sk_write_space = psock->saved_write_space; WRITE_ONCE(sk->sk_prot, psock->sk_proto); 
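[Editor's note — illustration only, not part of any patch above.] The net/unix hunks just shown rework how out-of-band (MSG_OOB) data is queued and consumed on AF_UNIX SOCK_STREAM sockets. As a minimal user-space sketch of the path those hunks exercise — assuming a kernel built with CONFIG_AF_UNIX_OOB; on other kernels the MSG_OOB send should simply fail with EOPNOTSUPP:

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fds[2];
	char buf[2] = { 0 };

	/* Connected AF_UNIX stream pair; no address setup needed. */
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) {
		perror("socketpair");
		return 1;
	}

	/* One in-band byte followed by one out-of-band byte. */
	send(fds[0], "a", 1, 0);
	send(fds[0], "b", 1, MSG_OOB);

	/* A plain read returns in-band data up to the OOB mark... */
	recv(fds[1], buf, 1, 0);
	printf("inline: %c\n", buf[0]);

	/* ...while the OOB byte is fetched separately with MSG_OOB. */
	if (recv(fds[1], buf, 1, MSG_OOB) == 1)
		printf("oob: %c\n", buf[0]);
	else
		perror("recv MSG_OOB");

	close(fds[0]);
	close(fds[1]);
	return 0;
}
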
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index e0c2c992ad9c..4f7c99dfd16c 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -357,11 +357,14 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock) static void virtio_vsock_reset_sock(struct sock *sk) { - lock_sock(sk); + /* vmci_transport.c doesn't take sk_lock here either. At least we're + * under vsock_table_lock so the sock cannot disappear while we're + * executing. + */ + sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; sk_error_report(sk); - release_sock(sk); } static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 169ba8b72a63..081e7ae93cb1 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1079,6 +1079,9 @@ virtio_transport_recv_connected(struct sock *sk, virtio_transport_recv_enqueue(vsk, pkt); sk->sk_data_ready(sk); return err; + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + virtio_transport_send_credit_update(vsk); + break; case VIRTIO_VSOCK_OP_CREDIT_UPDATE: sk->sk_write_space(sk); break; diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index a20aec9d7393..2bf269390163 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -298,8 +298,16 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src) len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]); nla_for_each_attr(nla, attrs, len, remaining) { - int err = xfrm_xlate64_attr(dst, nla); + int err; + switch (type) { + case XFRM_MSG_NEWSPDINFO: + err = xfrm_nla_cpy(dst, nla, nla_len(nla)); + break; + default: + err = xfrm_xlate64_attr(dst, nla); + break; + } if (err) return err; } @@ -341,7 +349,8 @@ static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src /* Calculates len of translated 64-bit message. */ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, - struct nlattr *attrs[XFRMA_MAX+1]) + struct nlattr *attrs[XFRMA_MAX + 1], + int maxtype) { size_t len = nlmsg_len(src); @@ -358,10 +367,20 @@ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, case XFRM_MSG_POLEXPIRE: len += 8; break; + case XFRM_MSG_NEWSPDINFO: + /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ + return len; default: break; } + /* Unexpected for anything, but XFRM_MSG_NEWSPDINFO, please + * correct both 64=>32-bit and 32=>64-bit translators to copy + * new attributes. 
+ */ + if (WARN_ON_ONCE(maxtype)) + return len; + if (attrs[XFRMA_SA]) len += 4; if (attrs[XFRMA_POLICY]) @@ -440,7 +459,8 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, struct nlattr *attrs[XFRMA_MAX+1], - size_t size, u8 type, struct netlink_ext_ack *extack) + size_t size, u8 type, int maxtype, + struct netlink_ext_ack *extack) { size_t pos; int i; @@ -520,6 +540,25 @@ static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, } pos = dst->nlmsg_len; + if (maxtype) { + /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ + WARN_ON_ONCE(src->nlmsg_type != XFRM_MSG_NEWSPDINFO); + + for (i = 1; i <= maxtype; i++) { + int err; + + if (!attrs[i]) + continue; + + /* just copy - no need for translation */ + err = xfrm_attr_cpy32(dst, &pos, attrs[i], size, + nla_len(attrs[i]), nla_len(attrs[i])); + if (err) + return err; + } + return 0; + } + for (i = 1; i < XFRMA_MAX + 1; i++) { int err; @@ -564,7 +603,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, if (err < 0) return ERR_PTR(err); - len = xfrm_user_rcv_calculate_len64(h32, attrs); + len = xfrm_user_rcv_calculate_len64(h32, attrs, maxtype); /* The message doesn't need translation */ if (len == nlmsg_len(h32)) return NULL; @@ -574,7 +613,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, if (!h64) return ERR_PTR(-ENOMEM); - err = xfrm_xlate32(h64, h32, attrs, len, type, extack); + err = xfrm_xlate32(h64, h32, attrs, len, type, maxtype, extack); if (err < 0) { kvfree(h64); return ERR_PTR(err); diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 2e8afe078d61..cb40ff0ff28d 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -241,7 +241,7 @@ static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms) break; } - WARN_ON(!pos); + WARN_ON(list_entry_is_head(pos, &ipcomp_tfms_list, list)); if (--pos->users) return; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 827d84255021..7f881f5a5897 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -155,7 +155,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; static struct kmem_cache *xfrm_dst_cache __ro_after_init; -static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation; static struct rhashtable xfrm_policy_inexact_table; static const struct rhashtable_params xfrm_pol_inexact_params; @@ -585,7 +584,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)); @@ -596,7 +595,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); @@ -1245,7 +1244,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); /* make 
sure that we can insert the indirect policies again before * we start with destructive action. @@ -1354,7 +1353,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) out_unlock: __xfrm_policy_inexact_flush(net); - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); @@ -2091,15 +2090,12 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - retry: - sequence = read_seqcount_begin(&xfrm_policy_hash_generation); rcu_read_lock(); - - chain = policy_hash_direct(net, daddr, saddr, family, dir); - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { - rcu_read_unlock(); - goto retry; - } + retry: + do { + sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); + chain = policy_hash_direct(net, daddr, saddr, family, dir); + } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)); ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { @@ -2130,15 +2126,11 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } skip_inexact: - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { - rcu_read_unlock(); + if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)) goto retry; - } - if (ret && !xfrm_pol_hold_rcu(ret)) { - rcu_read_unlock(); + if (ret && !xfrm_pol_hold_rcu(ret)) goto retry; - } fail: rcu_read_unlock(); @@ -4089,6 +4081,7 @@ static int __net_init xfrm_net_init(struct net *net) /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); spin_lock_init(&net->xfrm.xfrm_policy_lock); + seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); rv = xfrm_statistics_init(net); @@ -4133,7 +4126,6 @@ void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); xfrm_dev_init(); - seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex); xfrm_input_init(); #ifdef CONFIG_XFRM_ESPINTCP diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b47d613409b7..7aff641c717d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2811,6 +2811,16 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, err = link->doit(skb, nlh, attrs); + /* We need to free skb allocated in xfrm_alloc_compat() before + * returning from this function, because consume_skb() won't take + * care of frag_list since netlink destructor sets + * sbk->head to NULL. 
(see netlink_skb_destructor()) + */ + if (skb_has_frag_list(skb)) { + kfree_skb(skb_shinfo(skb)->frag_list); + skb_shinfo(skb)->frag_list = NULL; + } + err: kvfree(nlh64); return err; diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c index 34b64394ed9c..f0c5d95084de 100644 --- a/samples/bpf/xdp1_kern.c +++ b/samples/bpf/xdp1_kern.c @@ -57,6 +57,7 @@ int xdp_prog1(struct xdp_md *ctx) h_proto = eth->h_proto; + /* Handle VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; @@ -66,6 +67,7 @@ int xdp_prog1(struct xdp_md *ctx) return rc; h_proto = vhdr->h_vlan_encapsulated_proto; } + /* Handle double VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c index c787f4b49646..d8a64ab077b0 100644 --- a/samples/bpf/xdp2_kern.c +++ b/samples/bpf/xdp2_kern.c @@ -73,6 +73,7 @@ int xdp_prog1(struct xdp_md *ctx) h_proto = eth->h_proto; + /* Handle VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; @@ -82,6 +83,7 @@ int xdp_prog1(struct xdp_md *ctx) return rc; h_proto = vhdr->h_vlan_encapsulated_proto; } + /* Handle double VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index d3ecdc18b9c1..9e225c96b77e 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -841,7 +841,7 @@ int main(int argc, char **argv) memset(cpu, 0, n_cpus * sizeof(int)); /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:", + while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n", long_options, &longindex)) != -1) { switch (opt) { case 'd': diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 33d0bdebbed8..49d7a6ad7e39 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1,12 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2017 - 2018 Intel Corporation. */ -#include <asm/barrier.h> #include <errno.h> #include <getopt.h> #include <libgen.h> #include <linux/bpf.h> -#include <linux/compiler.h> #include <linux/if_link.h> #include <linux/if_xdp.h> #include <linux/if_ether.h> @@ -653,17 +651,15 @@ out: return result; } -__sum16 ip_fast_csum(const void *iph, unsigned int ihl); - /* * This is a version of ip_compute_csum() optimized for IP headers, * which always checksum on 4 octet boundaries. 
* This function code has been taken from * Linux kernel lib/checksum.c */ -__sum16 ip_fast_csum(const void *iph, unsigned int ihl) +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { - return (__force __sum16)~do_csum(iph, ihl * 4); + return (__sum16)~do_csum(iph, ihl * 4); } /* @@ -673,11 +669,11 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) */ static inline __sum16 csum_fold(__wsum csum) { - u32 sum = (__force u32)csum; + u32 sum = (u32)csum; sum = (sum & 0xffff) + (sum >> 16); sum = (sum & 0xffff) + (sum >> 16); - return (__force __sum16)~sum; + return (__sum16)~sum; } /* @@ -703,16 +699,16 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { - unsigned long long s = (__force u32)sum; + unsigned long long s = (u32)sum; - s += (__force u32)saddr; - s += (__force u32)daddr; + s += (u32)saddr; + s += (u32)daddr; #ifdef __BIG_ENDIAN__ s += proto + len; #else s += (proto + len) << 8; #endif - return (__force __wsum)from64to32(s); + return (__wsum)from64to32(s); } /* diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh index a335393157eb..933194257a24 100644 --- a/samples/pktgen/functions.sh +++ b/samples/pktgen/functions.sh @@ -123,7 +123,7 @@ function root_check_run_with_sudo() { if [ "$EUID" -ne 0 ]; then if [ -x $0 ]; then # Directly executable use sudo info "Not root, running with sudo" - sudo "$0" "$@" + sudo -E "$0" "$@" exit $? fi err 4 "cannot perform sudo run of $0" diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh index 56c5f5af350f..cff51f861506 100755 --- a/samples/pktgen/pktgen_sample04_many_flows.sh +++ b/samples/pktgen/pktgen_sample04_many_flows.sh @@ -13,13 +13,15 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh # Set some default params, if they didn't get set -[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +if [ -z "$DEST_IP" ]; then + [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" +fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$CLONE_SKB" ] && CLONE_SKB="0" [ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely if [ -n "$DEST_IP" ]; then - validate_addr $DEST_IP - read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP) + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) fi if [ -n "$DST_PORT" ]; then read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) @@ -62,8 +64,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Single destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst_min $DST_MIN" - pg_set $dev "dst_max $DST_MAX" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh index 6e0effabca59..3578d0aa4ac5 100755 --- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh +++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh @@ -17,14 +17,16 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh # Set some default params, if they didn't get set -[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +if [ -z "$DEST_IP" ]; then + [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" +fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$CLONE_SKB" ] && CLONE_SKB="0" [ -z "$BURST" ] && BURST=32 
[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely if [ -n "$DEST_IP" ]; then - validate_addr $DEST_IP - read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP) + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) fi if [ -n "$DST_PORT" ]; then read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) @@ -52,8 +54,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Single destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst_min $DST_MIN" - pg_set $dev "dst_max $DST_MAX" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range diff --git a/scripts/checkversion.pl b/scripts/checkversion.pl index f67b125c5269..94cd49eff605 100755 --- a/scripts/checkversion.pl +++ b/scripts/checkversion.pl @@ -1,10 +1,10 @@ #! /usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 # -# checkversion find uses of LINUX_VERSION_CODE or KERNEL_VERSION -# without including <linux/version.h>, or cases of -# including <linux/version.h> that don't need it. -# Copyright (C) 2003, Randy Dunlap <rdunlap@xenotime.net> +# checkversion finds uses of all macros in <linux/version.h> +# where the source files do not #include <linux/version.h>; or cases +# of including <linux/version.h> where it is not needed. +# Copyright (C) 2003, Randy Dunlap <rdunlap@infradead.org> use strict; @@ -13,7 +13,8 @@ $| = 1; my $debugging; foreach my $file (@ARGV) { - next if $file =~ "include/linux/version\.h"; + next if $file =~ "include/generated/uapi/linux/version\.h"; + next if $file =~ "usr/include/linux/version\.h"; # Open this file. open( my $f, '<', $file ) or die "Can't open $file: $!\n"; @@ -41,8 +42,11 @@ foreach my $file (@ARGV) { $iLinuxVersion = $. if m/^\s*#\s*include\s*<linux\/version\.h>/o; } - # Look for uses: LINUX_VERSION_CODE, KERNEL_VERSION, UTS_RELEASE - if (($_ =~ /LINUX_VERSION_CODE/) || ($_ =~ /\WKERNEL_VERSION/)) { + # Look for uses: LINUX_VERSION_CODE, KERNEL_VERSION, + # LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, LINUX_VERSION_SUBLEVEL + if (($_ =~ /LINUX_VERSION_CODE/) || ($_ =~ /\WKERNEL_VERSION/) || + ($_ =~ /LINUX_VERSION_MAJOR/) || ($_ =~ /LINUX_VERSION_PATCHLEVEL/) || + ($_ =~ /LINUX_VERSION_SUBLEVEL/)) { $fUseVersion = 1; last if $iLinuxVersion; } diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index c17e48020ec3..8f6b13ae46bf 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -173,39 +173,6 @@ my $mcount_regex; # Find the call site to mcount (return offset) my $mcount_adjust; # Address adjustment to mcount offset my $alignment; # The .align value to use for $mcount_section my $section_type; # Section header plus possible alignment command -my $can_use_local = 0; # If we can use local function references - -# Shut up recordmcount if user has older objcopy -my $quiet_recordmcount = ".tmp_quiet_recordmcount"; -my $print_warning = 1; -$print_warning = 0 if ( -f $quiet_recordmcount); - -## -# check_objcopy - whether objcopy supports --globalize-symbols -# -# --globalize-symbols came out in 2.17, we must test the version -# of objcopy, and if it is less than 2.17, then we can not -# record local functions. 
-sub check_objcopy -{ - open (IN, "$objcopy --version |") or die "error running $objcopy"; - while (<IN>) { - if (/objcopy.*\s(\d+)\.(\d+)/) { - $can_use_local = 1 if ($1 > 2 || ($1 == 2 && $2 >= 17)); - last; - } - } - close (IN); - - if (!$can_use_local && $print_warning) { - print STDERR "WARNING: could not find objcopy version or version " . - "is less than 2.17.\n" . - "\tLocal function references are disabled.\n"; - open (QUIET, ">$quiet_recordmcount"); - printf QUIET "Disables the warning from recordmcount.pl\n"; - close QUIET; - } -} if ($arch =~ /(x86(_64)?)|(i386)/) { if ($bits == 64) { @@ -434,8 +401,6 @@ if ($filename =~ m,^(.*)(\.\S),) { my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s"; my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o"; -check_objcopy(); - # # Step 1: find all the local (static functions) and weak symbols. # 't' is local, 'w/W' is weak @@ -473,11 +438,6 @@ sub update_funcs # is this function static? If so, note this fact. if (defined $locals{$ref_func}) { - - # only use locals if objcopy supports globalize-symbols - if (!$can_use_local) { - return; - } $convert{$ref_func} = 1; } diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py index 74f8aadfd4cb..7011fbe003ff 100755 --- a/scripts/tracing/draw_functrace.py +++ b/scripts/tracing/draw_functrace.py @@ -17,7 +17,7 @@ Usage: $ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func Wait some times but not too much, the script is a bit slow. Break the pipe (Ctrl + Z) - $ scripts/draw_functrace.py < raw_trace_func > draw_functrace + $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace Then you have your drawn trace in draw_functrace """ @@ -103,10 +103,10 @@ def parseLine(line): line = line.strip() if line.startswith("#"): raise CommentLineException - m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line) + m = re.match("[^]]+?\\] +([a-z.]+) +([0-9.]+): (\\w+) <-(\\w+)", line) if m is None: raise BrokenLineException - return (m.group(1), m.group(2), m.group(3)) + return (m.group(2), m.group(3), m.group(4)) def main(): diff --git a/security/security.c b/security/security.c index 09533cbb7221..9ffa9e9c5c55 100644 --- a/security/security.c +++ b/security/security.c @@ -58,10 +58,11 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = { [LOCKDOWN_MMIOTRACE] = "unsafe mmio", [LOCKDOWN_DEBUGFS] = "debugfs access", [LOCKDOWN_XMON_WR] = "xmon write access", + [LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM", [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes", - [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM", + [LOCKDOWN_BPF_READ_KERNEL] = "use of bpf to read kernel RAM", [LOCKDOWN_PERF] = "unsafe use of perf", [LOCKDOWN_TRACEFS] = "use of tracefs", [LOCKDOWN_XMON_RW] = "xmon read and write access", diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index defc5ef35c66..0ae1b718194a 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -874,7 +874,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) rc = sidtab_init(s); if (rc) { pr_err("SELinux: out of memory on SID table init\n"); - goto out; + return rc; } head = p->ocontexts[OCON_ISID]; @@ -885,7 +885,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) if (sid == SECSID_NULL) { pr_err("SELinux: SID 0 was assigned a context.\n"); sidtab_destroy(s); - goto out; + return -EINVAL; } /* Ignore initial SIDs unused by this 
kernel. */ @@ -897,12 +897,10 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) pr_err("SELinux: unable to load initial SID %s.\n", name); sidtab_destroy(s); - goto out; + return rc; } } - rc = 0; -out: - return rc; + return 0; } int policydb_class_isvalid(struct policydb *p, unsigned int class) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 83b79edfa52d..439a358ecfe9 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -215,7 +215,7 @@ static int snd_dma_continuous_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { return remap_pfn_range(area, area->vm_start, - dmab->addr >> PAGE_SHIFT, + page_to_pfn(virt_to_page(dmab->area)), area->vm_end - area->vm_start, area->vm_page_prot); } diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 6a2971a7e6a1..71323d807dbf 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -246,12 +246,15 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream) if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP)) return false; - if (substream->ops->mmap) + if (substream->ops->mmap || substream->ops->page) return true; switch (substream->dma_buffer.dev.type) { case SNDRV_DMA_TYPE_UNKNOWN: - return false; + /* we can't know the device, so just assume that the driver does + * everything right + */ + return true; case SNDRV_DMA_TYPE_CONTINUOUS: case SNDRV_DMA_TYPE_VMALLOC: return true; diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index b9c2ce2b8d5a..84d78630463e 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -514,10 +514,11 @@ static int check_and_subscribe_port(struct snd_seq_client *client, return err; } -static void delete_and_unsubscribe_port(struct snd_seq_client *client, - struct snd_seq_client_port *port, - struct snd_seq_subscribers *subs, - bool is_src, bool ack) +/* called with grp->list_mutex held */ +static void __delete_and_unsubscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool ack) { struct snd_seq_port_subs_info *grp; struct list_head *list; @@ -525,7 +526,6 @@ static void delete_and_unsubscribe_port(struct snd_seq_client *client, grp = is_src ? &port->c_src : &port->c_dest; list = is_src ? &subs->src_list : &subs->dest_list; - down_write(&grp->list_mutex); write_lock_irq(&grp->list_lock); empty = list_empty(list); if (!empty) @@ -535,6 +535,18 @@ static void delete_and_unsubscribe_port(struct snd_seq_client *client, if (!empty) unsubscribe_port(client, port, grp, &subs->info, ack); +} + +static void delete_and_unsubscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool ack) +{ + struct snd_seq_port_subs_info *grp; + + grp = is_src ? 
&port->c_src : &port->c_dest; + down_write(&grp->list_mutex); + __delete_and_unsubscribe_port(client, port, subs, is_src, ack); up_write(&grp->list_mutex); } @@ -590,27 +602,30 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector, struct snd_seq_client_port *dest_port, struct snd_seq_port_subscribe *info) { - struct snd_seq_port_subs_info *src = &src_port->c_src; + struct snd_seq_port_subs_info *dest = &dest_port->c_dest; struct snd_seq_subscribers *subs; int err = -ENOENT; - down_write(&src->list_mutex); + /* always start from deleting the dest port for avoiding concurrent + * deletions + */ + down_write(&dest->list_mutex); /* look for the connection */ - list_for_each_entry(subs, &src->list_head, src_list) { + list_for_each_entry(subs, &dest->list_head, dest_list) { if (match_subs_info(info, &subs->info)) { - atomic_dec(&subs->ref_count); /* mark as not ready */ + __delete_and_unsubscribe_port(dest_client, dest_port, + subs, false, + connector->number != dest_client->number); err = 0; break; } } - up_write(&src->list_mutex); + up_write(&dest->list_mutex); if (err < 0) return err; delete_and_unsubscribe_port(src_client, src_port, subs, true, connector->number != src_client->number); - delete_and_unsubscribe_port(dest_client, dest_port, subs, false, - connector->number != dest_client->number); kfree(subs); return 0; } diff --git a/sound/firewire/oxfw/oxfw-stream.c b/sound/firewire/oxfw/oxfw-stream.c index 0ef242fdd3bc..fff18b5d4e05 100644 --- a/sound/firewire/oxfw/oxfw-stream.c +++ b/sound/firewire/oxfw/oxfw-stream.c @@ -153,7 +153,7 @@ static int init_stream(struct snd_oxfw *oxfw, struct amdtp_stream *stream) struct cmp_connection *conn; enum cmp_direction c_dir; enum amdtp_stream_direction s_dir; - unsigned int flags = CIP_UNAWARE_SYT; + unsigned int flags = 0; int err; if (!(oxfw->quirks & SND_OXFW_QUIRK_BLOCKING_TRANSMISSION)) @@ -161,6 +161,13 @@ static int init_stream(struct snd_oxfw *oxfw, struct amdtp_stream *stream) else flags |= CIP_BLOCKING; + // OXFW 970/971 has no function to generate playback timing according to the sequence + // of value in syt field, thus the packet should include NO_INFO value in the field. + // However, some models just ignore data blocks in packet with NO_INFO for audio data + // processing. + if (!(oxfw->quirks & SND_OXFW_QUIRK_IGNORE_NO_INFO_PACKET)) + flags |= CIP_UNAWARE_SYT; + if (stream == &oxfw->tx_stream) { conn = &oxfw->out_conn; c_dir = CMP_OUTPUT; diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index 84971d78d152..cb5b5e3a481b 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -159,8 +159,10 @@ static int detect_quirks(struct snd_oxfw *oxfw, const struct ieee1394_device_id return snd_oxfw_scs1x_add(oxfw); } - if (entry->vendor_id == OUI_APOGEE && entry->model_id == MODEL_DUET_FW) - oxfw->quirks |= SND_OXFW_QUIRK_BLOCKING_TRANSMISSION; + if (entry->vendor_id == OUI_APOGEE && entry->model_id == MODEL_DUET_FW) { + oxfw->quirks |= SND_OXFW_QUIRK_BLOCKING_TRANSMISSION | + SND_OXFW_QUIRK_IGNORE_NO_INFO_PACKET; + } /* * TASCAM FireOne has physical control and requires a pair of additional diff --git a/sound/firewire/oxfw/oxfw.h b/sound/firewire/oxfw/oxfw.h index ee47abcb0c90..c13034f6c2ca 100644 --- a/sound/firewire/oxfw/oxfw.h +++ b/sound/firewire/oxfw/oxfw.h @@ -42,6 +42,11 @@ enum snd_oxfw_quirk { SND_OXFW_QUIRK_BLOCKING_TRANSMISSION = 0x04, // Stanton SCS1.d and SCS1.m support unique transaction. 
SND_OXFW_QUIRK_SCS_TRANSACTION = 0x08, + // Apogee Duet FireWire ignores data blocks in packet with NO_INFO for audio data + // processing, while output level meter moves. Any value in syt field of packet takes + // the device to process audio data even if the value is invalid in a point of + // IEC 61883-1/6. + SND_OXFW_QUIRK_IGNORE_NO_INFO_PACKET = 0x10, }; /* This is an arbitrary number for convinience. */ diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index e97d00585e8e..481d8f8d3396 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -3460,7 +3460,7 @@ static int cap_put_caller(struct snd_kcontrol *kcontrol, struct hda_gen_spec *spec = codec->spec; const struct hda_input_mux *imux; struct nid_path *path; - int i, adc_idx, err = 0; + int i, adc_idx, ret, err = 0; imux = &spec->input_mux; adc_idx = kcontrol->id.index; @@ -3470,9 +3470,13 @@ static int cap_put_caller(struct snd_kcontrol *kcontrol, if (!path || !path->ctls[type]) continue; kcontrol->private_value = path->ctls[type]; - err = func(kcontrol, ucontrol); - if (err < 0) + ret = func(kcontrol, ucontrol); + if (ret < 0) { + err = ret; break; + } + if (ret > 0) + err = 1; } mutex_unlock(&codec->control_mutex); if (err >= 0 && spec->cap_sync_hook) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 0322b289505e..0062c18b646a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -883,10 +883,11 @@ static unsigned int azx_get_pos_skl(struct azx *chip, struct azx_dev *azx_dev) return azx_get_pos_posbuf(chip, azx_dev); } -static void azx_shutdown_chip(struct azx *chip) +static void __azx_shutdown_chip(struct azx *chip, bool skip_link_reset) { azx_stop_chip(chip); - azx_enter_link_reset(chip); + if (!skip_link_reset) + azx_enter_link_reset(chip); azx_clear_irq_pending(chip); display_power(chip, false); } @@ -895,6 +896,11 @@ static void azx_shutdown_chip(struct azx *chip) static DEFINE_MUTEX(card_list_lock); static LIST_HEAD(card_list); +static void azx_shutdown_chip(struct azx *chip) +{ + __azx_shutdown_chip(chip, false); +} + static void azx_add_card_list(struct azx *chip) { struct hda_intel *hda = container_of(chip, struct hda_intel, chip); @@ -2385,7 +2391,7 @@ static void azx_shutdown(struct pci_dev *pci) return; chip = card->private_data; if (chip && chip->running) - azx_shutdown_chip(chip); + __azx_shutdown_chip(chip, true); } /* PCI IDs */ diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index caaf0e8aac11..96f32eaa24df 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8274,9 +8274,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x142b, "Acer Swift SF314-42", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire 
A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), @@ -8330,6 +8332,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC), + SND_PCI_QUIRK(0x1028, 0x0a61, "Dell XPS 15 9510", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -8429,6 +8432,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), + SND_PCI_QUIRK(0x103c, 0x8805, "HP ProBook 650 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8847, "HP EliteBook x360 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), @@ -8463,6 +8467,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS), SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK), + SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS), SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC), diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index a5c1a2c4eae4..773a136161f1 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -1041,6 +1041,7 @@ static const struct hda_fixup via_fixups[] = { }; static const struct snd_pci_quirk vt2002p_fixups[] = { + SND_PCI_QUIRK(0x1043, 0x13f7, "Asus B23E", VIA_FIXUP_POWER_SAVE), SND_PCI_QUIRK(0x1043, 0x1487, "Asus G75", VIA_FIXUP_ASUS_G75), SND_PCI_QUIRK(0x1043, 0x8532, "Asus X202E", VIA_FIXUP_INTMIC_BOOST), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo", VIA_FIXUP_POWER_SAVE), diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig index 8a13462e1a63..5dcf77af07af 100644 --- a/sound/soc/Kconfig +++ b/sound/soc/Kconfig @@ -36,6 +36,7 @@ config SND_SOC_COMPRESS config SND_SOC_TOPOLOGY bool + select SND_DYNAMIC_MINORS config SND_SOC_TOPOLOGY_KUNIT_TEST tristate "KUnit tests for SoC topology" diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c index 9449fb40a956..3c60c5f96dcb 100644 --- a/sound/soc/amd/acp-da7219-max98357a.c +++ b/sound/soc/amd/acp-da7219-max98357a.c @@ -525,6 +525,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { | SND_SOC_DAIFMT_CBM_CFM, .init = cz_da7219_init, .dpcm_playback = 1, + .stop_dma_first = 1, .ops = &cz_da7219_play_ops, SND_SOC_DAILINK_REG(designware1, dlgs, platform), }, @@ -534,6 +535,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_da7219_cap_ops, 
SND_SOC_DAILINK_REG(designware2, dlgs, platform), }, @@ -543,6 +545,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_playback = 1, + .stop_dma_first = 1, .ops = &cz_max_play_ops, SND_SOC_DAILINK_REG(designware3, mx, platform), }, @@ -553,6 +556,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_dmic0_cap_ops, SND_SOC_DAILINK_REG(designware3, adau, platform), }, @@ -563,6 +567,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_dmic1_cap_ops, SND_SOC_DAILINK_REG(designware2, adau, platform), }, diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index 143155a840ac..cc1ce6f22caa 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -969,7 +969,7 @@ static int acp_dma_hw_params(struct snd_soc_component *component, acp_set_sram_bank_state(rtd->acp_mmio, 0, true); /* Save for runtime private data */ - rtd->dma_addr = substream->dma_buffer.addr; + rtd->dma_addr = runtime->dma_addr; rtd->order = get_order(size); /* Fill the page table entries in ACP SRAM */ diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c index 8148b0d22e88..597d7c4b2a6b 100644 --- a/sound/soc/amd/raven/acp3x-pcm-dma.c +++ b/sound/soc/amd/raven/acp3x-pcm-dma.c @@ -286,7 +286,7 @@ static int acp3x_dma_hw_params(struct snd_soc_component *component, pr_err("pinfo failed\n"); } size = params_buffer_bytes(params); - rtd->dma_addr = substream->dma_buffer.addr; + rtd->dma_addr = substream->runtime->dma_addr; rtd->num_pages = (PAGE_ALIGN(size) >> PAGE_SHIFT); config_acp3x_dma(rtd, substream->stream); return 0; diff --git a/sound/soc/amd/renoir/acp3x-pdm-dma.c b/sound/soc/amd/renoir/acp3x-pdm-dma.c index bd20622b0933..0391c28dd078 100644 --- a/sound/soc/amd/renoir/acp3x-pdm-dma.c +++ b/sound/soc/amd/renoir/acp3x-pdm-dma.c @@ -242,7 +242,7 @@ static int acp_pdm_dma_hw_params(struct snd_soc_component *component, return -EINVAL; size = params_buffer_bytes(params); period_bytes = params_period_bytes(params); - rtd->dma_addr = substream->dma_buffer.addr; + rtd->dma_addr = substream->runtime->dma_addr; rtd->num_pages = (PAGE_ALIGN(size) >> PAGE_SHIFT); config_acp_dma(rtd, substream->stream); init_pdm_ring_buffer(MEM_WINDOW_START, size, period_bytes, diff --git a/sound/soc/amd/renoir/rn-pci-acp3x.c b/sound/soc/amd/renoir/rn-pci-acp3x.c index 19438da5dfa5..7b8040e812a1 100644 --- a/sound/soc/amd/renoir/rn-pci-acp3x.c +++ b/sound/soc/amd/renoir/rn-pci-acp3x.c @@ -382,6 +382,8 @@ static const struct dev_pm_ops rn_acp_pm = { .runtime_resume = snd_rn_acp_resume, .suspend = snd_rn_acp_suspend, .resume = snd_rn_acp_resume, + .restore = snd_rn_acp_resume, + .poweroff = snd_rn_acp_suspend, }; static void snd_rn_acp_remove(struct pci_dev *pci) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index a3b784ed4f70..db16071205ba 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1559,6 +1559,7 @@ config SND_SOC_WCD934X config SND_SOC_WCD938X depends on SND_SOC_WCD938X_SDW tristate + depends on SOUNDWIRE || !SOUNDWIRE config SND_SOC_WCD938X_SDW tristate "WCD9380/WCD9385 Codec - SDW" diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 
de8b83dd2c76..7bb38c370842 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -583,7 +583,10 @@ obj-$(CONFIG_SND_SOC_WCD_MBHC) += snd-soc-wcd-mbhc.o obj-$(CONFIG_SND_SOC_WCD9335) += snd-soc-wcd9335.o obj-$(CONFIG_SND_SOC_WCD934X) += snd-soc-wcd934x.o obj-$(CONFIG_SND_SOC_WCD938X) += snd-soc-wcd938x.o -obj-$(CONFIG_SND_SOC_WCD938X_SDW) += snd-soc-wcd938x-sdw.o +ifdef CONFIG_SND_SOC_WCD938X_SDW +# avoid link failure by forcing sdw code built-in when needed +obj-$(CONFIG_SND_SOC_WCD938X) += snd-soc-wcd938x-sdw.o +endif obj-$(CONFIG_SND_SOC_WL1273) += snd-soc-wl1273.o obj-$(CONFIG_SND_SOC_WM0010) += snd-soc-wm0010.o obj-$(CONFIG_SND_SOC_WM1250_EV1) += snd-soc-wm1250-ev1.o diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index eff013f295be..99c022be94a6 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -405,7 +405,7 @@ static const struct regmap_config cs42l42_regmap = { .use_single_write = true, }; -static DECLARE_TLV_DB_SCALE(adc_tlv, -9600, 100, false); +static DECLARE_TLV_DB_SCALE(adc_tlv, -9700, 100, true); static DECLARE_TLV_DB_SCALE(mixer_tlv, -6300, 100, true); static const char * const cs42l42_hpf_freq_text[] = { @@ -425,34 +425,23 @@ static SOC_ENUM_SINGLE_DECL(cs42l42_wnf3_freq_enum, CS42L42_ADC_WNF_HPF_CTL, CS42L42_ADC_WNF_CF_SHIFT, cs42l42_wnf3_freq_text); -static const char * const cs42l42_wnf05_freq_text[] = { - "280Hz", "315Hz", "350Hz", "385Hz", - "420Hz", "455Hz", "490Hz", "525Hz" -}; - -static SOC_ENUM_SINGLE_DECL(cs42l42_wnf05_freq_enum, CS42L42_ADC_WNF_HPF_CTL, - CS42L42_ADC_WNF_CF_SHIFT, - cs42l42_wnf05_freq_text); - static const struct snd_kcontrol_new cs42l42_snd_controls[] = { /* ADC Volume and Filter Controls */ SOC_SINGLE("ADC Notch Switch", CS42L42_ADC_CTL, - CS42L42_ADC_NOTCH_DIS_SHIFT, true, false), + CS42L42_ADC_NOTCH_DIS_SHIFT, true, true), SOC_SINGLE("ADC Weak Force Switch", CS42L42_ADC_CTL, CS42L42_ADC_FORCE_WEAK_VCM_SHIFT, true, false), SOC_SINGLE("ADC Invert Switch", CS42L42_ADC_CTL, CS42L42_ADC_INV_SHIFT, true, false), SOC_SINGLE("ADC Boost Switch", CS42L42_ADC_CTL, CS42L42_ADC_DIG_BOOST_SHIFT, true, false), - SOC_SINGLE_SX_TLV("ADC Volume", CS42L42_ADC_VOLUME, - CS42L42_ADC_VOL_SHIFT, 0xA0, 0x6C, adc_tlv), + SOC_SINGLE_S8_TLV("ADC Volume", CS42L42_ADC_VOLUME, -97, 12, adc_tlv), SOC_SINGLE("ADC WNF Switch", CS42L42_ADC_WNF_HPF_CTL, CS42L42_ADC_WNF_EN_SHIFT, true, false), SOC_SINGLE("ADC HPF Switch", CS42L42_ADC_WNF_HPF_CTL, CS42L42_ADC_HPF_EN_SHIFT, true, false), SOC_ENUM("HPF Corner Freq", cs42l42_hpf_freq_enum), SOC_ENUM("WNF 3dB Freq", cs42l42_wnf3_freq_enum), - SOC_ENUM("WNF 05dB Freq", cs42l42_wnf05_freq_enum), /* DAC Volume and Filter Controls */ SOC_SINGLE("DACA Invert Switch", CS42L42_DAC_CTL1, @@ -471,8 +460,8 @@ static const struct snd_soc_dapm_widget cs42l42_dapm_widgets[] = { SND_SOC_DAPM_OUTPUT("HP"), SND_SOC_DAPM_DAC("DAC", NULL, CS42L42_PWR_CTL1, CS42L42_HP_PDN_SHIFT, 1), SND_SOC_DAPM_MIXER("MIXER", CS42L42_PWR_CTL1, CS42L42_MIXER_PDN_SHIFT, 1, NULL, 0), - SND_SOC_DAPM_AIF_IN("SDIN1", NULL, 0, CS42L42_ASP_RX_DAI0_EN, CS42L42_ASP_RX0_CH1_SHIFT, 0), - SND_SOC_DAPM_AIF_IN("SDIN2", NULL, 1, CS42L42_ASP_RX_DAI0_EN, CS42L42_ASP_RX0_CH2_SHIFT, 0), + SND_SOC_DAPM_AIF_IN("SDIN1", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("SDIN2", NULL, 1, SND_SOC_NOPM, 0, 0), /* Playback Requirements */ SND_SOC_DAPM_SUPPLY("ASP DAI0", CS42L42_PWR_CTL1, CS42L42_ASP_DAI_PDN_SHIFT, 1, NULL, 0), @@ -630,6 +619,8 @@ static int cs42l42_pll_config(struct snd_soc_component *component) for (i = 0; i 
< ARRAY_SIZE(pll_ratio_table); i++) { if (pll_ratio_table[i].sclk == clk) { + cs42l42->pll_config = i; + /* Configure the internal sample rate */ snd_soc_component_update_bits(component, CS42L42_MCLK_CTL, CS42L42_INTERNAL_FS_MASK, @@ -638,14 +629,9 @@ static int cs42l42_pll_config(struct snd_soc_component *component) (pll_ratio_table[i].mclk_int != 24000000)) << CS42L42_INTERNAL_FS_SHIFT); - /* Set the MCLK src (PLL or SCLK) and the divide - * ratio - */ + snd_soc_component_update_bits(component, CS42L42_MCLK_SRC_SEL, - CS42L42_MCLK_SRC_SEL_MASK | CS42L42_MCLKDIV_MASK, - (pll_ratio_table[i].mclk_src_sel - << CS42L42_MCLK_SRC_SEL_SHIFT) | (pll_ratio_table[i].mclk_div << CS42L42_MCLKDIV_SHIFT)); /* Set up the LRCLK */ @@ -681,15 +667,6 @@ static int cs42l42_pll_config(struct snd_soc_component *component) CS42L42_FSYNC_PULSE_WIDTH_MASK, CS42L42_FRAC1_VAL(fsync - 1) << CS42L42_FSYNC_PULSE_WIDTH_SHIFT); - snd_soc_component_update_bits(component, - CS42L42_ASP_FRM_CFG, - CS42L42_ASP_5050_MASK, - CS42L42_ASP_5050_MASK); - /* Set the frame delay to 1.0 SCLK clocks */ - snd_soc_component_update_bits(component, CS42L42_ASP_FRM_CFG, - CS42L42_ASP_FSD_MASK, - CS42L42_ASP_FSD_1_0 << - CS42L42_ASP_FSD_SHIFT); /* Set the sample rates (96k or lower) */ snd_soc_component_update_bits(component, CS42L42_FS_RATE_EN, CS42L42_FS_EN_MASK, @@ -789,7 +766,18 @@ static int cs42l42_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) /* interface format */ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: - case SND_SOC_DAIFMT_LEFT_J: + /* + * 5050 mode, frame starts on falling edge of LRCLK, + * frame delayed by 1.0 SCLKs + */ + snd_soc_component_update_bits(component, + CS42L42_ASP_FRM_CFG, + CS42L42_ASP_STP_MASK | + CS42L42_ASP_5050_MASK | + CS42L42_ASP_FSD_MASK, + CS42L42_ASP_5050_MASK | + (CS42L42_ASP_FSD_1_0 << + CS42L42_ASP_FSD_SHIFT)); break; default: return -EINVAL; @@ -819,6 +807,25 @@ static int cs42l42_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) return 0; } +static int cs42l42_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component); + + /* + * Sample rates < 44.1 kHz would produce an out-of-range SCLK with + * a standard I2S frame. If the machine driver sets SCLK it must be + * legal. 
+ */ + if (cs42l42->sclk) + return 0; + + /* Machine driver has not set a SCLK, limit bottom end to 44.1 kHz */ + return snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_RATE, + 44100, 192000); +} + static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) @@ -832,6 +839,10 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream, cs42l42->srate = params_rate(params); cs42l42->bclk = snd_soc_params_to_bclk(params); + /* I2S frame always has 2 channels even for mono audio */ + if (channels == 1) + cs42l42->bclk *= 2; + switch(substream->stream) { case SNDRV_PCM_STREAM_CAPTURE: if (channels == 2) { @@ -855,6 +866,17 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream, snd_soc_component_update_bits(component, CS42L42_ASP_RX_DAI0_CH2_AP_RES, CS42L42_ASP_RX_CH_AP_MASK | CS42L42_ASP_RX_CH_RES_MASK, val); + + /* Channel B comes from the last active channel */ + snd_soc_component_update_bits(component, CS42L42_SP_RX_CH_SEL, + CS42L42_SP_RX_CHB_SEL_MASK, + (channels - 1) << CS42L42_SP_RX_CHB_SEL_SHIFT); + + /* Both LRCLK slots must be enabled */ + snd_soc_component_update_bits(component, CS42L42_ASP_RX_DAI0_EN, + CS42L42_ASP_RX0_CH_EN_MASK, + BIT(CS42L42_ASP_RX0_CH1_SHIFT) | + BIT(CS42L42_ASP_RX0_CH2_SHIFT)); break; default: break; @@ -900,13 +922,21 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream) */ regmap_multi_reg_write(cs42l42->regmap, cs42l42_to_osc_seq, ARRAY_SIZE(cs42l42_to_osc_seq)); + + /* Must disconnect PLL before stopping it */ + snd_soc_component_update_bits(component, + CS42L42_MCLK_SRC_SEL, + CS42L42_MCLK_SRC_SEL_MASK, + 0); + usleep_range(100, 200); + snd_soc_component_update_bits(component, CS42L42_PLL_CTL1, CS42L42_PLL_START_MASK, 0); } } else { if (!cs42l42->stream_use) { /* SCLK must be running before codec unmute */ - if ((cs42l42->bclk < 11289600) && (cs42l42->sclk < 11289600)) { + if (pll_ratio_table[cs42l42->pll_config].mclk_src_sel) { snd_soc_component_update_bits(component, CS42L42_PLL_CTL1, CS42L42_PLL_START_MASK, 1); @@ -927,6 +957,12 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream) CS42L42_PLL_LOCK_TIMEOUT_US); if (ret < 0) dev_warn(component->dev, "PLL failed to lock: %d\n", ret); + + /* PLL must be running to drive glitchless switch logic */ + snd_soc_component_update_bits(component, + CS42L42_MCLK_SRC_SEL, + CS42L42_MCLK_SRC_SEL_MASK, + CS42L42_MCLK_SRC_SEL_MASK); } /* Mark SCLK as present, turn off internal oscillator */ @@ -960,8 +996,8 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream) SNDRV_PCM_FMTBIT_S24_LE |\ SNDRV_PCM_FMTBIT_S32_LE ) - static const struct snd_soc_dai_ops cs42l42_ops = { + .startup = cs42l42_dai_startup, .hw_params = cs42l42_pcm_hw_params, .set_fmt = cs42l42_set_dai_fmt, .set_sysclk = cs42l42_set_sysclk, diff --git a/sound/soc/codecs/cs42l42.h b/sound/soc/codecs/cs42l42.h index 206b3c81d3e0..8734f6828f3e 100644 --- a/sound/soc/codecs/cs42l42.h +++ b/sound/soc/codecs/cs42l42.h @@ -653,6 +653,8 @@ /* Page 0x25 Audio Port Registers */ #define CS42L42_SP_RX_CH_SEL (CS42L42_PAGE_25 + 0x01) +#define CS42L42_SP_RX_CHB_SEL_SHIFT 2 +#define CS42L42_SP_RX_CHB_SEL_MASK (3 << CS42L42_SP_RX_CHB_SEL_SHIFT) #define CS42L42_SP_RX_ISOC_CTL (CS42L42_PAGE_25 + 0x02) #define CS42L42_SP_RX_RSYNC_SHIFT 6 @@ -775,6 +777,7 @@ struct cs42l42_private { struct gpio_desc *reset_gpio; struct completion pdn_done; struct snd_soc_jack *jack; + int 
pll_config; int bclk; u32 sclk; u32 srate; diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index 15bd8335f667..db88be48c998 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -828,36 +828,6 @@ static void nau8824_int_status_clear_all(struct regmap *regmap) } } -static void nau8824_dapm_disable_pin(struct nau8824 *nau8824, const char *pin) -{ - struct snd_soc_dapm_context *dapm = nau8824->dapm; - const char *prefix = dapm->component->name_prefix; - char prefixed_pin[80]; - - if (prefix) { - snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s", - prefix, pin); - snd_soc_dapm_disable_pin(dapm, prefixed_pin); - } else { - snd_soc_dapm_disable_pin(dapm, pin); - } -} - -static void nau8824_dapm_enable_pin(struct nau8824 *nau8824, const char *pin) -{ - struct snd_soc_dapm_context *dapm = nau8824->dapm; - const char *prefix = dapm->component->name_prefix; - char prefixed_pin[80]; - - if (prefix) { - snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s", - prefix, pin); - snd_soc_dapm_force_enable_pin(dapm, prefixed_pin); - } else { - snd_soc_dapm_force_enable_pin(dapm, pin); - } -} - static void nau8824_eject_jack(struct nau8824 *nau8824) { struct snd_soc_dapm_context *dapm = nau8824->dapm; @@ -866,8 +836,8 @@ static void nau8824_eject_jack(struct nau8824 *nau8824) /* Clear all interruption status */ nau8824_int_status_clear_all(regmap); - nau8824_dapm_disable_pin(nau8824, "SAR"); - nau8824_dapm_disable_pin(nau8824, "MICBIAS"); + snd_soc_dapm_disable_pin(dapm, "SAR"); + snd_soc_dapm_disable_pin(dapm, "MICBIAS"); snd_soc_dapm_sync(dapm); /* Enable the insertion interruption, disable the ejection @@ -897,8 +867,8 @@ static void nau8824_jdet_work(struct work_struct *work) struct regmap *regmap = nau8824->regmap; int adc_value, event = 0, event_mask = 0; - nau8824_dapm_enable_pin(nau8824, "MICBIAS"); - nau8824_dapm_enable_pin(nau8824, "SAR"); + snd_soc_dapm_enable_pin(dapm, "MICBIAS"); + snd_soc_dapm_enable_pin(dapm, "SAR"); snd_soc_dapm_sync(dapm); msleep(100); @@ -909,8 +879,8 @@ static void nau8824_jdet_work(struct work_struct *work) if (adc_value < HEADSET_SARADC_THD) { event |= SND_JACK_HEADPHONE; - nau8824_dapm_disable_pin(nau8824, "SAR"); - nau8824_dapm_disable_pin(nau8824, "MICBIAS"); + snd_soc_dapm_disable_pin(dapm, "SAR"); + snd_soc_dapm_disable_pin(dapm, "MICBIAS"); snd_soc_dapm_sync(dapm); } else { event |= SND_JACK_HEADSET; diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index abcd6f483788..51ecaa2abcd1 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -44,6 +44,7 @@ static const struct reg_sequence patch_list[] = { {RT5682_I2C_CTRL, 0x000f}, {RT5682_PLL2_INTERNAL, 0x8266}, {RT5682_SAR_IL_CMD_3, 0x8365}, + {RT5682_SAR_IL_CMD_6, 0x0180}, }; void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index b504d63385b3..52d2c968b5c0 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -35,6 +35,9 @@ #include "tlv320aic31xx.h" +static int aic31xx_set_jack(struct snd_soc_component *component, + struct snd_soc_jack *jack, void *data); + static const struct reg_default aic31xx_reg_defaults[] = { { AIC31XX_CLKMUX, 0x00 }, { AIC31XX_PLLPR, 0x11 }, @@ -1256,6 +1259,13 @@ static int aic31xx_power_on(struct snd_soc_component *component) return ret; } + /* + * The jack detection configuration is in the same register + * that is used to report jack detect status so is volatile + * and 
not covered by the cache sync, restore it separately. + */ + aic31xx_set_jack(component, aic31xx->jack, NULL); + return 0; } diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index dcd8aeb45cb3..2e9175b37dc9 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -682,11 +682,20 @@ static int aic32x4_set_dosr(struct snd_soc_component *component, u16 dosr) static int aic32x4_set_processing_blocks(struct snd_soc_component *component, u8 r_block, u8 p_block) { - if (r_block > 18 || p_block > 25) - return -EINVAL; + struct aic32x4_priv *aic32x4 = snd_soc_component_get_drvdata(component); + + if (aic32x4->type == AIC32X4_TYPE_TAS2505) { + if (r_block || p_block > 3) + return -EINVAL; - snd_soc_component_write(component, AIC32X4_ADCSPB, r_block); - snd_soc_component_write(component, AIC32X4_DACSPB, p_block); + snd_soc_component_write(component, AIC32X4_DACSPB, p_block); + } else { /* AIC32x4 */ + if (r_block > 18 || p_block > 25) + return -EINVAL; + + snd_soc_component_write(component, AIC32X4_ADCSPB, r_block); + snd_soc_component_write(component, AIC32X4_DACSPB, p_block); + } return 0; } @@ -695,6 +704,7 @@ static int aic32x4_setup_clocks(struct snd_soc_component *component, unsigned int sample_rate, unsigned int channels, unsigned int bit_depth) { + struct aic32x4_priv *aic32x4 = snd_soc_component_get_drvdata(component); u8 aosr; u16 dosr; u8 adc_resource_class, dac_resource_class; @@ -721,19 +731,28 @@ static int aic32x4_setup_clocks(struct snd_soc_component *component, adc_resource_class = 6; dac_resource_class = 8; dosr_increment = 8; - aic32x4_set_processing_blocks(component, 1, 1); + if (aic32x4->type == AIC32X4_TYPE_TAS2505) + aic32x4_set_processing_blocks(component, 0, 1); + else + aic32x4_set_processing_blocks(component, 1, 1); } else if (sample_rate <= 96000) { aosr = 64; adc_resource_class = 6; dac_resource_class = 8; dosr_increment = 4; - aic32x4_set_processing_blocks(component, 1, 9); + if (aic32x4->type == AIC32X4_TYPE_TAS2505) + aic32x4_set_processing_blocks(component, 0, 1); + else + aic32x4_set_processing_blocks(component, 1, 9); } else if (sample_rate == 192000) { aosr = 32; adc_resource_class = 3; dac_resource_class = 4; dosr_increment = 2; - aic32x4_set_processing_blocks(component, 13, 19); + if (aic32x4->type == AIC32X4_TYPE_TAS2505) + aic32x4_set_processing_blocks(component, 0, 1); + else + aic32x4_set_processing_blocks(component, 13, 19); } else { dev_err(component->dev, "Sampling rate not supported\n"); return -EINVAL; diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 549d98241dae..fe15cbc7bcaf 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -747,7 +747,6 @@ static void wm_adsp2_init_debugfs(struct wm_adsp *dsp, static void wm_adsp2_cleanup_debugfs(struct wm_adsp *dsp) { wm_adsp_debugfs_clear(dsp); - debugfs_remove_recursive(dsp->debugfs_root); } #else static inline void wm_adsp2_init_debugfs(struct wm_adsp *dsp, diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 4124aa2fc247..5db2f4865bbb 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -127,7 +127,7 @@ static void sst_fill_alloc_params(struct snd_pcm_substream *substream, snd_pcm_uframes_t period_size; ssize_t periodbytes; ssize_t buffer_bytes = snd_pcm_lib_buffer_bytes(substream); - u32 buffer_addr = virt_to_phys(substream->dma_buffer.area); + u32 buffer_addr = 
substream->runtime->dma_addr; channels = substream->runtime->channels; period_size = substream->runtime->period_size; @@ -233,7 +233,6 @@ static int sst_platform_alloc_stream(struct snd_pcm_substream *substream, /* set codec params and inform SST driver the same */ sst_fill_pcm_params(substream, ¶m); sst_fill_alloc_params(substream, &alloc_params); - substream->runtime->dma_area = substream->dma_buffer.area; str_params.sparams = param; str_params.aparams = alloc_params; str_params.codec = SST_CODEC_TYPE_PCM; diff --git a/sound/soc/intel/boards/sof_da7219_max98373.c b/sound/soc/intel/boards/sof_da7219_max98373.c index 896251d742fe..b7b3b0bf994a 100644 --- a/sound/soc/intel/boards/sof_da7219_max98373.c +++ b/sound/soc/intel/boards/sof_da7219_max98373.c @@ -404,7 +404,7 @@ static int audio_probe(struct platform_device *pdev) return -ENOMEM; /* By default dais[0] is configured for max98373 */ - if (!strcmp(pdev->name, "sof_da7219_max98360a")) { + if (!strcmp(pdev->name, "sof_da7219_mx98360a")) { dais[0] = (struct snd_soc_dai_link) { .name = "SSP1-Codec", .id = 0, diff --git a/sound/soc/kirkwood/kirkwood-dma.c b/sound/soc/kirkwood/kirkwood-dma.c index c2a5933bfcfc..700a18561a94 100644 --- a/sound/soc/kirkwood/kirkwood-dma.c +++ b/sound/soc/kirkwood/kirkwood-dma.c @@ -104,8 +104,6 @@ static int kirkwood_dma_open(struct snd_soc_component *component, int err; struct snd_pcm_runtime *runtime = substream->runtime; struct kirkwood_dma_data *priv = kirkwood_priv(substream); - const struct mbus_dram_target_info *dram; - unsigned long addr; snd_soc_set_runtime_hwparams(substream, &kirkwood_dma_snd_hw); @@ -142,20 +140,14 @@ static int kirkwood_dma_open(struct snd_soc_component *component, writel((unsigned int)-1, priv->io + KIRKWOOD_ERR_MASK); } - dram = mv_mbus_dram_info(); - addr = substream->dma_buffer.addr; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { if (priv->substream_play) return -EBUSY; priv->substream_play = substream; - kirkwood_dma_conf_mbus_windows(priv->io, - KIRKWOOD_PLAYBACK_WIN, addr, dram); } else { if (priv->substream_rec) return -EBUSY; priv->substream_rec = substream; - kirkwood_dma_conf_mbus_windows(priv->io, - KIRKWOOD_RECORD_WIN, addr, dram); } return 0; @@ -182,6 +174,23 @@ static int kirkwood_dma_close(struct snd_soc_component *component, return 0; } +static int kirkwood_dma_hw_params(struct snd_soc_component *component, + struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct kirkwood_dma_data *priv = kirkwood_priv(substream); + const struct mbus_dram_target_info *dram = mv_mbus_dram_info(); + unsigned long addr = substream->runtime->dma_addr; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + kirkwood_dma_conf_mbus_windows(priv->io, + KIRKWOOD_PLAYBACK_WIN, addr, dram); + else + kirkwood_dma_conf_mbus_windows(priv->io, + KIRKWOOD_RECORD_WIN, addr, dram); + return 0; +} + static int kirkwood_dma_prepare(struct snd_soc_component *component, struct snd_pcm_substream *substream) { @@ -246,6 +255,7 @@ const struct snd_soc_component_driver kirkwood_soc_component = { .name = DRV_NAME, .open = kirkwood_dma_open, .close = kirkwood_dma_close, + .hw_params = kirkwood_dma_hw_params, .prepare = kirkwood_dma_prepare, .pointer = kirkwood_dma_pointer, .pcm_construct = kirkwood_dma_new, diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index 3a5e84e16a87..c8dfd0de30e4 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -148,86 +148,75 @@ int snd_soc_component_set_bias_level(struct snd_soc_component 
*component, return soc_component_ret(component, ret); } -static int soc_component_pin(struct snd_soc_component *component, - const char *pin, - int (*pin_func)(struct snd_soc_dapm_context *dapm, - const char *pin)) -{ - struct snd_soc_dapm_context *dapm = - snd_soc_component_get_dapm(component); - char *full_name; - int ret; - - if (!component->name_prefix) { - ret = pin_func(dapm, pin); - goto end; - } - - full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin); - if (!full_name) { - ret = -ENOMEM; - goto end; - } - - ret = pin_func(dapm, full_name); - kfree(full_name); -end: - return soc_component_ret(component, ret); -} - int snd_soc_component_enable_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_enable_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_enable_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin); int snd_soc_component_enable_pin_unlocked(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_enable_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_enable_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin_unlocked); int snd_soc_component_disable_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_disable_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_disable_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin); int snd_soc_component_disable_pin_unlocked(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_disable_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_disable_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin_unlocked); int snd_soc_component_nc_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_nc_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_nc_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin); int snd_soc_component_nc_pin_unlocked(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_nc_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_nc_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin_unlocked); int snd_soc_component_get_pin_status(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_get_pin_status); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_get_pin_status(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_get_pin_status); int snd_soc_component_force_enable_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_force_enable_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_force_enable_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin); @@ -235,7 +224,9 @@ int snd_soc_component_force_enable_pin_unlocked( struct snd_soc_component *component, const char 
*pin) { - return soc_component_pin(component, pin, snd_soc_dapm_force_enable_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_force_enable_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin_unlocked); diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index 4bce89b5ea40..4447f515e8b1 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -278,6 +278,8 @@ config SND_SOC_SOF_HDA config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE tristate + select SOUNDWIRE_INTEL if SND_SOC_SOF_INTEL_SOUNDWIRE + select SND_INTEL_SOUNDWIRE_ACPI if SND_SOC_SOF_INTEL_SOUNDWIRE config SND_SOC_SOF_INTEL_SOUNDWIRE tristate "SOF support for SoundWire" @@ -285,8 +287,6 @@ config SND_SOC_SOF_INTEL_SOUNDWIRE depends on SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE depends on ACPI && SOUNDWIRE depends on !(SOUNDWIRE=m && SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE=y) - select SOUNDWIRE_INTEL - select SND_INTEL_SOUNDWIRE_ACPI help This adds support for SoundWire with Sound Open Firmware for Intel(R) platforms. diff --git a/sound/soc/sof/intel/hda-ipc.c b/sound/soc/sof/intel/hda-ipc.c index c91aa951df22..acfeca42604c 100644 --- a/sound/soc/sof/intel/hda-ipc.c +++ b/sound/soc/sof/intel/hda-ipc.c @@ -107,8 +107,8 @@ void hda_dsp_ipc_get_reply(struct snd_sof_dev *sdev) } else { /* reply correct size ? */ if (reply.hdr.size != msg->reply_size && - /* getter payload is never known upfront */ - !(reply.hdr.cmd & SOF_IPC_GLB_PROBE)) { + /* getter payload is never known upfront */ + ((reply.hdr.cmd & SOF_GLB_TYPE_MASK) != SOF_IPC_GLB_PROBE)) { dev_err(sdev->dev, "error: reply expected %zu got %u bytes\n", msg->reply_size, reply.hdr.size); ret = -EINVAL; diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index e1e368ff2b12..891e6e1b9121 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -187,12 +187,16 @@ static int hda_sdw_probe(struct snd_sof_dev *sdev) int hda_sdw_startup(struct snd_sof_dev *sdev) { struct sof_intel_hda_dev *hdev; + struct snd_sof_pdata *pdata = sdev->pdata; hdev = sdev->pdata->hw_pdata; if (!hdev->sdw) return 0; + if (pdata->machine && !pdata->machine->mach_params.link_mask) + return 0; + return sdw_intel_startup(hdev->sdw); } @@ -1002,6 +1006,14 @@ static int hda_generic_machine_select(struct snd_sof_dev *sdev) hda_mach->mach_params.dmic_num = dmic_num; pdata->machine = hda_mach; pdata->tplg_filename = tplg_filename; + + if (codec_num == 2) { + /* + * Prevent SoundWire links from starting when an external + * HDaudio codec is used + */ + hda_mach->mach_params.link_mask = 0; + } } } diff --git a/sound/soc/uniphier/aio-dma.c b/sound/soc/uniphier/aio-dma.c index 3c1628a3a1ac..3d9736e7381f 100644 --- a/sound/soc/uniphier/aio-dma.c +++ b/sound/soc/uniphier/aio-dma.c @@ -198,7 +198,7 @@ static int uniphier_aiodma_mmap(struct snd_soc_component *component, vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); return remap_pfn_range(vma, vma->vm_start, - substream->dma_buffer.addr >> PAGE_SHIFT, + substream->runtime->dma_addr >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); } diff --git a/sound/soc/xilinx/xlnx_formatter_pcm.c b/sound/soc/xilinx/xlnx_formatter_pcm.c index 1d59fb668c77..91afea9d5de6 100644 --- a/sound/soc/xilinx/xlnx_formatter_pcm.c +++ b/sound/soc/xilinx/xlnx_formatter_pcm.c @@ -452,8 +452,8 @@ static int xlnx_formatter_pcm_hw_params(struct snd_soc_component *component, stream_data->buffer_size = size; - low = 
lower_32_bits(substream->dma_buffer.addr); - high = upper_32_bits(substream->dma_buffer.addr); + low = lower_32_bits(runtime->dma_addr); + high = upper_32_bits(runtime->dma_addr); writel(low, stream_data->mmio + XLNX_AUD_BUFF_ADDR_LSB); writel(high, stream_data->mmio + XLNX_AUD_BUFF_ADDR_MSB); diff --git a/sound/usb/card.c b/sound/usb/card.c index 2f6a62416c05..a1f8c3a026f5 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -907,7 +907,7 @@ static void usb_audio_disconnect(struct usb_interface *intf) } } - if (chip->quirk_type & QUIRK_SETUP_DISABLE_AUTOSUSPEND) + if (chip->quirk_type == QUIRK_SETUP_DISABLE_AUTOSUSPEND) usb_enable_autosuspend(interface_to_usbdev(intf)); chip->num_interfaces--; diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 52de52288e10..14456f61539e 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -324,6 +324,12 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, sources[ret - 1], visited, validate); if (ret > 0) { + /* + * For Samsung USBC Headset (AKG), setting clock selector again + * will result in incorrect default clock setting problems + */ + if (chip->usb_id == USB_ID(0x04e8, 0xa051)) + return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); if (err < 0) return err; diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index f4cdaf1ba44a..9b713b4a5ec4 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1816,6 +1816,15 @@ static void get_connector_control_name(struct usb_mixer_interface *mixer, strlcat(name, " - Output Jack", name_size); } +/* get connector value to "wake up" the USB audio */ +static int connector_mixer_resume(struct usb_mixer_elem_list *list) +{ + struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); + + get_connector_value(cval, NULL, NULL); + return 0; +} + /* Build a mixer control for a UAC connector control (jack-detect) */ static void build_connector_control(struct usb_mixer_interface *mixer, const struct usbmix_name_map *imap, @@ -1833,6 +1842,10 @@ static void build_connector_control(struct usb_mixer_interface *mixer, if (!cval) return; snd_usb_mixer_elem_init_std(&cval->head, mixer, term->id); + + /* set up a specific resume callback */ + cval->head.resume = connector_mixer_resume; + /* * UAC2: The first byte from reading the UAC2_TE_CONNECTOR control returns the * number of channels connected. 
@@ -3642,23 +3655,15 @@ static int restore_mixer_value(struct usb_mixer_elem_list *list) return 0; } -static int default_mixer_resume(struct usb_mixer_elem_list *list) -{ - struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); - - /* get connector value to "wake up" the USB audio */ - if (cval->val_type == USB_MIXER_BOOLEAN && cval->channels == 1) - get_connector_value(cval, NULL, NULL); - - return 0; -} - static int default_mixer_reset_resume(struct usb_mixer_elem_list *list) { - int err = default_mixer_resume(list); + int err; - if (err < 0) - return err; + if (list->resume) { + err = list->resume(list); + if (err < 0) + return err; + } return restore_mixer_value(list); } @@ -3697,7 +3702,7 @@ void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list, list->id = unitid; list->dump = snd_usb_mixer_dump_cval; #ifdef CONFIG_PM - list->resume = default_mixer_resume; + list->resume = NULL; list->reset_resume = default_mixer_reset_resume; #endif } diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index f9d698a37153..3d5848d5481b 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -228,7 +228,7 @@ enum { }; static const char *const scarlett2_dim_mute_names[SCARLETT2_DIM_MUTE_COUNT] = { - "Mute", "Dim" + "Mute Playback Switch", "Dim Playback Switch" }; /* Description of each hardware port type: @@ -1856,9 +1856,15 @@ static int scarlett2_mute_ctl_get(struct snd_kcontrol *kctl, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *elem = kctl->private_data; - struct scarlett2_data *private = elem->head.mixer->private_data; + struct usb_mixer_interface *mixer = elem->head.mixer; + struct scarlett2_data *private = mixer->private_data; int index = line_out_remap(private, elem->control); + mutex_lock(&private->data_mutex); + if (private->vol_updated) + scarlett2_update_volumes(mixer); + mutex_unlock(&private->data_mutex); + ucontrol->value.integer.value[0] = private->mute_switch[index]; return 0; } @@ -1955,10 +1961,12 @@ static void scarlett2_vol_ctl_set_writable(struct usb_mixer_interface *mixer, ~SNDRV_CTL_ELEM_ACCESS_WRITE; } - /* Notify of write bit change */ - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO, + /* Notify of write bit and possible value change */ + snd_ctl_notify(card, + SNDRV_CTL_EVENT_MASK_VALUE | SNDRV_CTL_EVENT_MASK_INFO, &private->vol_ctls[index]->id); - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO, + snd_ctl_notify(card, + SNDRV_CTL_EVENT_MASK_VALUE | SNDRV_CTL_EVENT_MASK_INFO, &private->mute_ctls[index]->id); } @@ -2530,14 +2538,18 @@ static int scarlett2_add_direct_monitor_ctl(struct usb_mixer_interface *mixer) { struct scarlett2_data *private = mixer->private_data; const struct scarlett2_device_info *info = private->info; + const char *s; if (!info->direct_monitor) return 0; + s = info->direct_monitor == 1 + ? 
"Direct Monitor Playback Switch" + : "Direct Monitor Playback Enum"; + return scarlett2_add_new_ctl( mixer, &scarlett2_direct_monitor_ctl[info->direct_monitor - 1], - 0, 1, "Direct Monitor Playback Switch", - &private->direct_monitor_ctl); + 0, 1, s, &private->direct_monitor_ctl); } /*** Speaker Switching Control ***/ @@ -2589,7 +2601,9 @@ static int scarlett2_speaker_switch_enable(struct usb_mixer_interface *mixer) /* disable the line out SW/HW switch */ scarlett2_sw_hw_ctl_ro(private, i); - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO, + snd_ctl_notify(card, + SNDRV_CTL_EVENT_MASK_VALUE | + SNDRV_CTL_EVENT_MASK_INFO, &private->sw_hw_ctls[i]->id); } @@ -2913,7 +2927,7 @@ static int scarlett2_dim_mute_ctl_put(struct snd_kcontrol *kctl, if (private->vol_sw_hw_switch[line_index]) { private->mute_switch[line_index] = val; snd_ctl_notify(mixer->chip->card, - SNDRV_CTL_EVENT_MASK_INFO, + SNDRV_CTL_EVENT_MASK_VALUE, &private->mute_ctls[i]->id); } } @@ -3455,7 +3469,7 @@ static int scarlett2_add_msd_ctl(struct usb_mixer_interface *mixer) /* Add MSD control */ return scarlett2_add_new_ctl(mixer, &scarlett2_msd_ctl, - 0, 1, "MSD Mode", NULL); + 0, 1, "MSD Mode Switch", NULL); } /*** Cleanup/Suspend Callbacks ***/ diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index e7accd87e063..326d1b0ea5e6 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1899,6 +1899,7 @@ static const struct registration_quirk registration_quirks[] = { REG_QUIRK_ENTRY(0x0951, 0x16ea, 2), /* Kingston HyperX Cloud Flight S */ REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2), /* JBL Quantum 600 */ REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2), /* JBL Quantum 400 */ + REG_QUIRK_ENTRY(0x0ecb, 0x203c, 2), /* JBL Quantum 600 */ REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2), /* JBL Quantum 800 */ { 0 } /* terminator */ }; diff --git a/tools/io_uring/io_uring-cp.c b/tools/io_uring/io_uring-cp.c index 81461813ec62..d9bd6f5f8f46 100644 --- a/tools/io_uring/io_uring-cp.c +++ b/tools/io_uring/io_uring-cp.c @@ -131,8 +131,7 @@ static int copy_file(struct io_uring *ring, off_t insize) writes = reads = offset = 0; while (insize || write_left) { - unsigned long had_reads; - int got_comp; + int had_reads, got_comp; /* * Queue up as many reads as we can @@ -174,8 +173,13 @@ static int copy_file(struct io_uring *ring, off_t insize) if (!got_comp) { ret = io_uring_wait_cqe(ring, &cqe); got_comp = 1; - } else + } else { ret = io_uring_peek_cqe(ring, &cqe); + if (ret == -EAGAIN) { + cqe = NULL; + ret = 0; + } + } if (ret < 0) { fprintf(stderr, "io_uring_peek_cqe: %s\n", strerror(-ret)); @@ -194,7 +198,7 @@ static int copy_file(struct io_uring *ring, off_t insize) fprintf(stderr, "cqe failed: %s\n", strerror(-cqe->res)); return 1; - } else if ((size_t) cqe->res != data->iov.iov_len) { + } else if (cqe->res != data->iov.iov_len) { /* Short read/write, adjust and requeue */ data->iov.iov_base += cqe->res; data->iov.iov_len -= cqe->res; @@ -221,6 +225,25 @@ static int copy_file(struct io_uring *ring, off_t insize) } } + /* wait out pending writes */ + while (writes) { + struct io_data *data; + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + return 1; + } + if (cqe->res < 0) { + fprintf(stderr, "write res=%d\n", cqe->res); + return 1; + } + data = io_uring_cqe_get_data(cqe); + free(data); + writes--; + io_uring_cqe_seen(ring, cqe); + } + return 0; } diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 85de4fd50699..77dc24d58302 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -804,6 +804,7 @@ static struct 
btf *btf_new(const void *data, __u32 size, struct btf *base_btf) btf->nr_types = 0; btf->start_id = 1; btf->start_str_off = 0; + btf->fd = -1; if (base_btf) { btf->base_btf = base_btf; @@ -832,8 +833,6 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) if (err) goto done; - btf->fd = -1; - done: if (err) { btf__free(btf); diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index ecaae2927ab8..cd8c703dde71 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -75,6 +75,9 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; break; + case BPF_PROG_TYPE_CGROUP_SOCKOPT: + xattr.expected_attach_type = BPF_CGROUP_GETSOCKOPT; + break; case BPF_PROG_TYPE_SK_LOOKUP: xattr.expected_attach_type = BPF_SK_LOOKUP; break; @@ -104,7 +107,6 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_SYSCTL: - case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_STRUCT_OPS: case BPF_PROG_TYPE_EXT: diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 22f8326547eb..bc1f64873c8f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2434,6 +2434,22 @@ static int cs_etm__process_event(struct perf_session *session, return 0; } +static void dump_queued_data(struct cs_etm_auxtrace *etm, + struct perf_record_auxtrace *event) +{ + struct auxtrace_buffer *buf; + unsigned int i; + /* + * Find all buffers with same reference in the queues and dump them. + * This is because the queues can contain multiple entries of the same + * buffer that were split on aux records. + */ + for (i = 0; i < etm->queues.nr_queues; ++i) + list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) + if (buf->reference == event->reference) + cs_etm__dump_event(etm, buf); +} + static int cs_etm__process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -2466,7 +2482,8 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, cs_etm__dump_event(etm, buffer); auxtrace_buffer__put_data(buffer); } - } + } else if (dump_trace) + dump_queued_data(etm, &event->auxtrace); return 0; } @@ -3042,7 +3059,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); - return 0; } err = cs_etm__synth_events(etm, session); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 72e7f3616157..8af693d9678c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -192,8 +192,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (!(prot & PROT_EXEC)) dso__set_loaded(dso); } - - nsinfo__put(dso->nsinfo); dso->nsinfo = nsi; if (build_id__is_defined(bid)) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index a1bd7007a8b4..fc683bc41715 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -742,9 +742,13 @@ struct pmu_events_map *__weak pmu_events_map__find(void) return perf_pmu__find_map(NULL); } -static bool perf_pmu__valid_suffix(char *pmu_name, char *tok) +/* + * Suffix must be in form tok_{digits}, or tok{digits}, or same as pmu_name + * to be valid. 
+ */ +static bool perf_pmu__valid_suffix(const char *pmu_name, char *tok) { - char *p; + const char *p; if (strncmp(pmu_name, tok, strlen(tok))) return false; @@ -753,12 +757,16 @@ static bool perf_pmu__valid_suffix(char *pmu_name, char *tok) if (*p == 0) return true; - if (*p != '_') - return false; + if (*p == '_') + ++p; - ++p; - if (*p == 0 || !isdigit(*p)) - return false; + /* Ensure we end in a number */ + while (1) { + if (!isdigit(*p)) + return false; + if (*(++p) == 0) + break; + } return true; } @@ -789,12 +797,19 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name) * match "socket" in "socketX_pmunameY" and then "pmuname" in * "pmunameY". */ - for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) { + while (1) { + char *next_tok = strtok_r(NULL, ",", &tmp); + name = strstr(name, tok); - if (!name || !perf_pmu__valid_suffix((char *)name, tok)) { + if (!name || + (!next_tok && !perf_pmu__valid_suffix(name, tok))) { res = false; goto out; } + if (!next_tok) + break; + tok = next_tok; + name += strlen(tok); } res = true; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 54f367cbadae..b1bff5fb0f65 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -434,7 +434,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t, dev_dbg(dev, "%s: transition out verify\n", __func__); fw->state = FW_STATE_UPDATED; fw->missed_activate = false; - /* fall through */ + fallthrough; case FW_STATE_UPDATED: nd_cmd->status = 0; /* bogus test version */ diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index addcfd8b615e..433f8bef261e 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -23,7 +23,6 @@ test_skb_cgroup_id_user test_cgroup_storage test_flow_dissector flow_dissector_load -test_netcnt test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f405b20c1e6c..2a58b7b5aea4 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -38,7 +38,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_verifier_log test_dev_cgroup \ test_sock test_sockmap get_cgroup_id_user \ test_cgroup_storage \ - test_netcnt test_tcpnotify_user test_sysctl \ + test_tcpnotify_user test_sysctl \ test_progs-no_alu32 # Also test bpf-gcc, if present @@ -197,7 +197,6 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c -$(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 26468a8f44f3..d6857683397f 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -310,3 +310,15 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, } return -1; } + +char *ping_command(int family) +{ + if (family == AF_INET6) { + /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. 
*/ + if (!system("which ping6 >/dev/null 2>&1")) + return "ping6"; + else + return "ping -6"; + } + return "ping"; +} diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index d60bc2897770..c59a8f6d770b 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -46,5 +46,6 @@ int fastopen_connect(int server_fd, const char *data, unsigned int data_len, int timeout_ms); int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); +char *ping_command(int family); #endif diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c new file mode 100644 index 000000000000..6ede48bde91b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <sys/sysinfo.h> +#include <test_progs.h> +#include "network_helpers.h" +#include "netcnt_prog.skel.h" +#include "netcnt_common.h" + +#define CG_NAME "/netcnt" + +void test_netcnt(void) +{ + union percpu_net_cnt *percpu_netcnt = NULL; + struct bpf_cgroup_storage_key key; + int map_fd, percpu_map_fd; + struct netcnt_prog *skel; + unsigned long packets; + union net_cnt netcnt; + unsigned long bytes; + int cpu, nproc; + int cg_fd = -1; + char cmd[128]; + + skel = netcnt_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load")) + return; + + nproc = get_nprocs_conf(); + percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc); + if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)")) + goto err; + + cg_fd = test__join_cgroup(CG_NAME); + if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup")) + goto err; + + skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd); + if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt, + "attach_cgroup(bpf_nextcnt)")) + goto err; + + snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6)); + ASSERT_OK(system(cmd), cmd); + + map_fd = bpf_map__fd(skel->maps.netcnt); + if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key")) + goto err; + + if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)")) + goto err; + + percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt); + if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]), + "bpf_map_lookup_elem(percpu_netcnt)")) + goto err; + + /* Some packets can be still in per-cpu cache, but not more than + * MAX_PERCPU_PACKETS. + */ + packets = netcnt.packets; + bytes = netcnt.bytes; + for (cpu = 0; cpu < nproc; cpu++) { + ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS"); + + packets += percpu_netcnt[cpu].packets; + bytes += percpu_netcnt[cpu].bytes; + } + + /* No packets should be lost */ + ASSERT_EQ(packets, 10000, "packets"); + + /* Let's check that bytes counter matches the number of packets + * multiplied by the size of ipv6 ICMP packet. 
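For context on the constant used just below: assuming ping's default 56-byte payload (an assumption about the ping binary, not something the patch states), each ICMPv6 echo request to ::1 occupies 104 bytes at the IPv6 layer, so the byte counter is expected to be exactly packets * 104:

40 (IPv6 header) + 8 (ICMPv6 echo header) + 56 (default ping payload) = 104 bytes
10000 packets * 104 bytes = 1040000 bytes in the cgroup storage counter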
+ */ + ASSERT_EQ(bytes, packets * 104, "bytes"); + +err: + if (cg_fd != -1) + close(cg_fd); + free(percpu_netcnt); + netcnt_prog__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index de2688166696..4e91f4d6466c 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -34,8 +34,8 @@ void test_reference_tracking(void) if (!test__start_subtest(title)) continue; - /* Expect verifier failure if test name has 'fail' */ - if (strstr(title, "fail") != NULL) { + /* Expect verifier failure if test name has 'err' */ + if (strstr(title, "err_") != NULL) { libbpf_print_fn_t old_print_fn; old_print_fn = libbpf_set_print(NULL); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 932e4ee3f97c..e7201ba29ccd 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -390,18 +390,6 @@ done: close(client_fd); } -static char *ping_command(int family) -{ - if (family == AF_INET6) { - /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */ - if (!system("which ping6 >/dev/null 2>&1")) - return "ping6"; - else - return "ping -6"; - } - return "ping"; -} - static int test_ping(int family, const char *addr) { SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c new file mode 100644 index 000000000000..6b186b4238d0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-2.0 + +/** + * Test XDP bonding support + * + * Sets up two bonded veth pairs between two fresh namespaces + * and verifies that XDP_TX program loaded on a bond device + * are correctly loaded onto the slave devices and XDP_TX'd + * packets are balanced using bonding. 
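For reference, the XDP_TX program that this test attaches to the receiving bond is the trivial xdp_tx.c shown further down in this diff (where its section name is also switched from "tx" to "xdp"); modulo the license boilerplate, which is assumed here rather than visible in the hunk, it amounts to:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_tx(struct xdp_md *xdp)
{
	/* bounce every frame back out the interface it arrived on */
	return XDP_TX;
}

char _license[] SEC("license") = "GPL";

Every packet sent out of bond1 is therefore reflected back by the receiving bond, which is what lets the test compare the per-slave RX counters against NPACKETS.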
+ */ + +#define _GNU_SOURCE +#include <sched.h> +#include <net/if.h> +#include <linux/if_link.h> +#include "test_progs.h" +#include "network_helpers.h" +#include <linux/if_bonding.h> +#include <linux/limits.h> +#include <linux/udp.h> + +#include "xdp_dummy.skel.h" +#include "xdp_redirect_multi_kern.skel.h" +#include "xdp_tx.skel.h" + +#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55} +#define BOND1_MAC_STR "00:11:22:33:44:55" +#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66} +#define BOND2_MAC_STR "00:22:33:44:55:66" +#define NPACKETS 100 + +static int root_netns_fd = -1; + +static void restore_root_netns(void) +{ + ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns"); +} + +static int setns_by_name(char *name) +{ + int nsfd, err; + char nspath[PATH_MAX]; + + snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); + nsfd = open(nspath, O_RDONLY | O_CLOEXEC); + if (nsfd < 0) + return -1; + + err = setns(nsfd, CLONE_NEWNET); + close(nsfd); + return err; +} + +static int get_rx_packets(const char *iface) +{ + FILE *f; + char line[512]; + int iface_len = strlen(iface); + + f = fopen("/proc/net/dev", "r"); + if (!f) + return -1; + + while (fgets(line, sizeof(line), f)) { + char *p = line; + + while (*p == ' ') + p++; /* skip whitespace */ + if (!strncmp(p, iface, iface_len)) { + p += iface_len; + if (*p++ != ':') + continue; + while (*p == ' ') + p++; /* skip whitespace */ + while (*p && *p != ' ') + p++; /* skip rx bytes */ + while (*p == ' ') + p++; /* skip whitespace */ + fclose(f); + return atoi(p); + } + } + fclose(f); + return -1; +} + +#define MAX_BPF_LINKS 8 + +struct skeletons { + struct xdp_dummy *xdp_dummy; + struct xdp_tx *xdp_tx; + struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; + + int nlinks; + struct bpf_link *links[MAX_BPF_LINKS]; +}; + +static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface) +{ + struct bpf_link *link; + int ifindex; + + ifindex = if_nametoindex(iface); + if (!ASSERT_GT(ifindex, 0, "get ifindex")) + return -1; + + if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached")) + return -1; + + link = bpf_program__attach_xdp(prog, ifindex); + if (!ASSERT_OK_PTR(link, "attach xdp program")) + return -1; + + skeletons->links[skeletons->nlinks++] = link; + return 0; +} + +enum { + BOND_ONE_NO_ATTACH = 0, + BOND_BOTH_AND_ATTACH, +}; + +static const char * const mode_names[] = { + [BOND_MODE_ROUNDROBIN] = "balance-rr", + [BOND_MODE_ACTIVEBACKUP] = "active-backup", + [BOND_MODE_XOR] = "balance-xor", + [BOND_MODE_BROADCAST] = "broadcast", + [BOND_MODE_8023AD] = "802.3ad", + [BOND_MODE_TLB] = "balance-tlb", + [BOND_MODE_ALB] = "balance-alb", +}; + +static const char * const xmit_policy_names[] = { + [BOND_XMIT_POLICY_LAYER2] = "layer2", + [BOND_XMIT_POLICY_LAYER34] = "layer3+4", + [BOND_XMIT_POLICY_LAYER23] = "layer2+3", + [BOND_XMIT_POLICY_ENCAP23] = "encap2+3", + [BOND_XMIT_POLICY_ENCAP34] = "encap3+4", +}; + +static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy, + int bond_both_attach) +{ +#define SYS(fmt, ...) 
\ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + return -1; \ + }) + + SYS("ip netns add ns_dst"); + SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst"); + SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst"); + + SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s", + mode_names[mode], xmit_policy_names[xmit_policy]); + SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none"); + SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s", + mode_names[mode], xmit_policy_names[xmit_policy]); + SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none"); + + SYS("ip link set veth1_1 master bond1"); + if (bond_both_attach == BOND_BOTH_AND_ATTACH) { + SYS("ip link set veth1_2 master bond1"); + } else { + SYS("ip link set veth1_2 up addrgenmode none"); + + if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2")) + return -1; + } + + SYS("ip -netns ns_dst link set veth2_1 master bond2"); + + if (bond_both_attach == BOND_BOTH_AND_ATTACH) + SYS("ip -netns ns_dst link set veth2_2 master bond2"); + else + SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none"); + + /* Load a dummy program on sending side as with veth peer needs to have a + * XDP program loaded as well. + */ + if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1")) + return -1; + + if (bond_both_attach == BOND_BOTH_AND_ATTACH) { + if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst")) + return -1; + + if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2")) + return -1; + + restore_root_netns(); + } + + return 0; + +#undef SYS +} + +static void bonding_cleanup(struct skeletons *skeletons) +{ + restore_root_netns(); + while (skeletons->nlinks) { + skeletons->nlinks--; + bpf_link__destroy(skeletons->links[skeletons->nlinks]); + } + ASSERT_OK(system("ip link delete bond1"), "delete bond1"); + ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1"); + ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2"); + ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst"); +} + +static int send_udp_packets(int vary_dst_ip) +{ + struct ethhdr eh = { + .h_source = BOND1_MAC, + .h_dest = BOND2_MAC, + .h_proto = htons(ETH_P_IP), + }; + uint8_t buf[128] = {}; + struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh)); + struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph)); + int i, s = -1; + int ifindex; + + s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW); + if (!ASSERT_GE(s, 0, "socket")) + goto err; + + ifindex = if_nametoindex("bond1"); + if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex")) + goto err; + + memcpy(buf, &eh, sizeof(eh)); + iph->ihl = 5; + iph->version = 4; + iph->tos = 16; + iph->id = 1; + iph->ttl = 64; + iph->protocol = IPPROTO_UDP; + iph->saddr = 1; + iph->daddr = 2; + iph->tot_len = htons(sizeof(buf) - ETH_HLEN); + iph->check = 0; + + for (i = 1; i <= NPACKETS; i++) { + int n; + struct sockaddr_ll saddr_ll = { + .sll_ifindex = ifindex, + .sll_halen = ETH_ALEN, + .sll_addr = BOND2_MAC, + }; + + /* vary the UDP destination port for even distribution with roundrobin/xor modes */ + uh->dest++; + + if (vary_dst_ip) + iph->daddr++; + + n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll)); + if (!ASSERT_EQ(n, sizeof(buf), "sendto")) + goto err; + } + + return 0; + +err: + if (s >= 0) + close(s); + return -1; +} + +static void 
test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy) +{ + int bond1_rx; + + if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH)) + goto out; + + if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34)) + goto out; + + bond1_rx = get_rx_packets("bond1"); + ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets"); + + switch (mode) { + case BOND_MODE_ROUNDROBIN: + case BOND_MODE_XOR: { + int veth1_rx = get_rx_packets("veth1_1"); + int veth2_rx = get_rx_packets("veth1_2"); + int diff = abs(veth1_rx - veth2_rx); + + ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets"); + + switch (xmit_policy) { + case BOND_XMIT_POLICY_LAYER2: + ASSERT_GE(diff, NPACKETS, + "expected packets on only one of the interfaces"); + break; + case BOND_XMIT_POLICY_LAYER23: + case BOND_XMIT_POLICY_LAYER34: + ASSERT_LT(diff, NPACKETS/2, + "expected even distribution of packets"); + break; + default: + PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy); + break; + } + break; + } + case BOND_MODE_ACTIVEBACKUP: { + int veth1_rx = get_rx_packets("veth1_1"); + int veth2_rx = get_rx_packets("veth1_2"); + int diff = abs(veth1_rx - veth2_rx); + + ASSERT_GE(diff, NPACKETS, + "expected packets on only one of the interfaces"); + break; + } + default: + PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy); + break; + } + +out: + bonding_cleanup(skeletons); +} + +/* Test the broadcast redirection using xdp_redirect_map_multi_prog and adding + * all the interfaces to it and checking that broadcasting won't send the packet + * to neither the ingress bond device (bond2) or its slave (veth2_1). + */ +static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons) +{ + static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"}; + int veth1_1_rx, veth1_2_rx; + int err; + + if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, + BOND_ONE_NO_ATTACH)) + goto out; + + + if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst")) + goto out; + + /* populate the devmap with the relevant interfaces */ + for (int i = 0; i < ARRAY_SIZE(ifaces); i++) { + int ifindex = if_nametoindex(ifaces[i]); + int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all); + + if (!ASSERT_GT(ifindex, 0, "could not get interface index")) + goto out; + + err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0); + if (!ASSERT_OK(err, "add interface to map_all")) + goto out; + } + + if (xdp_attach(skeletons, + skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog, + "bond2")) + goto out; + + restore_root_netns(); + + if (send_udp_packets(BOND_MODE_ROUNDROBIN)) + goto out; + + veth1_1_rx = get_rx_packets("veth1_1"); + veth1_2_rx = get_rx_packets("veth1_2"); + + ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1"); + ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2"); + +out: + restore_root_netns(); + bonding_cleanup(skeletons); +} + +/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */ +static void test_xdp_bonding_attach(struct skeletons *skeletons) +{ + struct bpf_link *link = NULL; + struct bpf_link *link2 = NULL; + int veth, bond; + int err; + + if (!ASSERT_OK(system("ip link add veth type veth"), "add veth")) + goto out; + if (!ASSERT_OK(system("ip link add bond type bond"), "add bond")) + goto out; + + veth = if_nametoindex("veth"); + if (!ASSERT_GE(veth, 0, "if_nametoindex veth")) + goto out; + bond = if_nametoindex("bond"); + if 
(!ASSERT_GE(bond, 0, "if_nametoindex bond")) + goto out; + + /* enslaving with a XDP program loaded fails */ + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth); + if (!ASSERT_OK_PTR(link, "attach program to veth")) + goto out; + + err = system("ip link set veth master bond"); + if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail")) + goto out; + + bpf_link__destroy(link); + link = NULL; + + err = system("ip link set veth master bond"); + if (!ASSERT_OK(err, "set veth master")) + goto out; + + /* attaching to slave when master has no program is allowed */ + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth); + if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved")) + goto out; + + /* attaching to master not allowed when slave has program loaded */ + link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond); + if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program")) + goto out; + + bpf_link__destroy(link); + link = NULL; + + /* attaching XDP program to master allowed when slave has no program */ + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond); + if (!ASSERT_OK_PTR(link, "attach program to master")) + goto out; + + /* attaching to slave not allowed when master has program loaded */ + link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond); + ASSERT_ERR_PTR(link2, "attach program to slave when master has program"); + +out: + bpf_link__destroy(link); + bpf_link__destroy(link2); + + system("ip link del veth"); + system("ip link del bond"); +} + +static int libbpf_debug_print(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level != LIBBPF_WARN) + vprintf(format, args); + return 0; +} + +struct bond_test_case { + char *name; + int mode; + int xmit_policy; +}; + +static struct bond_test_case bond_test_cases[] = { + { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, }, + { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 }, + + { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, }, + { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, }, + { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, }, +}; + +void test_xdp_bonding(void) +{ + libbpf_print_fn_t old_print_fn; + struct skeletons skeletons = {}; + int i; + + old_print_fn = libbpf_set_print(libbpf_debug_print); + + root_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net")) + goto out; + + skeletons.xdp_dummy = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load")) + goto out; + + skeletons.xdp_tx = xdp_tx__open_and_load(); + if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load")) + goto out; + + skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load(); + if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern, + "xdp_redirect_multi_kern__open_and_load")) + goto out; + + if (!test__start_subtest("xdp_bonding_attach")) + test_xdp_bonding_attach(&skeletons); + + for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) { + struct bond_test_case *test_case = &bond_test_cases[i]; + + if (!test__start_subtest(test_case->name)) + test_xdp_bonding_with_mode( + &skeletons, + test_case->mode, + test_case->xmit_policy); + } + + if (!test__start_subtest("xdp_bonding_redirect_multi")) + 
test_xdp_bonding_redirect_multi(&skeletons); + +out: + xdp_dummy__destroy(skeletons.xdp_dummy); + xdp_tx__destroy(skeletons.xdp_tx); + xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern); + + libbpf_set_print(old_print_fn); + if (root_netns_fd >= 0) + close(root_netns_fd); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 2e4775c35414..92267abb462f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -121,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp, } BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", - seq_num, src, srcp, destp, destp); + seq_num, src, srcp, dest, destp); BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ", state, tp->write_seq - tp->snd_una, rx_queue, diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index e83d0b48d80c..8249075f088f 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -91,7 +91,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_use_after_free") +SEC("classifier/err_use_after_free") int bpf_sk_lookup_uaf(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -106,7 +106,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) return family; } -SEC("classifier/fail_modify_sk_pointer") +SEC("classifier/err_modify_sk_pointer") int bpf_sk_lookup_modptr(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -121,7 +121,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_modify_sk_or_null_pointer") +SEC("classifier/err_modify_sk_or_null_pointer") int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -135,7 +135,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_no_release") +SEC("classifier/err_no_release") int bpf_sk_lookup_test2(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -144,7 +144,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_release_twice") +SEC("classifier/err_release_twice") int bpf_sk_lookup_test3(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -156,7 +156,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_release_unchecked") +SEC("classifier/err_release_unchecked") int bpf_sk_lookup_test4(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -173,7 +173,7 @@ void lookup_no_release(struct __sk_buff *skb) bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } -SEC("classifier/fail_no_release_subcall") +SEC("classifier/err_no_release_subcall") int bpf_sk_lookup_test5(struct __sk_buff *skb) { lookup_no_release(skb); diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c index 94e6c2b281cb..5f725c720e00 100644 --- a/tools/testing/selftests/bpf/progs/xdp_tx.c +++ b/tools/testing/selftests/bpf/progs/xdp_tx.c @@ -3,7 +3,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -SEC("tx") +SEC("xdp") int xdp_tx(struct xdp_md *xdp) { return XDP_TX; diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c deleted file mode 100644 index 4990a99e7381..000000000000 --- a/tools/testing/selftests/bpf/test_netcnt.c +++ /dev/null @@ 
-1,148 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/sysinfo.h> -#include <sys/time.h> - -#include <linux/bpf.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "cgroup_helpers.h" -#include "bpf_rlimit.h" -#include "netcnt_common.h" - -#define BPF_PROG "./netcnt_prog.o" -#define TEST_CGROUP "/test-network-counters/" - -static int bpf_find_map(const char *test, struct bpf_object *obj, - const char *name) -{ - struct bpf_map *map; - - map = bpf_object__find_map_by_name(obj, name); - if (!map) { - printf("%s:FAIL:map '%s' not found\n", test, name); - return -1; - } - return bpf_map__fd(map); -} - -int main(int argc, char **argv) -{ - union percpu_net_cnt *percpu_netcnt; - struct bpf_cgroup_storage_key key; - int map_fd, percpu_map_fd; - int error = EXIT_FAILURE; - struct bpf_object *obj; - int prog_fd, cgroup_fd; - unsigned long packets; - union net_cnt netcnt; - unsigned long bytes; - int cpu, nproc; - __u32 prog_cnt; - - nproc = get_nprocs_conf(); - percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc); - if (!percpu_netcnt) { - printf("Not enough memory for per-cpu area (%d cpus)\n", nproc); - goto err; - } - - if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB, - &obj, &prog_fd)) { - printf("Failed to load bpf program\n"); - goto out; - } - - cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); - if (cgroup_fd < 0) - goto err; - - /* Attach bpf program */ - if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { - printf("Failed to attach bpf program"); - goto err; - } - - if (system("which ping6 &>/dev/null") == 0) - assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null")); - else - assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null")); - - if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL, - &prog_cnt)) { - printf("Failed to query attached programs"); - goto err; - } - - map_fd = bpf_find_map(__func__, obj, "netcnt"); - if (map_fd < 0) { - printf("Failed to find bpf map with net counters"); - goto err; - } - - percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt"); - if (percpu_map_fd < 0) { - printf("Failed to find bpf map with percpu net counters"); - goto err; - } - - if (bpf_map_get_next_key(map_fd, NULL, &key)) { - printf("Failed to get key in cgroup storage\n"); - goto err; - } - - if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) { - printf("Failed to lookup cgroup storage\n"); - goto err; - } - - if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) { - printf("Failed to lookup percpu cgroup storage\n"); - goto err; - } - - /* Some packets can be still in per-cpu cache, but not more than - * MAX_PERCPU_PACKETS. - */ - packets = netcnt.packets; - bytes = netcnt.bytes; - for (cpu = 0; cpu < nproc; cpu++) { - if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) { - printf("Unexpected percpu value: %llu\n", - percpu_netcnt[cpu].packets); - goto err; - } - - packets += percpu_netcnt[cpu].packets; - bytes += percpu_netcnt[cpu].bytes; - } - - /* No packets should be lost */ - if (packets != 10000) { - printf("Unexpected packet count: %lu\n", packets); - goto err; - } - - /* Let's check that bytes counter matches the number of packets - * multiplied by the size of ipv6 ICMP packet. 
- */ - if (bytes != packets * 104) { - printf("Unexpected bytes count: %lu\n", bytes); - goto err; - } - - error = 0; - printf("test_netcnt:PASS\n"); - -err: - cleanup_cgroup_environment(); - free(percpu_netcnt); - -out: - return error; -} diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index ba8ffcdaac30..995278e684b6 100755 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy -ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx +ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy trap cleanup EXIT diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c index 2c8935b3e65d..ee454327e5c6 100644 --- a/tools/testing/selftests/bpf/verifier/dead_code.c +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -159,3 +159,15 @@ .result = ACCEPT, .retval = 2, }, +{ + "dead code: zero extension", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index 412eaee7884a..b66910702c0a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -117,7 +117,7 @@ #define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) #define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4) +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4) #define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) @@ -182,4 +182,7 @@ #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +/* hypercall options */ +#define HV_HYPERCALL_FAST_BIT BIT(16) + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index bab10ae787b6..e0b2bb1339b1 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -215,7 +215,7 @@ int main(void) vcpu_set_hv_cpuid(vm, VCPU_ID); tsc_page_gva = vm_vaddr_alloc_page(vm); - memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize()); + memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, "TSC page has to be page aligned\n"); vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index af27c7e829c1..91d88aaa9899 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -47,6 +47,7 @@ static void do_wrmsr(u32 idx, u64 val) } static int nr_gp; +static int nr_ud; static inline u64 hypercall(u64 control, vm_vaddr_t input_address, vm_vaddr_t output_address) 
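The hypercall() helper whose signature appears above issues the guest-side hypercall; as a hedged sketch (the exact body is not part of this hunk), it follows the Hyper-V x64 calling convention, with the control word in RCX, the input-parameters GPA in RDX, the output GPA in R8 and the status returned in RAX:

#include <stdint.h>

/* Minimal sketch of a guest-side Hyper-V hypercall, assuming the TLFS x64
 * register convention; the selftest's real helper uses the framework's
 * u64/vm_vaddr_t types instead of uint64_t.
 */
static inline uint64_t hv_hypercall(uint64_t control, uint64_t input_gpa,
				    uint64_t output_gpa)
{
	uint64_t hv_status;

	asm volatile("mov %3, %%r8\n"
		     "vmcall"
		     : "=a" (hv_status), "+c" (control), "+d" (input_gpa)
		     : "r" (output_gpa)
		     : "cc", "memory", "r8");
	return hv_status;
}

When HV_HYPERCALL_FAST_BIT (bit 16 of the control word) is set, input is taken from registers/XMM rather than from the GPA pages, which is why the hunk below passes input = output = 0 for the fast case and, with HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE not exposed, expects the call to raise #UD counted by the new guest_ud_handler.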
@@ -80,6 +81,12 @@ static void guest_gp_handler(struct ex_regs *regs) regs->rip = (uint64_t)&wrmsr_end; } +static void guest_ud_handler(struct ex_regs *regs) +{ + nr_ud++; + regs->rip += 3; +} + struct msr_data { uint32_t idx; bool available; @@ -90,6 +97,7 @@ struct msr_data { struct hcall_data { uint64_t control; uint64_t expect; + bool ud_expected; }; static void guest_msr(struct msr_data *msr) @@ -117,13 +125,26 @@ static void guest_msr(struct msr_data *msr) static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) { int i = 0; + u64 res, input, output; wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); while (hcall->control) { - GUEST_ASSERT(hypercall(hcall->control, pgs_gpa, - pgs_gpa + 4096) == hcall->expect); + nr_ud = 0; + if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { + input = pgs_gpa; + output = pgs_gpa + 4096; + } else { + input = output = 0; + } + + res = hypercall(hcall->control, input, output); + if (hcall->ud_expected) + GUEST_ASSERT(nr_ud == 1); + else + GUEST_ASSERT(res == hcall->expect); + GUEST_SYNC(i++); } @@ -552,8 +573,18 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall recomm.ebx = 0xfff; hcall->expect = HV_STATUS_SUCCESS; break; - case 17: + /* XMM fast hypercall */ + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; + hcall->ud_expected = true; + break; + case 18: + feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE; + hcall->ud_expected = false; + hcall->expect = HV_STATUS_SUCCESS; + break; + + case 19: /* END */ hcall->control = 0; break; @@ -625,6 +656,10 @@ int main(void) /* Test hypercalls */ vm = vm_create_default(VCPU_ID, 0, guest_hcall); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + /* Hypercall input/output */ hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c index 57b505cb1561..e1bf55dabdf6 100644 --- a/tools/testing/selftests/nci/nci_dev.c +++ b/tools/testing/selftests/nci/nci_dev.c @@ -57,6 +57,29 @@ const __u8 nci_init_rsp_v2[] = {0x40, 0x01, 0x1c, 0x00, 0x1a, 0x7e, 0x06, const __u8 nci_rf_disc_map_rsp[] = {0x41, 0x00, 0x01, 0x00}; const __u8 nci_rf_disc_rsp[] = {0x41, 0x03, 0x01, 0x00}; const __u8 nci_rf_deact_rsp[] = {0x41, 0x06, 0x01, 0x00}; +const __u8 nci_rf_deact_ntf[] = {0x61, 0x06, 0x02, 0x00, 0x00}; +const __u8 nci_rf_activate_ntf[] = {0x61, 0x05, 0x1D, 0x01, 0x02, 0x04, 0x00, + 0xFF, 0xFF, 0x0C, 0x44, 0x03, 0x07, 0x04, + 0x62, 0x26, 0x11, 0x80, 0x1D, 0x80, 0x01, + 0x20, 0x00, 0x00, 0x00, 0x06, 0x05, 0x75, + 0x77, 0x81, 0x02, 0x80}; +const __u8 nci_t4t_select_cmd[] = {0x00, 0x00, 0x0C, 0x00, 0xA4, 0x04, 0x00, + 0x07, 0xD2, 0x76, 0x00, 0x00, 0x85, 0x01, 0x01}; +const __u8 nci_t4t_select_cmd2[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02, + 0xE1, 0x03}; +const __u8 nci_t4t_select_cmd3[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02, + 0xE1, 0x04}; +const __u8 nci_t4t_read_cmd[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x0F}; +const __u8 nci_t4t_read_rsp[] = {0x00, 0x00, 0x11, 0x00, 0x0F, 0x20, 0x00, 0x3B, + 0x00, 0x34, 0x04, 0x06, 0xE1, 0x04, 0x08, 0x00, + 0x00, 0x00, 0x90, 0x00}; +const __u8 nci_t4t_read_cmd2[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x02}; +const __u8 nci_t4t_read_rsp2[] = {0x00, 0x00, 0x04, 0x00, 0x0F, 0x90, 0x00}; +const __u8 nci_t4t_read_cmd3[] = {0x00, 0x00, 
0x05, 0x00, 0xB0, 0x00, 0x02, 0x0F}; +const __u8 nci_t4t_read_rsp3[] = {0x00, 0x00, 0x11, 0xD1, 0x01, 0x0B, 0x54, 0x02, + 0x65, 0x6E, 0x4E, 0x46, 0x43, 0x20, 0x54, 0x45, + 0x53, 0x54, 0x90, 0x00}; +const __u8 nci_t4t_rsp_ok[] = {0x00, 0x00, 0x02, 0x90, 0x00}; struct msgtemplate { struct nlmsghdr n; @@ -87,7 +110,7 @@ error: static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, __u8 genl_cmd, int nla_num, __u16 nla_type[], - void *nla_data[], int nla_len[]) + void *nla_data[], int nla_len[], __u16 flags) { struct sockaddr_nl nladdr; struct msgtemplate msg; @@ -98,7 +121,7 @@ static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); msg.n.nlmsg_type = nlmsg_type; - msg.n.nlmsg_flags = NLM_F_REQUEST; + msg.n.nlmsg_flags = flags; msg.n.nlmsg_seq = 0; msg.n.nlmsg_pid = nlmsg_pid; msg.g.cmd = genl_cmd; @@ -110,11 +133,11 @@ static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, na->nla_type = nla_type[cnt]; na->nla_len = nla_len[cnt] + NLA_HDRLEN; - if (nla_len > 0) + if (nla_len[cnt] > 0) memcpy(NLA_DATA(na), nla_data[cnt], nla_len[cnt]); - msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); - prv_len = na->nla_len; + prv_len = NLA_ALIGN(nla_len[cnt]) + NLA_HDRLEN; + msg.n.nlmsg_len += prv_len; } buf = (char *)&msg; @@ -146,11 +169,11 @@ static int send_get_nfc_family(int sd, __u32 pid) nla_get_family_data = family_name; return send_cmd_mt_nla(sd, GENL_ID_CTRL, pid, CTRL_CMD_GETFAMILY, - 1, &nla_get_family_type, - &nla_get_family_data, &nla_get_family_len); + 1, &nla_get_family_type, &nla_get_family_data, + &nla_get_family_len, NLM_F_REQUEST); } -static int get_family_id(int sd, __u32 pid) +static int get_family_id(int sd, __u32 pid, __u32 *event_group) { struct { struct nlmsghdr n; @@ -158,8 +181,9 @@ static int get_family_id(int sd, __u32 pid) char buf[512]; } ans; struct nlattr *na; - int rep_len; + int resp_len; __u16 id; + int len; int rc; rc = send_get_nfc_family(sd, pid); @@ -167,17 +191,49 @@ static int get_family_id(int sd, __u32 pid) if (rc < 0) return 0; - rep_len = recv(sd, &ans, sizeof(ans), 0); + resp_len = recv(sd, &ans, sizeof(ans), 0); - if (ans.n.nlmsg_type == NLMSG_ERROR || rep_len < 0 || - !NLMSG_OK(&ans.n, rep_len)) + if (ans.n.nlmsg_type == NLMSG_ERROR || resp_len < 0 || + !NLMSG_OK(&ans.n, resp_len)) return 0; + len = 0; + resp_len = GENLMSG_PAYLOAD(&ans.n); na = (struct nlattr *)GENLMSG_DATA(&ans); - na = (struct nlattr *)((char *)na + NLA_ALIGN(na->nla_len)); - if (na->nla_type == CTRL_ATTR_FAMILY_ID) - id = *(__u16 *)NLA_DATA(na); + while (len < resp_len) { + len += NLA_ALIGN(na->nla_len); + if (na->nla_type == CTRL_ATTR_FAMILY_ID) { + id = *(__u16 *)NLA_DATA(na); + } else if (na->nla_type == CTRL_ATTR_MCAST_GROUPS) { + struct nlattr *nested_na; + struct nlattr *group_na; + int group_attr_len; + int group_attr; + + nested_na = (struct nlattr *)((char *)na + NLA_HDRLEN); + group_na = (struct nlattr *)((char *)nested_na + NLA_HDRLEN); + group_attr_len = 0; + + for (group_attr = CTRL_ATTR_MCAST_GRP_UNSPEC; + group_attr < CTRL_ATTR_MCAST_GRP_MAX; group_attr++) { + if (group_na->nla_type == CTRL_ATTR_MCAST_GRP_ID) { + *event_group = *(__u32 *)((char *)group_na + + NLA_HDRLEN); + break; + } + + group_attr_len += NLA_ALIGN(group_na->nla_len) + + NLA_HDRLEN; + if (group_attr_len >= nested_na->nla_len) + break; + + group_na = (struct nlattr *)((char *)group_na + + NLA_ALIGN(group_na->nla_len)); + } + } + na = (struct nlattr *)(GENLMSG_DATA(&ans) + len); + } return id; } @@ -189,12 +245,12 @@ static 
int send_cmd_with_idx(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, int nla_len = 4; return send_cmd_mt_nla(sd, nlmsg_type, nlmsg_pid, genl_cmd, 1, - &nla_type, &nla_data, &nla_len); + &nla_type, &nla_data, &nla_len, NLM_F_REQUEST); } static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtemplate *msg) { - int rc, rep_len; + int rc, resp_len; rc = send_cmd_with_idx(sd, fid, pid, NFC_CMD_GET_DEVICE, dev_id); if (rc < 0) { @@ -202,14 +258,14 @@ static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtem goto error; } - rep_len = recv(sd, msg, sizeof(*msg), 0); - if (rep_len < 0) { + resp_len = recv(sd, msg, sizeof(*msg), 0); + if (resp_len < 0) { rc = -2; goto error; } if (msg->n.nlmsg_type == NLMSG_ERROR || - !NLMSG_OK(&msg->n, rep_len)) { + !NLMSG_OK(&msg->n, resp_len)) { rc = -3; goto error; } @@ -222,21 +278,21 @@ error: static __u8 get_dev_enable_state(struct msgtemplate *msg) { struct nlattr *na; - int rep_len; + int resp_len; int len; - rep_len = GENLMSG_PAYLOAD(&msg->n); + resp_len = GENLMSG_PAYLOAD(&msg->n); na = (struct nlattr *)GENLMSG_DATA(msg); len = 0; - while (len < rep_len) { + while (len < resp_len) { len += NLA_ALIGN(na->nla_len); if (na->nla_type == NFC_ATTR_DEVICE_POWERED) return *(char *)NLA_DATA(na); na = (struct nlattr *)(GENLMSG_DATA(msg) + len); } - return rep_len; + return resp_len; } FIXTURE(NCI) { @@ -270,8 +326,7 @@ static void *virtual_dev_open(void *data) dev_fd = *(int *)data; - while ((len = read(dev_fd, buf, 258)) == 0) - ; + len = read(dev_fd, buf, 258); if (len <= 0) goto error; if (len != sizeof(nci_reset_cmd)) @@ -280,8 +335,7 @@ static void *virtual_dev_open(void *data) goto error; write(dev_fd, nci_reset_rsp, sizeof(nci_reset_rsp)); - while ((len = read(dev_fd, buf, 258)) == 0) - ; + len = read(dev_fd, buf, 258); if (len <= 0) goto error; if (len != sizeof(nci_init_cmd)) @@ -290,8 +344,7 @@ static void *virtual_dev_open(void *data) goto error; write(dev_fd, nci_init_rsp, sizeof(nci_init_rsp)); - while ((len = read(dev_fd, buf, 258)) == 0) - ; + len = read(dev_fd, buf, 258); if (len <= 0) goto error; if (len != sizeof(nci_rf_disc_map_cmd)) @@ -313,8 +366,7 @@ static void *virtual_dev_open_v2(void *data) dev_fd = *(int *)data; - while ((len = read(dev_fd, buf, 258)) == 0) - ; + len = read(dev_fd, buf, 258); if (len <= 0) goto error; if (len != sizeof(nci_reset_cmd)) @@ -324,8 +376,7 @@ static void *virtual_dev_open_v2(void *data) write(dev_fd, nci_reset_rsp_v2, sizeof(nci_reset_rsp_v2)); write(dev_fd, nci_reset_ntf, sizeof(nci_reset_ntf)); - while ((len = read(dev_fd, buf, 258)) == 0) - ; + len = read(dev_fd, buf, 258); if (len <= 0) goto error; if (len != sizeof(nci_init_cmd_v2)) @@ -334,8 +385,7 @@ static void *virtual_dev_open_v2(void *data) goto error; write(dev_fd, nci_init_rsp_v2, sizeof(nci_init_rsp_v2)); - while ((len = read(dev_fd, buf, 258)) == 0) - ; + len = read(dev_fd, buf, 258); if (len <= 0) goto error; if (len != sizeof(nci_rf_disc_map_cmd)) @@ -353,6 +403,7 @@ FIXTURE_SETUP(NCI) { struct msgtemplate msg; pthread_t thread_t; + __u32 event_group; int status; int rc; @@ -364,12 +415,16 @@ FIXTURE_SETUP(NCI) ASSERT_NE(self->sd, -1); self->pid = getpid(); - self->fid = get_family_id(self->sd, self->pid); + self->fid = get_family_id(self->sd, self->pid, &event_group); ASSERT_NE(self->fid, -1); self->virtual_nci_fd = open("/dev/virtual_nci", O_RDWR); ASSERT_GT(self->virtual_nci_fd, -1); + rc = setsockopt(self->sd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &event_group, + sizeof(event_group)); + ASSERT_NE(rc, 
-1); + rc = ioctl(self->virtual_nci_fd, IOCTL_GET_NCIDEV_IDX, &self->dev_idex); ASSERT_EQ(rc, 0); @@ -402,8 +457,7 @@ static void *virtual_deinit(void *data) dev_fd = *(int *)data; - while ((len = read(dev_fd, buf, 258)) == 0) - ; + len = read(dev_fd, buf, 258); if (len <= 0) goto error; if (len != sizeof(nci_reset_cmd)) @@ -425,8 +479,7 @@ static void *virtual_deinit_v2(void *data) dev_fd = *(int *)data; - while ((len = read(dev_fd, buf, 258)) == 0) - ; + len = read(dev_fd, buf, 258); if (len <= 0) goto error; if (len != sizeof(nci_reset_cmd)) @@ -489,16 +542,14 @@ static void *virtual_poll_start(void *data) dev_fd = *(int *)data; - while ((len = read(dev_fd, buf, 258)) == 0) - ; + len = read(dev_fd, buf, 258); if (len <= 0) goto error; if (len != sizeof(nci_rf_discovery_cmd)) goto error; if (memcmp(nci_rf_discovery_cmd, buf, len)) goto error; - write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp)) - ; + write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp)); return (void *)0; error: @@ -513,8 +564,7 @@ static void *virtual_poll_stop(void *data) dev_fd = *(int *)data; - while ((len = read(dev_fd, buf, 258)) == 0) - ; + len = read(dev_fd, buf, 258); if (len <= 0) goto error; if (len != sizeof(nci_rf_deact_cmd)) @@ -528,38 +578,282 @@ error: return (void *)-1; } -TEST_F(NCI, start_poll) +int start_polling(int dev_idx, int proto, int virtual_fd, int sd, int fid, int pid) { __u16 nla_start_poll_type[2] = {NFC_ATTR_DEVICE_INDEX, NFC_ATTR_PROTOCOLS}; - void *nla_start_poll_data[2] = {&self->dev_idex, &self->proto}; + void *nla_start_poll_data[2] = {&dev_idx, &proto}; int nla_start_poll_len[2] = {4, 4}; pthread_t thread_t; int status; int rc; rc = pthread_create(&thread_t, NULL, virtual_poll_start, - (void *)&self->virtual_nci_fd); - ASSERT_GT(rc, -1); + (void *)&virtual_fd); + if (rc < 0) + return rc; - rc = send_cmd_mt_nla(self->sd, self->fid, self->pid, - NFC_CMD_START_POLL, 2, nla_start_poll_type, - nla_start_poll_data, nla_start_poll_len); - EXPECT_EQ(rc, 0); + rc = send_cmd_mt_nla(sd, fid, pid, NFC_CMD_START_POLL, 2, nla_start_poll_type, + nla_start_poll_data, nla_start_poll_len, NLM_F_REQUEST); + if (rc != 0) + return rc; pthread_join(thread_t, (void **)&status); - ASSERT_EQ(status, 0); + return status; +} + +int stop_polling(int dev_idx, int virtual_fd, int sd, int fid, int pid) +{ + pthread_t thread_t; + int status; + int rc; rc = pthread_create(&thread_t, NULL, virtual_poll_stop, - (void *)&self->virtual_nci_fd); - ASSERT_GT(rc, -1); + (void *)&virtual_fd); + if (rc < 0) + return rc; - rc = send_cmd_with_idx(self->sd, self->fid, self->pid, - NFC_CMD_STOP_POLL, self->dev_idex); - EXPECT_EQ(rc, 0); + rc = send_cmd_with_idx(sd, fid, pid, + NFC_CMD_STOP_POLL, dev_idx); + if (rc != 0) + return rc; pthread_join(thread_t, (void **)&status); + return status; +} + +TEST_F(NCI, start_poll) +{ + int status; + + status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd, + self->sd, self->fid, self->pid); + EXPECT_EQ(status, 0); + + status = stop_polling(self->dev_idex, self->virtual_nci_fd, self->sd, + self->fid, self->pid); + EXPECT_EQ(status, 0); +} + +int get_taginfo(int dev_idx, int sd, int fid, int pid) +{ + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[512]; + } ans; + + struct nlattr *na; + __u32 protocol; + int targetidx; + __u8 sel_res; + int resp_len; + int len; + + __u16 tagid_type; + void *tagid_type_data; + int tagid_len; + + tagid_type = NFC_ATTR_DEVICE_INDEX; + tagid_type_data = &dev_idx; + tagid_len = 4; + + send_cmd_mt_nla(sd, fid, pid, 
NFC_CMD_GET_TARGET, 1, &tagid_type, + &tagid_type_data, &tagid_len, NLM_F_REQUEST | NLM_F_DUMP); + resp_len = recv(sd, &ans, sizeof(ans), 0); + if (ans.n.nlmsg_type == NLMSG_ERROR || resp_len < 0 || + !NLMSG_OK(&ans.n, resp_len)) + return -1; + + resp_len = GENLMSG_PAYLOAD(&ans.n); + na = (struct nlattr *)GENLMSG_DATA(&ans); + + len = 0; + targetidx = -1; + protocol = -1; + sel_res = -1; + + while (len < resp_len) { + len += NLA_ALIGN(na->nla_len); + + if (na->nla_type == NFC_ATTR_TARGET_INDEX) + targetidx = *(int *)((char *)na + NLA_HDRLEN); + else if (na->nla_type == NFC_ATTR_TARGET_SEL_RES) + sel_res = *(__u8 *)((char *)na + NLA_HDRLEN); + else if (na->nla_type == NFC_ATTR_PROTOCOLS) + protocol = *(__u32 *)((char *)na + NLA_HDRLEN); + + na = (struct nlattr *)(GENLMSG_DATA(&ans) + len); + } + + if (targetidx == -1 || sel_res != 0x20 || protocol != NFC_PROTO_ISO14443_MASK) + return -1; + + return targetidx; +} + +int connect_socket(int dev_idx, int target_idx) +{ + struct sockaddr_nfc addr; + int sock; + int err = 0; + + sock = socket(AF_NFC, SOCK_SEQPACKET, NFC_SOCKPROTO_RAW); + if (sock == -1) + return -1; + + addr.sa_family = AF_NFC; + addr.dev_idx = dev_idx; + addr.target_idx = target_idx; + addr.nfc_protocol = NFC_PROTO_ISO14443; + + err = connect(sock, (struct sockaddr *)&addr, sizeof(addr)); + if (err) { + close(sock); + return -1; + } + + return sock; +} + +int connect_tag(int dev_idx, int virtual_fd, int sd, int fid, int pid) +{ + struct genlmsghdr *genlhdr; + struct nlattr *na; + char evt_data[255]; + int target_idx; + int resp_len; + int evt_dev; + + write(virtual_fd, nci_rf_activate_ntf, sizeof(nci_rf_activate_ntf)); + resp_len = recv(sd, evt_data, sizeof(evt_data), 0); + if (resp_len < 0) + return -1; + + genlhdr = (struct genlmsghdr *)((struct nlmsghdr *)evt_data + 1); + na = (struct nlattr *)(genlhdr + 1); + evt_dev = *(int *)((char *)na + NLA_HDRLEN); + if (dev_idx != evt_dev) + return -1; + + target_idx = get_taginfo(dev_idx, sd, fid, pid); + if (target_idx == -1) + return -1; + return connect_socket(dev_idx, target_idx); +} + +int read_write_nci_cmd(int nfc_sock, int virtual_fd, const __u8 *cmd, __u32 cmd_len, + const __u8 *rsp, __u32 rsp_len) +{ + char buf[256]; + unsigned int len; + + send(nfc_sock, &cmd[3], cmd_len - 3, 0); + len = read(virtual_fd, buf, cmd_len); + if (len < 0 || memcmp(buf, cmd, cmd_len)) + return -1; + + write(virtual_fd, rsp, rsp_len); + len = recv(nfc_sock, buf, rsp_len - 2, 0); + if (len < 0 || memcmp(&buf[1], &rsp[3], rsp_len - 3)) + return -1; + + return 0; +} + +int read_tag(int nfc_sock, int virtual_fd) +{ + if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd, + sizeof(nci_t4t_select_cmd), nci_t4t_rsp_ok, + sizeof(nci_t4t_rsp_ok))) + return -1; + + if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd2, + sizeof(nci_t4t_select_cmd2), nci_t4t_rsp_ok, + sizeof(nci_t4t_rsp_ok))) + return -1; + + if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd, + sizeof(nci_t4t_read_cmd), nci_t4t_read_rsp, + sizeof(nci_t4t_read_rsp))) + return -1; + + if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd3, + sizeof(nci_t4t_select_cmd3), nci_t4t_rsp_ok, + sizeof(nci_t4t_rsp_ok))) + return -1; + + if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd2, + sizeof(nci_t4t_read_cmd2), nci_t4t_read_rsp2, + sizeof(nci_t4t_read_rsp2))) + return -1; + + return read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd3, + sizeof(nci_t4t_read_cmd3), nci_t4t_read_rsp3, + sizeof(nci_t4t_read_rsp3)); +} + +static void 
*virtual_deactivate_proc(void *data) +{ + int virtual_fd; + char buf[256]; + int deactcmd_len; + int len; + + virtual_fd = *(int *)data; + deactcmd_len = sizeof(nci_rf_deact_cmd); + len = read(virtual_fd, buf, deactcmd_len); + if (len != deactcmd_len || memcmp(buf, nci_rf_deact_cmd, deactcmd_len)) + return (void *)-1; + + write(virtual_fd, nci_rf_deact_rsp, sizeof(nci_rf_deact_rsp)); + write(virtual_fd, nci_rf_deact_ntf, sizeof(nci_rf_deact_ntf)); + + return (void *)0; +} + +int disconnect_tag(int nfc_sock, int virtual_fd) +{ + pthread_t thread_t; + char buf[256]; + int status; + int len; + + send(nfc_sock, &nci_t4t_select_cmd3[3], sizeof(nci_t4t_select_cmd3) - 3, 0); + len = read(virtual_fd, buf, sizeof(nci_t4t_select_cmd3)); + if (len < 0 || memcmp(buf, nci_t4t_select_cmd3, sizeof(nci_t4t_select_cmd3))) + return -1; + + len = recv(nfc_sock, buf, sizeof(nci_t4t_rsp_ok), 0); + if (len != -1) + return -1; + + status = pthread_create(&thread_t, NULL, virtual_deactivate_proc, + (void *)&virtual_fd); + + close(nfc_sock); + pthread_join(thread_t, (void **)&status); + return status; +} + +TEST_F(NCI, t4t_tag_read) +{ + int nfc_sock; + int status; + + status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd, + self->sd, self->fid, self->pid); + EXPECT_EQ(status, 0); + + nfc_sock = connect_tag(self->dev_idex, self->virtual_nci_fd, self->sd, + self->fid, self->pid); + ASSERT_GT(nfc_sock, -1); + + status = read_tag(nfc_sock, self->virtual_nci_fd); ASSERT_EQ(status, 0); + + status = disconnect_tag(nfc_sock, self->virtual_nci_fd); + EXPECT_EQ(status, 0); } TEST_F(NCI, deinit) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index a8ad92850e63..162e5f1ac36b 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -3879,6 +3879,32 @@ use_case_ping_lla_multi() log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C" } +# Perform IPv{4,6} SNAT on ns-A, and verify TCP connection is successfully +# established with ns-B. +use_case_snat_on_vrf() +{ + setup "yes" + + local port="12345" + + run_cmd iptables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF} + run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF} + + run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} & + sleep 1 + run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port} + log_test $? 0 "IPv4 TCP connection over VRF with SNAT" + + run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} & + sleep 1 + run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port} + log_test $? 
0 "IPv6 TCP connection over VRF with SNAT" + + # Cleanup + run_cmd iptables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF} + run_cmd ip6tables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF} +} + use_cases() { log_section "Use cases" @@ -3886,6 +3912,8 @@ use_cases() use_case_br log_subsection "Ping LLA with multiple interfaces" use_case_ping_lla_multi + log_subsection "SNAT on VRF" + use_case_snat_on_vrf } ################################################################################ diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index bcf15487e584..3caf72bb9c6a 100644..100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -3,137 +3,128 @@ # # Author: Justin Iurman <justin.iurman@uliege.be> # -# This test evaluates the IOAM insertion for IPv6 by checking the IOAM data -# integrity on the receiver. +# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data +# consistency directly inside packets on the receiver side. Tests are divided +# into three categories: OUTPUT (evaluates the IOAM processing by the sender), +# INPUT (evaluates the IOAM processing by the receiver) and GLOBAL (evaluates +# wider use cases that do not fall into the other two categories). Both OUTPUT +# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL +# tests use the entire three-node topology (alpha, beta, gamma). Each test is +# documented inside its own handler in the code below. # -# The topology is formed by 3 nodes: Alpha (sender), Beta (router in-between) -# and Gamma (receiver). An IOAM domain is configured from Alpha to Gamma only, -# which means not on the reverse path. When Gamma is the destination, Alpha -# adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop and fills the -# trace with its own IOAM data. Beta and Gamma also fill the trace. The IOAM -# data integrity is checked on Gamma, by comparing with the pre-defined IOAM -# configuration (see below). +# An IOAM domain is configured from Alpha to Gamma but not on the reverse path. +# When either Beta or Gamma is the destination (depending on the test category), +# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop. # -# +-------------------+ +-------------------+ -# | | | | -# | alpha netns | | gamma netns | -# | | | | -# | +-------------+ | | +-------------+ | -# | | veth0 | | | | veth0 | | -# | | db01::2/64 | | | | db02::2/64 | | -# | +-------------+ | | +-------------+ | -# | . | | . | -# +-------------------+ +-------------------+ -# . . -# . . -# . . -# +----------------------------------------------------+ -# | . . | -# | +-------------+ +-------------+ | -# | | veth0 | | veth1 | | -# | | db01::1/64 | ................ | db02::1/64 | | -# | +-------------+ +-------------+ | -# | | -# | beta netns | -# | | -# +--------------------------+-------------------------+ # +# +-------------------+ +-------------------+ +# | | | | +# | Alpha netns | | Gamma netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | db01::2/64 | | | | db02::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +----------------------------------------------------+ +# | . . | +# | +-------------+ +-------------+ | +# | | veth0 | | veth1 | | +# | | db01::1/64 | ................ 
| db02::1/64 | | +# | +-------------+ +-------------+ | +# | | +# | Beta netns | +# | | +# +----------------------------------------------------+ # -# ~~~~~~~~~~~~~~~~~~~~~~ -# | IOAM configuration | -# ~~~~~~~~~~~~~~~~~~~~~~ # -# Alpha -# +-----------------------------------------------------------+ -# | Type | Value | -# +-----------------------------------------------------------+ -# | Node ID | 1 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 11111111 | -# +-----------------------------------------------------------+ -# | Ingress ID | 0xffff (default value) | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 0xffffffff (default value) | -# +-----------------------------------------------------------+ -# | Egress ID | 101 | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 101101 | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee0 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf00dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 777 | -# +-----------------------------------------------------------+ -# | Schema Data | something that will be 4n-aligned | -# +-----------------------------------------------------------+ # -# Note: When Gamma is the destination, Alpha adds an IOAM Pre-allocated Trace -# option inside a Hop-by-hop, where 164 bytes are pre-allocated for the -# trace, with 123 as the IOAM-Namespace and with 0xfff00200 as the trace -# type (= all available options at this time). As a result, and based on -# IOAM configurations here, only both Alpha and Beta should be capable of -# inserting their IOAM data while Gamma won't have enough space and will -# set the overflow bit. 
+# =============================================================
+# |                Alpha - IOAM configuration                 |
+# +===========================================================+
+# | Node ID             | 1                                   |
+# +-----------------------------------------------------------+
+# | Node Wide ID        | 11111111                            |
+# +-----------------------------------------------------------+
+# | Ingress ID          | 0xffff (default value)              |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID     | 0xffffffff (default value)          |
+# +-----------------------------------------------------------+
+# | Egress ID           | 101                                 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID      | 101101                              |
+# +-----------------------------------------------------------+
+# | Namespace Data      | 0xdeadbee0                          |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf00dc0de                  |
+# +-----------------------------------------------------------+
+# | Schema ID           | 777                                 |
+# +-----------------------------------------------------------+
+# | Schema Data         | something that will be 4n-aligned   |
+# +-----------------------------------------------------------+
 #
-# Beta
-# +-----------------------------------------------------------+
-# | Type                | Value                               |
-# +-----------------------------------------------------------+
-# | Node ID             | 2                                   |
-# +-----------------------------------------------------------+
-# | Node Wide ID        | 22222222                            |
-# +-----------------------------------------------------------+
-# | Ingress ID          | 201                                 |
-# +-----------------------------------------------------------+
-# | Ingress Wide ID     | 201201                              |
-# +-----------------------------------------------------------+
-# | Egress ID           | 202                                 |
-# +-----------------------------------------------------------+
-# | Egress Wide ID      | 202202                              |
-# +-----------------------------------------------------------+
-# | Namespace Data      | 0xdeadbee1                          |
-# +-----------------------------------------------------------+
-# | Namespace Wide Data | 0xcafec0caf11dc0de                  |
-# +-----------------------------------------------------------+
-# | Schema ID           | 0xffffff (= None)                   |
-# +-----------------------------------------------------------+
-# | Schema Data         |                                     |
-# +-----------------------------------------------------------+
 #
-# Gamma
-# +-----------------------------------------------------------+
-# | Type                | Value                               |
-# +-----------------------------------------------------------+
-# | Node ID             | 3                                   |
-# +-----------------------------------------------------------+
-# | Node Wide ID        | 33333333                            |
-# +-----------------------------------------------------------+
-# | Ingress ID          | 301                                 |
-# +-----------------------------------------------------------+
-# | Ingress Wide ID     | 301301                              |
-# +-----------------------------------------------------------+
-# | Egress ID           | 0xffff (default value)              |
-# +-----------------------------------------------------------+
-# | Egress Wide ID      | 0xffffffff (default value)          |
-# +-----------------------------------------------------------+
-# | Namespace Data      | 0xdeadbee2                          |
-# +-----------------------------------------------------------+
-# | Namespace Wide Data | 0xcafec0caf22dc0de                  |
-# +-----------------------------------------------------------+
-# | Schema ID           | 0xffffff (= None)                   |
-# +-----------------------------------------------------------+
-# | Schema Data         |                                     |
-# +-----------------------------------------------------------+
-
-#===============================================================================
+# =============================================================
+# |                 Beta - IOAM configuration                 |
+# +===========================================================+
+# | Node ID             | 2                                   |
+# +-----------------------------------------------------------+
+# | Node Wide ID        | 22222222                            |
+# +-----------------------------------------------------------+
+# | Ingress ID          | 201                                 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID     | 201201                              |
+# +-----------------------------------------------------------+
+# | Egress ID           | 202                                 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID      | 202202                              |
+# +-----------------------------------------------------------+
+# | Namespace Data      | 0xdeadbee1                          |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf11dc0de                  |
+# +-----------------------------------------------------------+
+# | Schema ID           | 666                                 |
+# +-----------------------------------------------------------+
+# | Schema Data         | Hello there -Obi                    |
+# +-----------------------------------------------------------+
 #
-# WARNING:
-# Do NOT modify the following configuration unless you know what you're doing.
 #
-IOAM_NAMESPACE=123
-IOAM_TRACE_TYPE=0xfff00200
-IOAM_PREALLOC_DATA_SIZE=164
+# =============================================================
+# |                Gamma - IOAM configuration                 |
+# +===========================================================+
+# | Node ID             | 3                                   |
+# +-----------------------------------------------------------+
+# | Node Wide ID        | 33333333                            |
+# +-----------------------------------------------------------+
+# | Ingress ID          | 301                                 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID     | 301301                              |
+# +-----------------------------------------------------------+
+# | Egress ID           | 0xffff (default value)              |
+# +-----------------------------------------------------------+
+# | Egress Wide ID      | 0xffffffff (default value)          |
+# +-----------------------------------------------------------+
+# | Namespace Data      | 0xdeadbee2                          |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf22dc0de                  |
+# +-----------------------------------------------------------+
+# | Schema ID           | 0xffffff (= None)                   |
+# +-----------------------------------------------------------+
+# | Schema Data         |                                     |
+# +-----------------------------------------------------------+
+
+
+################################################################################
+#                                                                              #
+# WARNING: Be careful if you modify the block below - it MUST be kept          #
+#          synchronized with configurations inside ioam6_parser.c and always   #
+#          reflect the same.                                                   #
+#                                                                              #
+################################################################################
 ALPHA=(
 	1		# ID
@@ -157,8 +148,8 @@ BETA=(
 	202202
 	0xdeadbee1
 	0xcafec0caf00dc0de
-	0xffffff
-	""
+	666
+	"Hello there -Obi"
 )
 GAMMA=(
@@ -173,28 +164,75 @@ GAMMA=(
 	0xffffff
 	""
 )
-#===============================================================================
-if [ "$(id -u)" -ne 0 ]; then
-	echo "SKIP: Need root privileges"
-	exit 1
-fi
+TESTS_OUTPUT="
+	out_undef_ns
+	out_no_room
+	out_bits
+	out_full_supp_trace
+"
-if [ ! -x "$(command -v ip)" ]; then
-	echo "SKIP: Could not run test without ip tool"
-	exit 1
-fi
+TESTS_INPUT="
+	in_undef_ns
+	in_no_room
+	in_oflag
+	in_bits
+	in_full_supp_trace
+"
-ip ioam &>/dev/null
-if [ $? = 1 ]; then
-	echo "SKIP: ip tool must include IOAM"
-	exit 1
-fi
+TESTS_GLOBAL="
+	fwd_full_supp_trace
+"
-if [ !
-e /proc/sys/net/ipv6/ioam6_id ]; then - echo "SKIP: ioam6 sysctls do not exist" - exit 1 -fi + +################################################################################ +# # +# LIBRARY # +# # +################################################################################ + +check_kernel_compatibility() +{ + ip netns add ioam-tmp-node + ip link add name veth0 netns ioam-tmp-node type veth \ + peer name veth1 netns ioam-tmp-node + + ip -netns ioam-tmp-node link set veth0 up + ip -netns ioam-tmp-node link set veth1 up + + ip -netns ioam-tmp-node ioam namespace add 0 &>/dev/null + ns_ad=$? + + ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0" + ns_sh=$? + + if [[ $ns_ad != 0 || $ns_sh != 0 ]] + then + echo "SKIP: kernel version probably too old, missing ioam support" + ip link del veth0 2>/dev/null || true + ip netns del ioam-tmp-node || true + exit 1 + fi + + ip -netns ioam-tmp-node route add db02::/64 encap ioam6 trace prealloc \ + type 0x800000 ns 0 size 4 dev veth0 &>/dev/null + tr_ad=$? + + ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6 trace" + tr_sh=$? + + if [[ $tr_ad != 0 || $tr_sh != 0 ]] + then + echo "SKIP: cannot attach an ioam trace to a route, did you compile" \ + "without CONFIG_IPV6_IOAM6_LWTUNNEL?" + ip link del veth0 2>/dev/null || true + ip netns del ioam-tmp-node || true + exit 1 + fi + + ip link del veth0 2>/dev/null || true + ip netns del ioam-tmp-node || true +} cleanup() { @@ -212,13 +250,10 @@ setup() ip netns add ioam-node-beta ip netns add ioam-node-gamma - ip link add name ioam-veth-alpha type veth peer name ioam-veth-betaL - ip link add name ioam-veth-betaR type veth peer name ioam-veth-gamma - - ip link set ioam-veth-alpha netns ioam-node-alpha - ip link set ioam-veth-betaL netns ioam-node-beta - ip link set ioam-veth-betaR netns ioam-node-beta - ip link set ioam-veth-gamma netns ioam-node-gamma + ip link add name ioam-veth-alpha netns ioam-node-alpha type veth \ + peer name ioam-veth-betaL netns ioam-node-beta + ip link add name ioam-veth-betaR netns ioam-node-beta type veth \ + peer name ioam-veth-gamma netns ioam-node-gamma ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0 ip -netns ioam-node-beta link set ioam-veth-betaL name veth0 @@ -228,7 +263,9 @@ setup() ip -netns ioam-node-alpha addr add db01::2/64 dev veth0 ip -netns ioam-node-alpha link set veth0 up ip -netns ioam-node-alpha link set lo up - ip -netns ioam-node-alpha route add default via db01::1 + ip -netns ioam-node-alpha route add db02::/64 via db01::1 dev veth0 + ip -netns ioam-node-alpha route del db01::/64 + ip -netns ioam-node-alpha route add db01::/64 dev veth0 ip -netns ioam-node-beta addr add db01::1/64 dev veth0 ip -netns ioam-node-beta addr add db02::1/64 dev veth1 @@ -239,17 +276,16 @@ setup() ip -netns ioam-node-gamma addr add db02::2/64 dev veth0 ip -netns ioam-node-gamma link set veth0 up ip -netns ioam-node-gamma link set lo up - ip -netns ioam-node-gamma route add default via db02::1 + ip -netns ioam-node-gamma route add db01::/64 via db02::1 dev veth0 # - IOAM config - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} - ip -netns ioam-node-alpha ioam namespace add ${IOAM_NAMESPACE} data ${ALPHA[6]} wide ${ALPHA[7]} + ip -netns ioam-node-alpha ioam namespace add 123 data 
${ALPHA[6]} wide ${ALPHA[7]} ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" - ip -netns ioam-node-alpha ioam namespace set ${IOAM_NAMESPACE} schema ${ALPHA[8]} - ip -netns ioam-node-alpha route add db02::/64 encap ioam6 trace type ${IOAM_TRACE_TYPE:0:-2} ns ${IOAM_NAMESPACE} size ${IOAM_PREALLOC_DATA_SIZE} via db01::1 dev veth0 + ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]} ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1 ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} @@ -259,38 +295,357 @@ setup() ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} - ip -netns ioam-node-beta ioam namespace add ${IOAM_NAMESPACE} data ${BETA[6]} wide ${BETA[7]} + ip -netns ioam-node-beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} + ip -netns ioam-node-beta ioam schema add ${BETA[8]} "${BETA[9]}" + ip -netns ioam-node-beta ioam namespace set 123 schema ${BETA[8]} ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} - ip -netns ioam-node-gamma ioam namespace add ${IOAM_NAMESPACE} data ${GAMMA[6]} wide ${GAMMA[7]} -} + ip -netns ioam-node-gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} -run() -{ - echo -n "IOAM test... " + sleep 1 ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null - if [ $? != 0 ]; then - echo "FAILED" + if [ $? != 0 ] + then + echo "Setup FAILED" cleanup &>/dev/null exit 0 fi +} - ip netns exec ioam-node-gamma ./ioam6_parser veth0 2 ${IOAM_NAMESPACE} ${IOAM_TRACE_TYPE} 64 ${ALPHA[0]} ${ALPHA[1]} ${ALPHA[2]} ${ALPHA[3]} ${ALPHA[4]} ${ALPHA[5]} ${ALPHA[6]} ${ALPHA[7]} ${ALPHA[8]} "${ALPHA[9]}" 63 ${BETA[0]} ${BETA[1]} ${BETA[2]} ${BETA[3]} ${BETA[4]} ${BETA[5]} ${BETA[6]} ${BETA[7]} ${BETA[8]} & +log_test_passed() +{ + local desc=$1 + printf "TEST: %-60s [ OK ]\n" "${desc}" +} +log_test_failed() +{ + local desc=$1 + printf "TEST: %-60s [FAIL]\n" "${desc}" +} + +run_test() +{ + local name=$1 + local desc=$2 + local node_src=$3 + local node_dst=$4 + local ip6_src=$5 + local ip6_dst=$6 + local if_dst=$7 + local trace_type=$8 + local ioam_ns=$9 + + ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \ + $trace_type $ioam_ns & local spid=$! sleep 0.1 - ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null + if [ $? != 0 ] + then + log_test_failed "${desc}" + kill -2 $spid &>/dev/null + else + wait $spid + [ $? = 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" + fi +} + +run() +{ + echo + echo "OUTPUT tests" + printf "%0.s-" {1..74} + echo + + # set OUTPUT settings + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 + + for t in $TESTS_OUTPUT + do + $t + done + + # clean OUTPUT settings + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip -netns ioam-node-alpha route change db01::/64 dev veth0 + - wait $spid - [ $? 
= 0 ] && echo "PASSED" || echo "FAILED" + echo + echo "INPUT tests" + printf "%0.s-" {1..74} + echo + + # set INPUT settings + ip -netns ioam-node-alpha ioam namespace del 123 + + for t in $TESTS_INPUT + do + $t + done + + # clean INPUT settings + ip -netns ioam-node-alpha ioam namespace add 123 \ + data ${ALPHA[6]} wide ${ALPHA[7]} + ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]} + ip -netns ioam-node-alpha route change db01::/64 dev veth0 + + + echo + echo "GLOBAL tests" + printf "%0.s-" {1..74} + echo + + for t in $TESTS_GLOBAL + do + $t + done } +bit2type=( + 0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000 + 0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100 + 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002 +) +bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 ) + + +################################################################################ +# # +# OUTPUT tests # +# # +# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. # +################################################################################ + +out_undef_ns() +{ + ############################################################################## + # Make sure that the encap node won't fill the trace if the chosen IOAM # + # namespace is not configured locally. # + ############################################################################## + local desc="Unknown IOAM namespace" + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0x800000 ns 0 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0x800000 0 +} + +out_no_room() +{ + ############################################################################## + # Make sure that the encap node won't fill the trace and will set the # + # Overflow flag since there is no room enough for its data. # + ############################################################################## + local desc="Missing trace room" + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0xc00000 ns 123 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0xc00000 123 +} + +out_bits() +{ + ############################################################################## + # Make sure that, for each trace type bit, the encap node will either: # + # (i) fill the trace with its data when it is a supported bit # + # (ii) not fill the trace with its data when it is an unsupported bit # + ############################################################################## + local desc="Trace type with bit <n> only" + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + + for i in {0..22} + do + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \ + prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0 + + run_test "out_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 ${bit2type[$i]} 123 + done + + bit2size[22]=$tmp +} + +out_full_supp_trace() +{ + ############################################################################## + # Make sure that the encap node will correctly fill a full trace. Be careful,# + # "full trace" here does NOT mean all bits (only supported ones). 
# + ############################################################################## + local desc="Full supported trace" + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0xfff002 ns 123 size 100 dev veth0 + + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0xfff002 123 +} + + +################################################################################ +# # +# INPUT tests # +# # +# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon # +# insertion -> the IOAM namespace configured on the sender is removed # +# and is used in the inserted trace to force the sender not to fill it. # +################################################################################ + +in_undef_ns() +{ + ############################################################################## + # Make sure that the receiving node won't fill the trace if the related IOAM # + # namespace is not configured locally. # + ############################################################################## + local desc="Unknown IOAM namespace" + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0x800000 ns 0 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0x800000 0 +} + +in_no_room() +{ + ############################################################################## + # Make sure that the receiving node won't fill the trace and will set the # + # Overflow flag if there is no room enough for its data. # + ############################################################################## + local desc="Missing trace room" + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0xc00000 ns 123 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0xc00000 123 +} + +in_bits() +{ + ############################################################################## + # Make sure that, for each trace type bit, the receiving node will either: # + # (i) fill the trace with its data when it is a supported bit # + # (ii) not fill the trace with its data when it is an unsupported bit # + ############################################################################## + local desc="Trace type with bit <n> only" + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + + for i in {0..22} + do + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \ + prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0 + + run_test "in_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 ${bit2type[$i]} 123 + done + + bit2size[22]=$tmp +} + +in_oflag() +{ + ############################################################################## + # Make sure that the receiving node won't fill the trace since the Overflow # + # flag is set. # + ############################################################################## + local desc="Overflow flag is set" + + # Exception: + # Here, we need the sender to set the Overflow flag. For that, we will add + # back the IOAM namespace that was previously configured on the sender. 
+ ip -netns ioam-node-alpha ioam namespace add 123 + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0xc00000 ns 123 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0xc00000 123 + + # And we clean the exception for this test to get things back to normal for + # other INPUT tests + ip -netns ioam-node-alpha ioam namespace del 123 +} + +in_full_supp_trace() +{ + ############################################################################## + # Make sure that the receiving node will correctly fill a full trace. Be # + # careful, "full trace" here does NOT mean all bits (only supported ones). # + ############################################################################## + local desc="Full supported trace" + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ + type 0xfff002 ns 123 size 80 dev veth0 + + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ + db01::1 veth0 0xfff002 123 +} + + +################################################################################ +# # +# GLOBAL tests # +# # +# Three nodes (sender/router/receiver), IOAM fully enabled on every node. # +################################################################################ + +fwd_full_supp_trace() +{ + ############################################################################## + # Make sure that all three nodes correctly filled the full supported trace # + # by checking that the trace data is consistent with the predefined config. # + ############################################################################## + local desc="Forward - Full supported trace" + + ip -netns ioam-node-alpha route change db02::/64 encap ioam6 trace prealloc \ + type 0xfff002 ns 123 size 244 via db01::1 dev veth0 + + run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-gamma db01::2 \ + db02::2 veth0 0xfff002 123 +} + + +################################################################################ +# # +# MAIN # +# # +################################################################################ + +if [ "$(id -u)" -ne 0 ] +then + echo "SKIP: Need root privileges" + exit 1 +fi + +if [ ! -x "$(command -v ip)" ] +then + echo "SKIP: Could not run test without ip tool" + exit 1 +fi + +ip ioam &>/dev/null +if [ $? = 1 ] +then + echo "SKIP: iproute2 too old, missing ioam command" + exit 1 +fi + +check_kernel_compatibility + cleanup &>/dev/null setup run diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index 2256cf5ad637..d376cb2c383c 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -2,19 +2,20 @@ /* * Author: Justin Iurman (justin.iurman@uliege.be) * - * IOAM parser for IPv6, see ioam6.sh for details. + * IOAM tester for IPv6, see ioam6.sh for details on each test case. 
*/ -#include <asm/byteorder.h> +#include <arpa/inet.h> +#include <errno.h> +#include <limits.h> #include <linux/const.h> #include <linux/if_ether.h> #include <linux/ioam6.h> #include <linux/ipv6.h> -#include <sys/socket.h> #include <stdlib.h> #include <string.h> #include <unistd.h> -struct node_args { +struct ioam_config { __u32 id; __u64 wide; __u16 ingr_id; @@ -24,143 +25,325 @@ struct node_args { __u32 ns_data; __u64 ns_wide; __u32 sc_id; - __u8 hop_limit; - __u8 *sc_data; /* NULL when sc_id = 0xffffff (default empty value) */ + __u8 hlim; + char *sc_data; }; -/* expected args per node, in that order */ -enum { - NODE_ARG_HOP_LIMIT, - NODE_ARG_ID, - NODE_ARG_WIDE, - NODE_ARG_INGR_ID, - NODE_ARG_INGR_WIDE, - NODE_ARG_EGR_ID, - NODE_ARG_EGR_WIDE, - NODE_ARG_NS_DATA, - NODE_ARG_NS_WIDE, - NODE_ARG_SC_ID, - __NODE_ARG_MAX, +/* + * Be careful if you modify structs below - everything MUST be kept synchronized + * with configurations inside ioam6.sh and always reflect the same. + */ + +static struct ioam_config node1 = { + .id = 1, + .wide = 11111111, + .ingr_id = 0xffff, /* default value */ + .egr_id = 101, + .ingr_wide = 0xffffffff, /* default value */ + .egr_wide = 101101, + .ns_data = 0xdeadbee0, + .ns_wide = 0xcafec0caf00dc0de, + .sc_id = 777, + .sc_data = "something that will be 4n-aligned", + .hlim = 64, }; -#define NODE_ARGS_SIZE __NODE_ARG_MAX +static struct ioam_config node2 = { + .id = 2, + .wide = 22222222, + .ingr_id = 201, + .egr_id = 202, + .ingr_wide = 201201, + .egr_wide = 202202, + .ns_data = 0xdeadbee1, + .ns_wide = 0xcafec0caf11dc0de, + .sc_id = 666, + .sc_data = "Hello there -Obi", + .hlim = 63, +}; -struct args { - __u16 ns_id; - __u32 trace_type; - __u8 n_node; - __u8 *ifname; - struct node_args node[0]; +static struct ioam_config node3 = { + .id = 3, + .wide = 33333333, + .ingr_id = 301, + .egr_id = 0xffff, /* default value */ + .ingr_wide = 301301, + .egr_wide = 0xffffffff, /* default value */ + .ns_data = 0xdeadbee2, + .ns_wide = 0xcafec0caf22dc0de, + .sc_id = 0xffffff, /* default value */ + .sc_data = NULL, + .hlim = 62, }; -/* expected args, in that order */ enum { - ARG_IFNAME, - ARG_N_NODE, - ARG_NS_ID, - ARG_TRACE_TYPE, - __ARG_MAX, + /********** + * OUTPUT * + **********/ + TEST_OUT_UNDEF_NS, + TEST_OUT_NO_ROOM, + TEST_OUT_BIT0, + TEST_OUT_BIT1, + TEST_OUT_BIT2, + TEST_OUT_BIT3, + TEST_OUT_BIT4, + TEST_OUT_BIT5, + TEST_OUT_BIT6, + TEST_OUT_BIT7, + TEST_OUT_BIT8, + TEST_OUT_BIT9, + TEST_OUT_BIT10, + TEST_OUT_BIT11, + TEST_OUT_BIT12, + TEST_OUT_BIT13, + TEST_OUT_BIT14, + TEST_OUT_BIT15, + TEST_OUT_BIT16, + TEST_OUT_BIT17, + TEST_OUT_BIT18, + TEST_OUT_BIT19, + TEST_OUT_BIT20, + TEST_OUT_BIT21, + TEST_OUT_BIT22, + TEST_OUT_FULL_SUPP_TRACE, + + /********* + * INPUT * + *********/ + TEST_IN_UNDEF_NS, + TEST_IN_NO_ROOM, + TEST_IN_OFLAG, + TEST_IN_BIT0, + TEST_IN_BIT1, + TEST_IN_BIT2, + TEST_IN_BIT3, + TEST_IN_BIT4, + TEST_IN_BIT5, + TEST_IN_BIT6, + TEST_IN_BIT7, + TEST_IN_BIT8, + TEST_IN_BIT9, + TEST_IN_BIT10, + TEST_IN_BIT11, + TEST_IN_BIT12, + TEST_IN_BIT13, + TEST_IN_BIT14, + TEST_IN_BIT15, + TEST_IN_BIT16, + TEST_IN_BIT17, + TEST_IN_BIT18, + TEST_IN_BIT19, + TEST_IN_BIT20, + TEST_IN_BIT21, + TEST_IN_BIT22, + TEST_IN_FULL_SUPP_TRACE, + + /********** + * GLOBAL * + **********/ + TEST_FWD_FULL_SUPP_TRACE, + + __TEST_MAX, }; -#define ARGS_SIZE __ARG_MAX +static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, + __u32 trace_type, __u16 ioam_ns) +{ + if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns || + __be32_to_cpu(ioam6h->type_be32) != (trace_type << 
8)) + return 1; -int check_ioam6_node_data(__u8 **p, struct ioam6_trace_hdr *trace, __u8 hlim, - __u32 id, __u64 wide, __u16 ingr_id, __u32 ingr_wide, - __u16 egr_id, __u32 egr_wide, __u32 ns_data, - __u64 ns_wide, __u32 sc_id, __u8 *sc_data) + switch (tid) { + case TEST_OUT_UNDEF_NS: + case TEST_IN_UNDEF_NS: + return ioam6h->overflow || + ioam6h->nodelen != 1 || + ioam6h->remlen != 1; + + case TEST_OUT_NO_ROOM: + case TEST_IN_NO_ROOM: + case TEST_IN_OFLAG: + return !ioam6h->overflow || + ioam6h->nodelen != 2 || + ioam6h->remlen != 1; + + case TEST_OUT_BIT0: + case TEST_IN_BIT0: + case TEST_OUT_BIT1: + case TEST_IN_BIT1: + case TEST_OUT_BIT2: + case TEST_IN_BIT2: + case TEST_OUT_BIT3: + case TEST_IN_BIT3: + case TEST_OUT_BIT4: + case TEST_IN_BIT4: + case TEST_OUT_BIT5: + case TEST_IN_BIT5: + case TEST_OUT_BIT6: + case TEST_IN_BIT6: + case TEST_OUT_BIT7: + case TEST_IN_BIT7: + case TEST_OUT_BIT11: + case TEST_IN_BIT11: + return ioam6h->overflow || + ioam6h->nodelen != 1 || + ioam6h->remlen; + + case TEST_OUT_BIT8: + case TEST_IN_BIT8: + case TEST_OUT_BIT9: + case TEST_IN_BIT9: + case TEST_OUT_BIT10: + case TEST_IN_BIT10: + return ioam6h->overflow || + ioam6h->nodelen != 2 || + ioam6h->remlen; + + case TEST_OUT_BIT12: + case TEST_IN_BIT12: + case TEST_OUT_BIT13: + case TEST_IN_BIT13: + case TEST_OUT_BIT14: + case TEST_IN_BIT14: + case TEST_OUT_BIT15: + case TEST_IN_BIT15: + case TEST_OUT_BIT16: + case TEST_IN_BIT16: + case TEST_OUT_BIT17: + case TEST_IN_BIT17: + case TEST_OUT_BIT18: + case TEST_IN_BIT18: + case TEST_OUT_BIT19: + case TEST_IN_BIT19: + case TEST_OUT_BIT20: + case TEST_IN_BIT20: + case TEST_OUT_BIT21: + case TEST_IN_BIT21: + return ioam6h->overflow || + ioam6h->nodelen || + ioam6h->remlen != 1; + + case TEST_OUT_BIT22: + case TEST_IN_BIT22: + return ioam6h->overflow || + ioam6h->nodelen || + ioam6h->remlen; + + case TEST_OUT_FULL_SUPP_TRACE: + case TEST_IN_FULL_SUPP_TRACE: + case TEST_FWD_FULL_SUPP_TRACE: + return ioam6h->overflow || + ioam6h->nodelen != 15 || + ioam6h->remlen; + + default: + break; + } + + return 1; +} + +static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, + const struct ioam_config cnf) { + unsigned int len; + __u8 aligned; __u64 raw64; __u32 raw32; - __u8 sc_len; - if (trace->type.bit0) { + if (ioam6h->type.bit0) { raw32 = __be32_to_cpu(*((__u32 *)*p)); - if (hlim != (raw32 >> 24) || id != (raw32 & 0xffffff)) + if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff)) return 1; *p += sizeof(__u32); } - if (trace->type.bit1) { + if (ioam6h->type.bit1) { raw32 = __be32_to_cpu(*((__u32 *)*p)); - if (ingr_id != (raw32 >> 16) || egr_id != (raw32 & 0xffff)) + if (cnf.ingr_id != (raw32 >> 16) || + cnf.egr_id != (raw32 & 0xffff)) return 1; *p += sizeof(__u32); } - if (trace->type.bit2) + if (ioam6h->type.bit2) *p += sizeof(__u32); - if (trace->type.bit3) + if (ioam6h->type.bit3) *p += sizeof(__u32); - if (trace->type.bit4) { + if (ioam6h->type.bit4) { if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) return 1; *p += sizeof(__u32); } - if (trace->type.bit5) { - if (__be32_to_cpu(*((__u32 *)*p)) != ns_data) + if (ioam6h->type.bit5) { + if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data) return 1; *p += sizeof(__u32); } - if (trace->type.bit6) { + if (ioam6h->type.bit6) { if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) return 1; *p += sizeof(__u32); } - if (trace->type.bit7) { + if (ioam6h->type.bit7) { if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) return 1; *p += sizeof(__u32); } - if (trace->type.bit8) { + if (ioam6h->type.bit8) { raw64 = 
__be64_to_cpu(*((__u64 *)*p)); - if (hlim != (raw64 >> 56) || wide != (raw64 & 0xffffffffffffff)) + if (cnf.hlim != (raw64 >> 56) || + cnf.wide != (raw64 & 0xffffffffffffff)) return 1; *p += sizeof(__u64); } - if (trace->type.bit9) { - if (__be32_to_cpu(*((__u32 *)*p)) != ingr_wide) + if (ioam6h->type.bit9) { + if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide) return 1; *p += sizeof(__u32); - if (__be32_to_cpu(*((__u32 *)*p)) != egr_wide) + if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide) return 1; *p += sizeof(__u32); } - if (trace->type.bit10) { - if (__be64_to_cpu(*((__u64 *)*p)) != ns_wide) + if (ioam6h->type.bit10) { + if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide) return 1; *p += sizeof(__u64); } - if (trace->type.bit11) { + if (ioam6h->type.bit11) { if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) return 1; *p += sizeof(__u32); } - if (trace->type.bit22) { + if (ioam6h->type.bit22) { + len = cnf.sc_data ? strlen(cnf.sc_data) : 0; + aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0; + raw32 = __be32_to_cpu(*((__u32 *)*p)); - sc_len = sc_data ? __ALIGN_KERNEL(strlen(sc_data), 4) : 0; - if (sc_len != (raw32 >> 24) * 4 || sc_id != (raw32 & 0xffffff)) + if (aligned != (raw32 >> 24) * 4 || + cnf.sc_id != (raw32 & 0xffffff)) return 1; *p += sizeof(__u32); - if (sc_data) { - if (strncmp(*p, sc_data, strlen(sc_data))) + if (cnf.sc_data) { + if (strncmp((char *)*p, cnf.sc_data, len)) return 1; - *p += strlen(sc_data); - sc_len -= strlen(sc_data); + *p += len; + aligned -= len; - while (sc_len--) { + while (aligned--) { if (**p != '\0') return 1; *p += sizeof(__u8); @@ -171,232 +354,367 @@ int check_ioam6_node_data(__u8 **p, struct ioam6_trace_hdr *trace, __u8 hlim, return 0; } -int check_ioam6_trace(struct ioam6_trace_hdr *trace, struct args *args) +static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h, + __u32 trace_type, __u16 ioam_ns) { __u8 *p; - int i; - if (__be16_to_cpu(trace->namespace_id) != args->ns_id || - __be32_to_cpu(trace->type_be32) != args->trace_type) + if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns)) return 1; - p = trace->data + trace->remlen * 4; - - for (i = args->n_node - 1; i >= 0; i--) { - if (check_ioam6_node_data(&p, trace, - args->node[i].hop_limit, - args->node[i].id, - args->node[i].wide, - args->node[i].ingr_id, - args->node[i].ingr_wide, - args->node[i].egr_id, - args->node[i].egr_wide, - args->node[i].ns_data, - args->node[i].ns_wide, - args->node[i].sc_id, - args->node[i].sc_data)) - return 1; + p = ioam6h->data + ioam6h->remlen * 4; + + switch (tid) { + case TEST_OUT_BIT0: + case TEST_OUT_BIT1: + case TEST_OUT_BIT2: + case TEST_OUT_BIT3: + case TEST_OUT_BIT4: + case TEST_OUT_BIT5: + case TEST_OUT_BIT6: + case TEST_OUT_BIT7: + case TEST_OUT_BIT8: + case TEST_OUT_BIT9: + case TEST_OUT_BIT10: + case TEST_OUT_BIT11: + case TEST_OUT_BIT22: + case TEST_OUT_FULL_SUPP_TRACE: + return check_ioam6_data(&p, ioam6h, node1); + + case TEST_IN_BIT0: + case TEST_IN_BIT1: + case TEST_IN_BIT2: + case TEST_IN_BIT3: + case TEST_IN_BIT4: + case TEST_IN_BIT5: + case TEST_IN_BIT6: + case TEST_IN_BIT7: + case TEST_IN_BIT8: + case TEST_IN_BIT9: + case TEST_IN_BIT10: + case TEST_IN_BIT11: + case TEST_IN_BIT22: + case TEST_IN_FULL_SUPP_TRACE: + { + __u32 tmp32 = node2.egr_wide; + __u16 tmp16 = node2.egr_id; + int res; + + node2.egr_id = 0xffff; + node2.egr_wide = 0xffffffff; + + res = check_ioam6_data(&p, ioam6h, node2); + + node2.egr_id = tmp16; + node2.egr_wide = tmp32; + + return res; } - return 0; -} - -int parse_node_args(int *argcp, char 
***argvp, struct node_args *node) -{ - char **argv = *argvp; - - if (*argcp < NODE_ARGS_SIZE) - return 1; - - node->hop_limit = strtoul(argv[NODE_ARG_HOP_LIMIT], NULL, 10); - if (!node->hop_limit) { - node->hop_limit = strtoul(argv[NODE_ARG_HOP_LIMIT], NULL, 16); - if (!node->hop_limit) - return 1; - } - - node->id = strtoul(argv[NODE_ARG_ID], NULL, 10); - if (!node->id) { - node->id = strtoul(argv[NODE_ARG_ID], NULL, 16); - if (!node->id) - return 1; - } - - node->wide = strtoull(argv[NODE_ARG_WIDE], NULL, 10); - if (!node->wide) { - node->wide = strtoull(argv[NODE_ARG_WIDE], NULL, 16); - if (!node->wide) - return 1; - } - - node->ingr_id = strtoul(argv[NODE_ARG_INGR_ID], NULL, 10); - if (!node->ingr_id) { - node->ingr_id = strtoul(argv[NODE_ARG_INGR_ID], NULL, 16); - if (!node->ingr_id) + case TEST_FWD_FULL_SUPP_TRACE: + if (check_ioam6_data(&p, ioam6h, node3)) return 1; - } - - node->ingr_wide = strtoul(argv[NODE_ARG_INGR_WIDE], NULL, 10); - if (!node->ingr_wide) { - node->ingr_wide = strtoul(argv[NODE_ARG_INGR_WIDE], NULL, 16); - if (!node->ingr_wide) + if (check_ioam6_data(&p, ioam6h, node2)) return 1; - } + return check_ioam6_data(&p, ioam6h, node1); - node->egr_id = strtoul(argv[NODE_ARG_EGR_ID], NULL, 10); - if (!node->egr_id) { - node->egr_id = strtoul(argv[NODE_ARG_EGR_ID], NULL, 16); - if (!node->egr_id) - return 1; + default: + break; } - node->egr_wide = strtoul(argv[NODE_ARG_EGR_WIDE], NULL, 10); - if (!node->egr_wide) { - node->egr_wide = strtoul(argv[NODE_ARG_EGR_WIDE], NULL, 16); - if (!node->egr_wide) - return 1; - } + return 1; +} - node->ns_data = strtoul(argv[NODE_ARG_NS_DATA], NULL, 16); - if (!node->ns_data) - return 1; +static int str2id(const char *tname) +{ + if (!strcmp("out_undef_ns", tname)) + return TEST_OUT_UNDEF_NS; + if (!strcmp("out_no_room", tname)) + return TEST_OUT_NO_ROOM; + if (!strcmp("out_bit0", tname)) + return TEST_OUT_BIT0; + if (!strcmp("out_bit1", tname)) + return TEST_OUT_BIT1; + if (!strcmp("out_bit2", tname)) + return TEST_OUT_BIT2; + if (!strcmp("out_bit3", tname)) + return TEST_OUT_BIT3; + if (!strcmp("out_bit4", tname)) + return TEST_OUT_BIT4; + if (!strcmp("out_bit5", tname)) + return TEST_OUT_BIT5; + if (!strcmp("out_bit6", tname)) + return TEST_OUT_BIT6; + if (!strcmp("out_bit7", tname)) + return TEST_OUT_BIT7; + if (!strcmp("out_bit8", tname)) + return TEST_OUT_BIT8; + if (!strcmp("out_bit9", tname)) + return TEST_OUT_BIT9; + if (!strcmp("out_bit10", tname)) + return TEST_OUT_BIT10; + if (!strcmp("out_bit11", tname)) + return TEST_OUT_BIT11; + if (!strcmp("out_bit12", tname)) + return TEST_OUT_BIT12; + if (!strcmp("out_bit13", tname)) + return TEST_OUT_BIT13; + if (!strcmp("out_bit14", tname)) + return TEST_OUT_BIT14; + if (!strcmp("out_bit15", tname)) + return TEST_OUT_BIT15; + if (!strcmp("out_bit16", tname)) + return TEST_OUT_BIT16; + if (!strcmp("out_bit17", tname)) + return TEST_OUT_BIT17; + if (!strcmp("out_bit18", tname)) + return TEST_OUT_BIT18; + if (!strcmp("out_bit19", tname)) + return TEST_OUT_BIT19; + if (!strcmp("out_bit20", tname)) + return TEST_OUT_BIT20; + if (!strcmp("out_bit21", tname)) + return TEST_OUT_BIT21; + if (!strcmp("out_bit22", tname)) + return TEST_OUT_BIT22; + if (!strcmp("out_full_supp_trace", tname)) + return TEST_OUT_FULL_SUPP_TRACE; + if (!strcmp("in_undef_ns", tname)) + return TEST_IN_UNDEF_NS; + if (!strcmp("in_no_room", tname)) + return TEST_IN_NO_ROOM; + if (!strcmp("in_oflag", tname)) + return TEST_IN_OFLAG; + if (!strcmp("in_bit0", tname)) + return TEST_IN_BIT0; + if (!strcmp("in_bit1", tname)) 
+ return TEST_IN_BIT1; + if (!strcmp("in_bit2", tname)) + return TEST_IN_BIT2; + if (!strcmp("in_bit3", tname)) + return TEST_IN_BIT3; + if (!strcmp("in_bit4", tname)) + return TEST_IN_BIT4; + if (!strcmp("in_bit5", tname)) + return TEST_IN_BIT5; + if (!strcmp("in_bit6", tname)) + return TEST_IN_BIT6; + if (!strcmp("in_bit7", tname)) + return TEST_IN_BIT7; + if (!strcmp("in_bit8", tname)) + return TEST_IN_BIT8; + if (!strcmp("in_bit9", tname)) + return TEST_IN_BIT9; + if (!strcmp("in_bit10", tname)) + return TEST_IN_BIT10; + if (!strcmp("in_bit11", tname)) + return TEST_IN_BIT11; + if (!strcmp("in_bit12", tname)) + return TEST_IN_BIT12; + if (!strcmp("in_bit13", tname)) + return TEST_IN_BIT13; + if (!strcmp("in_bit14", tname)) + return TEST_IN_BIT14; + if (!strcmp("in_bit15", tname)) + return TEST_IN_BIT15; + if (!strcmp("in_bit16", tname)) + return TEST_IN_BIT16; + if (!strcmp("in_bit17", tname)) + return TEST_IN_BIT17; + if (!strcmp("in_bit18", tname)) + return TEST_IN_BIT18; + if (!strcmp("in_bit19", tname)) + return TEST_IN_BIT19; + if (!strcmp("in_bit20", tname)) + return TEST_IN_BIT20; + if (!strcmp("in_bit21", tname)) + return TEST_IN_BIT21; + if (!strcmp("in_bit22", tname)) + return TEST_IN_BIT22; + if (!strcmp("in_full_supp_trace", tname)) + return TEST_IN_FULL_SUPP_TRACE; + if (!strcmp("fwd_full_supp_trace", tname)) + return TEST_FWD_FULL_SUPP_TRACE; + + return -1; +} - node->ns_wide = strtoull(argv[NODE_ARG_NS_WIDE], NULL, 16); - if (!node->ns_wide) - return 1; +static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) +{ + return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | + (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | + (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | + (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; +} - node->sc_id = strtoul(argv[NODE_ARG_SC_ID], NULL, 10); - if (!node->sc_id) { - node->sc_id = strtoul(argv[NODE_ARG_SC_ID], NULL, 16); - if (!node->sc_id) - return 1; - } +static int get_u32(__u32 *val, const char *arg, int base) +{ + unsigned long res; + char *ptr; - *argcp -= NODE_ARGS_SIZE; - *argvp += NODE_ARGS_SIZE; + if (!arg || !*arg) + return -1; + res = strtoul(arg, &ptr, base); - if (node->sc_id != 0xffffff) { - if (!*argcp) - return 1; + if (!ptr || ptr == arg || *ptr) + return -1; - node->sc_data = argv[NODE_ARG_SC_ID + 1]; + if (res == ULONG_MAX && errno == ERANGE) + return -1; - *argcp -= 1; - *argvp += 1; - } + if (res > 0xFFFFFFFFUL) + return -1; + *val = res; return 0; } -struct args *parse_args(int argc, char **argv) +static int get_u16(__u16 *val, const char *arg, int base) { - struct args *args; - int n_node, i; + unsigned long res; + char *ptr; - if (argc < ARGS_SIZE) - goto out; - - n_node = strtoul(argv[ARG_N_NODE], NULL, 10); - if (!n_node || n_node > 10) - goto out; - - args = calloc(1, sizeof(*args) + n_node * sizeof(struct node_args)); - if (!args) - goto out; + if (!arg || !*arg) + return -1; + res = strtoul(arg, &ptr, base); - args->ns_id = strtoul(argv[ARG_NS_ID], NULL, 10); - if (!args->ns_id) - goto free; + if (!ptr || ptr == arg || *ptr) + return -1; - args->trace_type = strtoul(argv[ARG_TRACE_TYPE], NULL, 16); - if (!args->trace_type) - goto free; - - args->n_node = n_node; - args->ifname = argv[ARG_IFNAME]; - - argv += ARGS_SIZE; - argc -= ARGS_SIZE; - - for (i = 0; i < n_node; i++) { - if (parse_node_args(&argc, &argv, &args->node[i])) - goto free; - } + if (res == ULONG_MAX && errno == ERANGE) + return -1; - if (argc) - goto free; + if (res > 0xFFFFUL) + return -1; - return args; -free: - free(args); -out: - return NULL; 
+ *val = res; + return 0; } +static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { + [TEST_OUT_UNDEF_NS] = check_ioam_header, + [TEST_OUT_NO_ROOM] = check_ioam_header, + [TEST_OUT_BIT0] = check_ioam_header_and_data, + [TEST_OUT_BIT1] = check_ioam_header_and_data, + [TEST_OUT_BIT2] = check_ioam_header_and_data, + [TEST_OUT_BIT3] = check_ioam_header_and_data, + [TEST_OUT_BIT4] = check_ioam_header_and_data, + [TEST_OUT_BIT5] = check_ioam_header_and_data, + [TEST_OUT_BIT6] = check_ioam_header_and_data, + [TEST_OUT_BIT7] = check_ioam_header_and_data, + [TEST_OUT_BIT8] = check_ioam_header_and_data, + [TEST_OUT_BIT9] = check_ioam_header_and_data, + [TEST_OUT_BIT10] = check_ioam_header_and_data, + [TEST_OUT_BIT11] = check_ioam_header_and_data, + [TEST_OUT_BIT12] = check_ioam_header, + [TEST_OUT_BIT13] = check_ioam_header, + [TEST_OUT_BIT14] = check_ioam_header, + [TEST_OUT_BIT15] = check_ioam_header, + [TEST_OUT_BIT16] = check_ioam_header, + [TEST_OUT_BIT17] = check_ioam_header, + [TEST_OUT_BIT18] = check_ioam_header, + [TEST_OUT_BIT19] = check_ioam_header, + [TEST_OUT_BIT20] = check_ioam_header, + [TEST_OUT_BIT21] = check_ioam_header, + [TEST_OUT_BIT22] = check_ioam_header_and_data, + [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data, + [TEST_IN_UNDEF_NS] = check_ioam_header, + [TEST_IN_NO_ROOM] = check_ioam_header, + [TEST_IN_OFLAG] = check_ioam_header, + [TEST_IN_BIT0] = check_ioam_header_and_data, + [TEST_IN_BIT1] = check_ioam_header_and_data, + [TEST_IN_BIT2] = check_ioam_header_and_data, + [TEST_IN_BIT3] = check_ioam_header_and_data, + [TEST_IN_BIT4] = check_ioam_header_and_data, + [TEST_IN_BIT5] = check_ioam_header_and_data, + [TEST_IN_BIT6] = check_ioam_header_and_data, + [TEST_IN_BIT7] = check_ioam_header_and_data, + [TEST_IN_BIT8] = check_ioam_header_and_data, + [TEST_IN_BIT9] = check_ioam_header_and_data, + [TEST_IN_BIT10] = check_ioam_header_and_data, + [TEST_IN_BIT11] = check_ioam_header_and_data, + [TEST_IN_BIT12] = check_ioam_header, + [TEST_IN_BIT13] = check_ioam_header, + [TEST_IN_BIT14] = check_ioam_header, + [TEST_IN_BIT15] = check_ioam_header, + [TEST_IN_BIT16] = check_ioam_header, + [TEST_IN_BIT17] = check_ioam_header, + [TEST_IN_BIT18] = check_ioam_header, + [TEST_IN_BIT19] = check_ioam_header, + [TEST_IN_BIT20] = check_ioam_header, + [TEST_IN_BIT21] = check_ioam_header, + [TEST_IN_BIT22] = check_ioam_header_and_data, + [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data, + [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data, +}; + int main(int argc, char **argv) { - int ret, fd, pkts, size, hoplen, found; - struct ioam6_trace_hdr *ioam6h; + int fd, size, hoplen, tid, ret = 1; + struct in6_addr src, dst; struct ioam6_hdr *opt; struct ipv6hdr *ip6h; __u8 buffer[400], *p; - struct args *args; + __u16 ioam_ns; + __u32 tr_type; - args = parse_args(argc - 1, argv + 1); - if (!args) { - ret = 1; + if (argc != 7) + goto out; + + tid = str2id(argv[2]); + if (tid < 0 || !func[tid]) + goto out; + + if (inet_pton(AF_INET6, argv[3], &src) != 1 || + inet_pton(AF_INET6, argv[4], &dst) != 1) + goto out; + + if (get_u32(&tr_type, argv[5], 16) || + get_u16(&ioam_ns, argv[6], 0)) goto out; - } fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); - if (!fd) { - ret = 1; + if (!fd) goto out; - } if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, - args->ifname, strlen(args->ifname))) { - ret = 1; + argv[1], strlen(argv[1]))) goto close; - } - pkts = 0; - found = 0; - while (pkts < 3 && !found) { - size = recv(fd, buffer, sizeof(buffer), 0); - ip6h = 
(struct ipv6hdr *)buffer; - pkts++; +recv: + size = recv(fd, buffer, sizeof(buffer), 0); + if (size <= 0) + goto close; - if (ip6h->nexthdr == IPPROTO_HOPOPTS) { - p = buffer + sizeof(*ip6h); - hoplen = (p[1] + 1) << 3; + ip6h = (struct ipv6hdr *)buffer; - p += sizeof(struct ipv6_hopopt_hdr); - while (hoplen > 0) { - opt = (struct ioam6_hdr *)p; + if (!ipv6_addr_equal(&ip6h->saddr, &src) || + !ipv6_addr_equal(&ip6h->daddr, &dst)) + goto recv; - if (opt->opt_type == IPV6_TLV_IOAM && - opt->type == IOAM6_TYPE_PREALLOC) { - found = 1; + if (ip6h->nexthdr != IPPROTO_HOPOPTS) + goto close; - p += sizeof(*opt); - ioam6h = (struct ioam6_trace_hdr *)p; + p = buffer + sizeof(*ip6h); + hoplen = (p[1] + 1) << 3; + p += sizeof(struct ipv6_hopopt_hdr); - ret = check_ioam6_trace(ioam6h, args); - break; - } + while (hoplen > 0) { + opt = (struct ioam6_hdr *)p; - p += opt->opt_len + 2; - hoplen -= opt->opt_len + 2; - } + if (opt->opt_type == IPV6_TLV_IOAM && + opt->type == IOAM6_TYPE_PREALLOC) { + p += sizeof(*opt); + ret = func[tid](tid, (struct ioam6_trace_hdr *)p, + tr_type, ioam_ns); + break; } - } - if (!found) - ret = 1; + p += opt->opt_len + 2; + hoplen -= opt->opt_len + 2; + } close: close(fd); out: - free(args); return ret; } diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index f23438d512c5..3d7dde2c321b 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -484,13 +484,16 @@ enum desc_type { MONITOR_ACQUIRE, EXPIRE_STATE, EXPIRE_POLICY, + SPDINFO_ATTRS, }; const char *desc_name[] = { "create tunnel", "alloc spi", "monitor acquire", "expire state", - "expire policy" + "expire policy", + "spdinfo attributes", + "" }; struct xfrm_desc { enum desc_type type; @@ -1593,6 +1596,155 @@ out_close: return ret; } +static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq, + unsigned thresh4_l, unsigned thresh4_r, + unsigned thresh6_l, unsigned thresh6_r, + bool add_bad_attr) + +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrmu_spdhthresh thresh; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_NEWSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + thresh.lbits = thresh4_l; + thresh.rbits = thresh4_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + thresh.lbits = thresh6_l; + thresh.rbits = thresh6_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + if (add_bad_attr) { + BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) { + pr_err("adding attribute failed: no space"); + return -1; + } + } + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return -1; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return -1; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + return 0; +} + +static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq) +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 
16, false)) { + pr_err("Can't set SPD HTHRESH"); + return KSFT_FAIL; + } + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_GETSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_seq = (*seq)++; + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return KSFT_FAIL; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return KSFT_FAIL; + } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) { + size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused)); + struct rtattr *attr = (void *)req.attrbuf; + int got_thresh = 0; + + for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) { + if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 32 || t->rbits != 31) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 120 || t->rbits != 16) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + } + if (got_thresh != 2) { + pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh); + return KSFT_FAIL; + } + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return KSFT_FAIL; + } else { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + /* Restore the default */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) { + pr_err("Can't restore SPD HTHRESH"); + return KSFT_FAIL; + } + + /* + * At this moment xfrm uses nlmsg_parse_deprecated(), which + * implies NL_VALIDATE_LIBERAL - ignoring attributes with + * (type > maxtype). nla_parse_depricated_strict() would enforce + * it. Or even stricter nla_parse(). + * Right now it's not expected to fail, but to be ignored. + */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true)) + return KSFT_PASS; + + return KSFT_PASS; +} + static int child_serv(int xfrm_sock, uint32_t *seq, unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc) { @@ -1717,6 +1869,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf) case EXPIRE_POLICY: ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc); break; + case SPDINFO_ATTRS: + ret = xfrm_spdinfo_attrs(xfrm_sock, &seq); + break; default: printk("Unknown desc type %d", desc.type); exit(KSFT_FAIL); @@ -1994,8 +2149,10 @@ static int write_proto_plan(int fd, int proto) * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176 * * Check the affected by the UABI difference structures. + * Also, check translation for xfrm_set_spdinfo: it has it's own attributes + * which needs to be correctly copied, but not translated. 
*/ -const unsigned int compat_plan = 4; +const unsigned int compat_plan = 5; static int write_compat_struct_tests(int test_desc_fd) { struct xfrm_desc desc = {}; @@ -2019,6 +2176,10 @@ static int write_compat_struct_tests(int test_desc_fd) if (__write_desc(test_desc_fd, &desc)) return -1; + desc.type = SPDINFO_ATTRS; + if (__write_desc(test_desc_fd, &desc)) + return -1; + return 0; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f02f4de2f3a0..8c7117e2c337 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3,8 +3,10 @@ ret=0 sin="" +sinfail="" sout="" cin="" +cinfail="" cinsent="" cout="" ksft_skip=4 @@ -76,6 +78,14 @@ init() done } +init_shapers() +{ + for i in `seq 1 4`; do + tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1 + tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1 + done +} + cleanup_partial() { rm -f "$capout" @@ -88,8 +98,8 @@ cleanup_partial() cleanup() { - rm -f "$cin" "$cout" - rm -f "$sin" "$sout" "$cinsent" + rm -f "$cin" "$cout" "$sinfail" + rm -f "$sin" "$sout" "$cinsent" "$cinfail" cleanup_partial } @@ -211,11 +221,15 @@ link_failure() { ns="$1" - l=$((RANDOM%4)) - l=$((l+1)) + if [ -z "$FAILING_LINKS" ]; then + l=$((RANDOM%4)) + FAILING_LINKS=$((l+1)) + fi - veth="ns1eth$l" - ip -net "$ns" link set "$veth" down + for l in $FAILING_LINKS; do + veth="ns1eth$l" + ip -net "$ns" link set "$veth" down + done } # $1: IP address @@ -280,10 +294,17 @@ do_transfer() local_addr="0.0.0.0" fi - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - ${local_addr} < "$sin" > "$sout" & + if [ "$test_link_fail" -eq 2 ];then + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + $mptcp_connect -t ${timeout_poll} -l -p $port -s ${cl_proto} \ + ${local_addr} < "$sinfail" > "$sout" & + else + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + ${local_addr} < "$sin" > "$sout" & + fi spid=$! 
sleep 1 @@ -294,7 +315,7 @@ do_transfer() $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ $connect_addr < "$cin" > "$cout" & else - ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \ + ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ tee "$cinsent" | \ timeout ${timeout_test} \ ip netns exec ${connector_ns} \ @@ -323,17 +344,18 @@ do_transfer() let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then counter=1 + pos=1 dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`) if [ ${#dump[@]} -gt 0 ]; then - id=${dump[1]} sleep 1 while [ $counter -le $rm_nr_ns1 ] do + id=${dump[$pos]} ip netns exec ${listener_ns} ./pm_nl_ctl del $id sleep 1 let counter+=1 - let id+=1 + let pos+=5 done fi elif [ $rm_nr_ns1 -eq 8 ]; then @@ -345,6 +367,12 @@ do_transfer() fi fi + flags="subflow" + if [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then + flags="${flags},fullmesh" + addr_nr_ns2=${addr_nr_ns2:9} + fi + if [ $addr_nr_ns2 -gt 0 ]; then let add_nr_ns2=addr_nr_ns2 counter=3 @@ -356,7 +384,7 @@ do_transfer() else addr="10.0.$counter.2" fi - ip netns exec $ns2 ./pm_nl_ctl add $addr flags subflow + ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags let counter+=1 let add_nr_ns2-=1 done @@ -365,17 +393,18 @@ do_transfer() let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then counter=1 + pos=1 dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`) if [ ${#dump[@]} -gt 0 ]; then - id=${dump[1]} sleep 1 while [ $counter -le $rm_nr_ns2 ] do + id=${dump[$pos]} ip netns exec ${connector_ns} ./pm_nl_ctl del $id sleep 1 let counter+=1 - let id+=1 + let pos+=5 done fi elif [ $rm_nr_ns2 -eq 8 ]; then @@ -434,7 +463,11 @@ do_transfer() return 1 fi - check_transfer $sin $cout "file received by client" + if [ "$test_link_fail" -eq 2 ];then + check_transfer $sinfail $cout "file received by client" + else + check_transfer $sin $cout "file received by client" + fi retc=$? if [ "$test_link_fail" -eq 0 ];then check_transfer $cin $sout "file received by server" @@ -477,29 +510,33 @@ run_tests() lret=0 oldin="" - if [ "$test_linkfail" -eq 1 ];then - size=$((RANDOM%1024)) + # create the input file for the failure test when + # the first failure test run + if [ "$test_linkfail" -ne 0 -a -z "$cinfail" ]; then + # the client file must be considerably larger + # of the maximum expected cwin value, or the + # link utilization will be not predicable + size=$((RANDOM%2)) size=$((size+1)) - size=$((size*128)) + size=$((size*8192)) + size=$((size + ( $RANDOM % 8192) )) - oldin=$(mktemp) - cp "$cin" "$oldin" - make_file "$cin" "client" $size + cinfail=$(mktemp) + make_file "$cinfail" "client" $size fi - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ - ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup} - lret=$? + if [ "$test_linkfail" -eq 2 -a -z "$sinfail" ]; then + size=$((RANDOM%16)) + size=$((size+1)) + size=$((size*2048)) - if [ "$test_linkfail" -eq 1 ];then - cp "$oldin" "$cin" - rm -f "$oldin" + sinfail=$(mktemp) + make_file "$sinfail" "server" $size fi - if [ $lret -ne 0 ]; then - ret=$lret - return - fi + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ + ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup} + lret=$? 
} chk_csum_nr() @@ -593,6 +630,46 @@ chk_join_nr() fi } +# a negative value for 'stale_max' means no upper bound: +# for bidirectional transfer, if one peer sleep for a while +# - as these tests do - we can have a quite high number of +# stale/recover conversions, proportional to +# sleep duration/ MPTCP-level RTX interval. +chk_stale_nr() +{ + local ns=$1 + local stale_min=$2 + local stale_max=$3 + local stale_delta=$4 + local dump_stats + local stale_nr + local recover_nr + + printf "%-39s %-18s" " " "stale" + stale_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}'` + [ -z "$stale_nr" ] && stale_nr=0 + recover_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}'` + [ -z "$recover_nr" ] && recover_nr=0 + + if [ $stale_nr -lt $stale_min ] || + [ $stale_max -gt 0 -a $stale_nr -gt $stale_max ] || + [ $((stale_nr - $recover_nr)) -ne $stale_delta ]; then + echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \ + " expected stale in range [$stale_min..$stale_max]," \ + " stale-recover delta $stale_delta " + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "${dump_stats}" = 1 ]; then + echo $ns stats + ip netns exec $ns ip -s link show + ip netns exec $ns nstat -as | grep MPTcp + fi +} + chk_add_nr() { local add_nr=$1 @@ -801,6 +878,27 @@ chk_prio_nr() fi } +chk_link_usage() +{ + local ns=$1 + local link=$2 + local out=$3 + local expected_rate=$4 + local tx_link=`ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes` + local tx_total=`ls -l $out | awk '{print $5}'` + local tx_rate=$((tx_link * 100 / $tx_total)) + local tolerance=5 + + printf "%-39s %-18s" " " "link usage" + if [ $tx_rate -lt $((expected_rate - $tolerance)) -o \ + $tx_rate -gt $((expected_rate + $tolerance)) ]; then + echo "[fail] got $tx_rate% usage, expected $expected_rate%" + ret=1 + else + echo "[ ok ]" + fi +} + subflows_tests() { reset @@ -924,14 +1022,80 @@ link_failure_tests() { # accept and use add_addr with additional subflows and link loss reset + + # without any b/w limit each veth could spool the packets and get + # them acked at xmit time, so that the corresponding subflow will + # have almost always no outstanding pkts, the scheduler will pick + # always the first subflow and we will have hard time testing + # active backup and link switch-over. + # Let's set some arbitrary (low) virtual link limits. 
+ init_shapers ip netns exec $ns1 ./pm_nl_ctl limits 0 3 - ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 3 - ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow - ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow run_tests $ns1 $ns2 10.0.1.1 1 chk_join_nr "multiple flows, signal, link failure" 3 3 3 chk_add_nr 1 1 + chk_stale_nr $ns2 1 5 1 + + # accept and use add_addr with additional subflows and link loss + # for bidirectional transfer + reset + init_shapers + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow + run_tests $ns1 $ns2 10.0.1.1 2 + chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3 + chk_add_nr 1 1 + chk_stale_nr $ns2 1 -1 1 + + # 2 subflows plus 1 backup subflow with a lossy link, backup + # will never be used + reset + init_shapers + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + export FAILING_LINKS="1" + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 1 + chk_join_nr "backup subflow unused, link failure" 2 2 2 + chk_add_nr 1 1 + chk_link_usage $ns2 ns2eth3 $cinsent 0 + + # 2 lossy links after half transfer, backup will get half of + # the traffic + reset + init_shapers + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup + export FAILING_LINKS="1 2" + run_tests $ns1 $ns2 10.0.1.1 1 + chk_join_nr "backup flow used, multi links fail" 2 2 2 + chk_add_nr 1 1 + chk_stale_nr $ns2 2 4 2 + chk_link_usage $ns2 ns2eth3 $cinsent 50 + + # use a backup subflow with the first subflow on a lossy link + # for bidirectional transfer + reset + init_shapers + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 2 + chk_join_nr "backup flow used, bidi, link failure" 2 2 2 + chk_add_nr 1 1 + chk_stale_nr $ns2 1 -1 2 + chk_link_usage $ns2 ns2eth3 $cinsent 50 } add_addr_timeout_tests() @@ -1530,6 +1694,55 @@ deny_join_id0_tests() chk_join_nr "subflow and address allow join id0 2" 1 1 1 } +fullmesh_tests() +{ + # fullmesh 1 + # 2 fullmesh addrs in ns2, added before the connection, + # 1 non-fullmesh addr in ns1, added during the connection. + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 4 + ip netns exec $ns2 ./pm_nl_ctl limits 1 4 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh + run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow + chk_join_nr "fullmesh test 2x1" 4 4 4 + chk_add_nr 1 1 + + # fullmesh 2 + # 1 non-fullmesh addr in ns1, added before the connection, + # 1 fullmesh addr in ns2, added during the connection. 
+ reset + ip netns exec $ns1 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow + chk_join_nr "fullmesh test 1x1" 3 3 3 + chk_add_nr 1 1 + + # fullmesh 3 + # 1 non-fullmesh addr in ns1, added before the connection, + # 2 fullmesh addrs in ns2, added during the connection. + reset + ip netns exec $ns1 ./pm_nl_ctl limits 2 5 + ip netns exec $ns2 ./pm_nl_ctl limits 1 5 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow + chk_join_nr "fullmesh test 1x2" 5 5 5 + chk_add_nr 1 1 + + # fullmesh 4 + # 1 non-fullmesh addr in ns1, added before the connection, + # 2 fullmesh addrs in ns2, added during the connection, + # limit max_subflows to 4. + reset + ip netns exec $ns1 ./pm_nl_ctl limits 2 4 + ip netns exec $ns2 ./pm_nl_ctl limits 1 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow + chk_join_nr "fullmesh test 1x2, limited" 4 4 4 + chk_add_nr 1 1 +} + all_tests() { subflows_tests @@ -1545,6 +1758,7 @@ all_tests() syncookies_tests checksum_tests deny_join_id0_tests + fullmesh_tests } usage() @@ -1563,6 +1777,7 @@ usage() echo " -k syncookies_tests" echo " -S checksum_tests" echo " -d deny_join_id0_tests" + echo " -m fullmesh_tests" echo " -c capture pcap files" echo " -C enable data checksum" echo " -h help" @@ -1598,7 +1813,7 @@ if [ $do_all_tests -eq 1 ]; then exit $ret fi -while getopts 'fsltra64bpkdchCS' opt; do +while getopts 'fsltra64bpkdmchCS' opt; do case $opt in f) subflows_tests @@ -1639,6 +1854,9 @@ while getopts 'fsltra64bpkdchCS' opt; do d) deny_join_id0_tests ;; + m) + fullmesh_tests + ;; c) ;; C) diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 115decfdc1ef..354784512748 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -25,7 +25,7 @@ static void syntax(char *argv[]) { fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]); - fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n"); + fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n"); fprintf(stderr, "\tdel <id> [<ip>]\n"); fprintf(stderr, "\tget <id>\n"); fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n"); @@ -236,11 +236,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) flags |= MPTCP_PM_ADDR_FLAG_SIGNAL; else if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else if (!strcmp(tok, "fullmesh")) + flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; else error(1, errno, "unknown flag %s", argv[arg]); } + if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL && + flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + error(1, errno, "error flag fullmesh"); + } + rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; rta->rta_len = RTA_LENGTH(4); @@ -422,6 +429,13 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + printf("fullmesh"); + flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH; + if (flags) + printf(","); + } + /* bump unknown flags, if any */ if (flags) printf("0x%x", flags); diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c index dee7a3d6c5a5..92bbc5a15c39 100644 --- a/tools/testing/selftests/sgx/sigstruct.c +++ b/tools/testing/selftests/sgx/sigstruct.c @@ 
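One detail from the pm_nl_ctl.c hunk above: the signal and fullmesh endpoint flags are now rejected when combined. A minimal, self-contained sketch of that validation pattern (the flag bits below are illustrative placeholders, not the kernel's MPTCP_PM_ADDR_FLAG_* values):

#include <stdio.h>
#include <stdlib.h>

#define FLAG_SIGNAL   (1u << 0)	/* illustrative bits, not the UAPI constants */
#define FLAG_FULLMESH (1u << 3)

static void check_flags(unsigned int flags)
{
	/* reject the combination, as add_addr() now does */
	if ((flags & FLAG_SIGNAL) && (flags & FLAG_FULLMESH)) {
		fprintf(stderr, "signal and fullmesh are mutually exclusive\n");
		exit(1);
	}
}

int main(void)
{
	check_flags(FLAG_SIGNAL);			/* accepted */
	check_flags(FLAG_SIGNAL | FLAG_FULLMESH);	/* rejected, exits with an error */
	return 0;
}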
-55,10 +55,27 @@ static bool alloc_q1q2_ctx(const uint8_t *s, const uint8_t *m, return true; } +static void reverse_bytes(void *data, int length) +{ + int i = 0; + int j = length - 1; + uint8_t temp; + uint8_t *ptr = data; + + while (i < j) { + temp = ptr[i]; + ptr[i] = ptr[j]; + ptr[j] = temp; + i++; + j--; + } +} + static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1, uint8_t *q2) { struct q1q2_ctx ctx; + int len; if (!alloc_q1q2_ctx(s, m, &ctx)) { fprintf(stderr, "Not enough memory for Q1Q2 calculation\n"); @@ -89,8 +106,10 @@ static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1, goto out; } - BN_bn2bin(ctx.q1, q1); - BN_bn2bin(ctx.q2, q2); + len = BN_bn2bin(ctx.q1, q1); + reverse_bytes(q1, len); + len = BN_bn2bin(ctx.q2, q2); + reverse_bytes(q2, len); free_q1q2_ctx(&ctx); return true; @@ -152,22 +171,6 @@ static RSA *gen_sign_key(void) return key; } -static void reverse_bytes(void *data, int length) -{ - int i = 0; - int j = length - 1; - uint8_t temp; - uint8_t *ptr = data; - - while (i < j) { - temp = ptr[i]; - ptr[i] = ptr[j]; - ptr[j] = temp; - i++; - j--; - } -} - enum mrtags { MRECREATE = 0x0045544145524345, MREADD = 0x0000000044444145, @@ -367,8 +370,6 @@ bool encl_measure(struct encl *encl) /* BE -> LE */ reverse_bytes(sigstruct->signature, SGX_MODULUS_SIZE); reverse_bytes(sigstruct->modulus, SGX_MODULUS_SIZE); - reverse_bytes(sigstruct->q1, SGX_MODULUS_SIZE); - reverse_bytes(sigstruct->q2, SGX_MODULUS_SIZE); EVP_MD_CTX_destroy(ctx); RSA_free(key); diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index b587b9a7a124..0d7bbe49359d 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -4,7 +4,8 @@ test: virtio_test vringh_test virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o -CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h +CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h +LDFLAGS += -lpthread vpath %.c ../../drivers/virtio ../../drivers/vhost mod: ${MAKE} -C `pwd`/../.. 
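Stepping back to the sigstruct.c change above: BN_bn2bin() writes only as many bytes as the value needs and returns that count, so the reversal has to use the returned length rather than the full SGX_MODULUS_SIZE buffer, otherwise values with leading zero bytes would be scrambled. A standalone illustration of the same pattern (build with -lcrypto; the number used is made up):

#include <stdint.h>
#include <stdio.h>
#include <openssl/bn.h>

static void reverse_bytes(void *data, int length)
{
	uint8_t *p = data, tmp;
	int i = 0, j = length - 1;

	while (i < j) {
		tmp = p[i];
		p[i] = p[j];
		p[j] = tmp;
		i++;
		j--;
	}
}

int main(void)
{
	uint8_t buf[8] = { 0 };	/* larger than the value actually needs */
	BIGNUM *bn = NULL;
	int len;

	BN_dec2bn(&bn, "258");		/* 0x0102: only two significant bytes */
	len = BN_bn2bin(bn, buf);	/* returns 2, big-endian 01 02 */
	reverse_bytes(buf, len);	/* little-endian 02 01, as calc_q1q2() now does */

	printf("len=%d bytes=%02x %02x\n", len, buf[0], buf[1]);
	BN_free(bn);
	return 0;
}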
M=`pwd`/vhost_test V=${V} diff --git a/tools/virtio/linux/spinlock.h b/tools/virtio/linux/spinlock.h new file mode 100644 index 000000000000..028e3cdcc5d3 --- /dev/null +++ b/tools/virtio/linux/spinlock.h @@ -0,0 +1,56 @@ +#ifndef SPINLOCK_H_STUB +#define SPINLOCK_H_STUB + +#include <pthread.h> + +typedef pthread_spinlock_t spinlock_t; + +static inline void spin_lock_init(spinlock_t *lock) +{ + int r = pthread_spin_init(lock, 0); + assert(!r); +} + +static inline void spin_lock(spinlock_t *lock) +{ + int ret = pthread_spin_lock(lock); + assert(!ret); +} + +static inline void spin_unlock(spinlock_t *lock) +{ + int ret = pthread_spin_unlock(lock); + assert(!ret); +} + +static inline void spin_lock_bh(spinlock_t *lock) +{ + spin_lock(lock); +} + +static inline void spin_unlock_bh(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static inline void spin_lock_irq(spinlock_t *lock) +{ + spin_lock(lock); +} + +static inline void spin_unlock_irq(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static inline void spin_lock_irqsave(spinlock_t *lock, unsigned long f) +{ + spin_lock(lock); +} + +static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f) +{ + spin_unlock(lock); +} + +#endif diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 5d90254ddae4..363b98228301 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -3,6 +3,7 @@ #define LINUX_VIRTIO_H #include <linux/scatterlist.h> #include <linux/kernel.h> +#include <linux/spinlock.h> struct device { void *parent; @@ -12,6 +13,7 @@ struct virtio_device { struct device dev; u64 features; struct list_head vqs; + spinlock_t vqs_list_lock; }; struct virtqueue { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d20fba0fc290..b50dbe269f4b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -892,6 +892,8 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) { + static DEFINE_MUTEX(kvm_debugfs_lock); + struct dentry *dent; char dir_name[ITOA_MAX_LEN * 2]; struct kvm_stat_data *stat_data; const struct _kvm_stats_desc *pdesc; @@ -903,8 +905,20 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) return 0; snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd); - kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir); + mutex_lock(&kvm_debugfs_lock); + dent = debugfs_lookup(dir_name, kvm_debugfs_dir); + if (dent) { + pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name); + dput(dent); + mutex_unlock(&kvm_debugfs_lock); + return 0; + } + dent = debugfs_create_dir(dir_name, kvm_debugfs_dir); + mutex_unlock(&kvm_debugfs_lock); + if (IS_ERR(dent)) + return 0; + kvm->debugfs_dentry = dent; kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, sizeof(*kvm->debugfs_stat_data), GFP_KERNEL_ACCOUNT); @@ -5201,7 +5215,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } add_uevent_var(env, "PID=%d", kvm->userspace_pid); - if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { + if (kvm->debugfs_dentry) { char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); if (p) { |
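The new tools/virtio spinlock stub above simply maps the kernel spin_lock*() family onto pthread spinlocks, which is also why -lpthread was added to the Makefile. A minimal standalone program exercising the same primitives (the worker and counter are made up for illustration; build with -lpthread):

#include <pthread.h>
#include <stdio.h>

static pthread_spinlock_t lock;
static long counter;

static void *worker(void *arg)
{
	for (int i = 0; i < 100000; i++) {
		pthread_spin_lock(&lock);	/* spin_lock() in the stub */
		counter++;
		pthread_spin_unlock(&lock);	/* spin_unlock() in the stub */
	}
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_spin_init(&lock, 0);		/* spin_lock_init() in the stub */
	pthread_create(&t1, NULL, worker, NULL);
	pthread_create(&t2, NULL, worker, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	printf("counter=%ld\n", counter);	/* 200000, since the lock is held */
	return 0;
}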